KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > textmining > text > extraction > chp > Word6CHPBinTable


1 /* Copyright 2004 Ryan Ackley
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */

15
16 package org.textmining.text.extraction.chp;
17
18 import java.util.List JavaDoc;
19 import java.util.ArrayList JavaDoc;
20 import java.io.OutputStream JavaDoc;
21 import java.io.IOException JavaDoc;
22
23 import org.apache.poi.poifs.common.POIFSConstants;
24 import org.apache.poi.util.LittleEndian;
25 import org.apache.poi.hwpf.model.io.*;
26 import org.apache.poi.hwpf.model.*;
27
28 /**
29  * This class holds all of the character formatting properties from a Word
30  * 6.0/95 document.
31  *
32  * @author Ryan Ackley
33  */

34 public class Word6CHPBinTable
35 {
36   /** List of character properties.*/
37   ArrayList JavaDoc _textRuns = new ArrayList JavaDoc();
38
39   /**
40    * Constructor used to read a binTable in from a Word document.
41    *
42    * @param documentStream The POIFS "WordDocument" stream from a Word document
43    * @param offset The offset of the Chp bin table in the main stream.
44    * @param size The size of the Chp bin table in the main stream.
45    * @param fcMin The start of text in the main stream.
46    */

47   public Word6CHPBinTable(byte[] documentStream, int offset,
48                      int size, int fcMin)
49   {
50     PlexOfCps binTable = new PlexOfCps(documentStream, offset, size, 2);
51
52     int length = binTable.length();
53     for (int x = 0; x < length; x++)
54     {
55       GenericPropertyNode node = binTable.getProperty(x);
56
57       int pageNum = LittleEndian.getShort((byte[])node.getBytes());
58       int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;
59
60       CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
61         pageOffset, fcMin);
62
63       int fkpSize = cfkp.size();
64
65       for (int y = 0; y < fkpSize; y++)
66       {
67         _textRuns.add(cfkp.getCHPX(y));
68       }
69     }
70   }
71
72   public List JavaDoc getTextRuns()
73   {
74     return _textRuns;
75   }
76
77 }
Popular Tags