KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > poi > hwpf > HWPFDocument


/* ====================================================================
   Copyright 2002-2004 Apache Software Foundation

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

16
17
18 package org.apache.poi.hwpf;
19
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import java.util.Iterator;
import java.util.List;

import org.apache.poi.hwpf.model.*;
import org.apache.poi.hwpf.model.io.*;
import org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor;
import org.apache.poi.hwpf.sprm.TableSprmUncompressor;
import org.apache.poi.hwpf.usermodel.*;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.TableProperties;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
41
42
43 /**
44  *
45  * This class acts as the bucket that we throw all of the Word data structures
46  * into.
47  *
48  * @author Ryan Ackley
49  */

50 public class HWPFDocument
51 // implements Cloneable
52
{
53   /** The FIB*/
54   protected FileInformationBlock _fib;
55
56   /** main document stream buffer*/
57   private byte[] _mainStream;
58
59   /** table stream buffer*/
60   private byte[] _tableStream;
61
62   /** data stream buffer*/
63   protected byte[] _dataStream;
64
65   /** Document wide Properties*/
66   protected DocumentProperties _dop;
67
68   /** Contains text of the document wrapped in a obfuscated Word data
69   * structure*/

70   protected ComplexFileTable _cft;
71
72   protected TextPieceTable _tpt;
73
74   /** Contains formatting properties for text*/
75   protected CHPBinTable _cbt;
76
77   /** Contains formatting properties for paragraphs*/
78   protected PAPBinTable _pbt;
79
80   /** Contains formatting properties for sections.*/
81   protected SectionTable _st;
82
83   /** Holds styles for this document.*/
84   protected StyleSheet _ss;
85
86   /** Holds fonts for this document.*/
87   protected FontTable _ft;
88
89   /** Hold list tables */
90   protected ListTables _lt;
91
92   protected HWPFDocument()
93   {
94
95   }
96
97   /**
98    * This constructor loads a Word document from an InputStream.
99    *
100    * @param istream The InputStream that contains the Word document.
101    * @throws IOException If there is an unexpected IOException from the passed
102    * in InputStream.
103    */

104   public HWPFDocument(InputStream JavaDoc istream) throws IOException JavaDoc
105   {
106     //do Ole stuff
107
POIFSFileSystem filesystem = new POIFSFileSystem(istream);
108
109     // read in the main stream.
110
DocumentEntry documentProps =
111        (DocumentEntry)filesystem.getRoot().getEntry("WordDocument");
112     _mainStream = new byte[documentProps.getSize()];
113     filesystem.createDocumentInputStream("WordDocument").read(_mainStream);
114
115     // use the fib to determine the name of the table stream.
116
_fib = new FileInformationBlock(_mainStream);
117
118     String JavaDoc name = "0Table";
119     if (_fib.isFWhichTblStm())
120     {
121       name = "1Table";
122     }
123
124     // read in the table stream.
125
DocumentEntry tableProps =
126       (DocumentEntry)filesystem.getRoot().getEntry(name);
127     _tableStream = new byte[tableProps.getSize()];
128     filesystem.createDocumentInputStream(name).read(_tableStream);
129
130     _fib.fillVariableFields(_mainStream, _tableStream);
131
132     // read in the data stream.
133
try
134     {
135       DocumentEntry dataProps =
136           (DocumentEntry) filesystem.getRoot().getEntry("Data");
137       _dataStream = new byte[dataProps.getSize()];
138       filesystem.createDocumentInputStream("Data").read(_dataStream);
139     }
140     catch(java.io.FileNotFoundException JavaDoc e)
141     {
142         _dataStream = new byte[0];
143     }
144
145     // get the start of text in the main stream
146
int fcMin = _fib.getFcMin();
147
148     // load up our standard structures.
149
_dop = new DocumentProperties(_tableStream, _fib.getFcDop());
150     _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
151     _tpt = _cft.getTextPieceTable();
152     _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin);
153     _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin);
154
155     // Word XP puts in a zero filled buffer in front of the text and it screws
156
// up my system for offsets. This is an adjustment.
157
int cpMin = _tpt.getCpMin();
158     if (cpMin > 0)
159     {
160       _cbt.adjustForDelete(0, 0, cpMin);
161       _pbt.adjustForDelete(0, 0, cpMin);
162     }
163
164     _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, getTextTable().getTextPieces());
165     _ss = new StyleSheet(_tableStream, _fib.getFcStshf());
166     _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());
167
168     int listOffset = _fib.getFcPlcfLst();
169     int lfoOffset = _fib.getFcPlfLfo();
170     if (listOffset != 0 && _fib.getLcbPlcfLst() != 0)
171     {
172       _lt = new ListTables(_tableStream, _fib.getFcPlcfLst(), _fib.getFcPlfLfo());
173     }
174
175     PlexOfCps plc = new PlexOfCps(_tableStream, _fib.getFcPlcffldMom(), _fib.getLcbPlcffldMom(), 2);
176     for (int x = 0; x < plc.length(); x++)
177     {
178       GenericPropertyNode node = plc.getProperty(x);
179       byte[] fld = node.getBytes();
180       int breakpoint = 0;
181     }
182   }
183
184   public StyleSheet getStyleSheet()
185   {
186     return _ss;
187   }
188
189   public FileInformationBlock getFileInformationBlock()
190   {
191     return _fib;
192   }
193
194   public DocumentProperties getDocProperties()
195   {
196     return _dop;
197   }
198
199   public Range getRange()
200   {
201     // hack to get the ending cp of the document, Have to revisit this.
202
java.util.List JavaDoc text = _tpt.getTextPieces();
203     PropertyNode p = (PropertyNode)text.get(text.size() - 1);
204
205     return new Range(0, p.getEnd(), this);
206   }
207
208   /**
209    * Returns the character length of a document.
210    * @return the character length of a document
211    */

212   public int characterLength()
213   {
214     java.util.List JavaDoc textPieces = _tpt.getTextPieces();
215     Iterator JavaDoc textIt = textPieces.iterator();
216
217     int length = 0;
218     while(textIt.hasNext())
219     {
220       TextPiece tp = (TextPiece)textIt.next();
221       length += tp.characterLength();
222     }
223     return length;
224   }
225
226   public ListTables getListTables()
227   {
228     return _lt;
229   }
230   /**
231    * Writes out the word file that is represented by an instance of this class.
232    *
233    * @param out The OutputStream to write to.
234    * @throws IOException If there is an unexpected IOException from the passed
235    * in OutputStream.
236    */

237   public void write(OutputStream JavaDoc out)
238     throws IOException JavaDoc
239   {
240     // initialize our streams for writing.
241
HWPFFileSystem docSys = new HWPFFileSystem();
242     HWPFOutputStream mainStream = docSys.getStream("WordDocument");
243     HWPFOutputStream tableStream = docSys.getStream("1Table");
244     //HWPFOutputStream dataStream = docSys.getStream("Data");
245
int tableOffset = 0;
246
247     // FileInformationBlock fib = (FileInformationBlock)_fib.clone();
248
// clear the offsets and sizes in our FileInformationBlock.
249
_fib.clearOffsetsSizes();
250
251     // determine the FileInformationBLock size
252
int fibSize = _fib.getSize();
253     fibSize += POIFSConstants.BIG_BLOCK_SIZE -
254         (fibSize % POIFSConstants.BIG_BLOCK_SIZE);
255
256     // preserve space for the FileInformationBlock because we will be writing
257
// it after we write everything else.
258
byte[] placeHolder = new byte[fibSize];
259     mainStream.write(placeHolder);
260     int mainOffset = mainStream.getOffset();
261
262     // write out the StyleSheet.
263
_fib.setFcStshf(tableOffset);
264     _ss.writeTo(tableStream);
265     _fib.setLcbStshf(tableStream.getOffset() - tableOffset);
266     tableOffset = tableStream.getOffset();
267
268     // get fcMin and fcMac because we will be writing the actual text with the
269
// complex table.
270
int fcMin = mainOffset;
271
272     // write out the Complex table, includes text.
273
_fib.setFcClx(tableOffset);
274     _cft.writeTo(docSys);
275     _fib.setLcbClx(tableStream.getOffset() - tableOffset);
276     tableOffset = tableStream.getOffset();
277     int fcMac = mainStream.getOffset();
278
279     // write out the CHPBinTable.
280
_fib.setFcPlcfbteChpx(tableOffset);
281     _cbt.writeTo(docSys, fcMin);
282     _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
283     tableOffset = tableStream.getOffset();
284
285     // write out the PAPBinTable.
286
_fib.setFcPlcfbtePapx(tableOffset);
287     _pbt.writeTo(docSys, fcMin);
288     _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
289     tableOffset = tableStream.getOffset();
290
291     // write out the SectionTable.
292
_fib.setFcPlcfsed(tableOffset);
293     _st.writeTo(docSys, fcMin);
294     _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
295     tableOffset = tableStream.getOffset();
296
297     // write out the list tables
298
if (_lt != null)
299     {
300       _fib.setFcPlcfLst(tableOffset);
301       _lt.writeListDataTo(tableStream);
302       _fib.setLcbPlcfLst(tableStream.getOffset() - tableOffset);
303
304       _fib.setFcPlfLfo(tableStream.getOffset());
305       _lt.writeListOverridesTo(tableStream);
306       _fib.setLcbPlfLfo(tableStream.getOffset() - tableOffset);
307       tableOffset = tableStream.getOffset();
308     }
309
310     // write out the FontTable.
311
_fib.setFcSttbfffn(tableOffset);
312     _ft.writeTo(docSys);
313     _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
314     tableOffset = tableStream.getOffset();
315
316     // write out the DocumentProperties.
317
_fib.setFcDop(tableOffset);
318     byte[] buf = new byte[_dop.getSize()];
319     _fib.setLcbDop(_dop.getSize());
320     _dop.serialize(buf, 0);
321     tableStream.write(buf);
322
323     // set some variables in the FileInformationBlock.
324
_fib.setFcMin(fcMin);
325     _fib.setFcMac(fcMac);
326     _fib.setCbMac(mainStream.getOffset());
327
328     // make sure that the table, doc and data streams use big blocks.
329
byte[] mainBuf = mainStream.toByteArray();
330     if (mainBuf.length < 4096)
331     {
332       byte[] tempBuf = new byte[4096];
333       System.arraycopy(mainBuf, 0, tempBuf, 0, mainBuf.length);
334       mainBuf = tempBuf;
335     }
336
337     // write out the FileInformationBlock.
338
//_fib.serialize(mainBuf, 0);
339
_fib.writeTo(mainBuf, tableStream);
340
341     byte[] tableBuf = tableStream.toByteArray();
342     if (tableBuf.length < 4096)
343     {
344       byte[] tempBuf = new byte[4096];
345       System.arraycopy(tableBuf, 0, tempBuf, 0, tableBuf.length);
346       tableBuf = tempBuf;
347     }
348
349     byte[] dataBuf = _dataStream;
350     if (dataBuf == null)
351     {
352       dataBuf = new byte[4096];
353     }
354     if (dataBuf.length < 4096)
355     {
356       byte[] tempBuf = new byte[4096];
357       System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length);
358       dataBuf = tempBuf;
359     }
360
361
362     // spit out the Word document.
363
POIFSFileSystem pfs = new POIFSFileSystem();
364     pfs.createDocument(new ByteArrayInputStream JavaDoc(mainBuf), "WordDocument");
365     pfs.createDocument(new ByteArrayInputStream JavaDoc(tableBuf), "1Table");
366     pfs.createDocument(new ByteArrayInputStream JavaDoc(dataBuf), "Data");
367
368     pfs.writeFilesystem(out);
369   }
370
371   public CHPBinTable getCharacterTable()
372   {
373     return _cbt;
374   }
375
376   public PAPBinTable getParagraphTable()
377   {
378     return _pbt;
379   }
380
381   public SectionTable getSectionTable()
382   {
383     return _st;
384   }
385
386   public TextPieceTable getTextTable()
387   {
388     return _cft.getTextPieceTable();
389   }
390
391   public byte[] getDataStream()
392   {
393     return _dataStream;
394   }
395
396   public int registerList(HWPFList list)
397   {
398     if (_lt == null)
399     {
400       _lt = new ListTables();
401     }
402     return _lt.addList(list.getListData(), list.getOverride());
403   }
404
405   public FontTable getFontTable()
406   {
407     return _ft;
408   }
409
410   public void delete(int start, int length)
411   {
412     Range r = new Range(start, start + length, this);
413     r.delete();
414   }
415
416   /**
417    * Takes two arguments, 1) name of the Word file to read in 2) location to
418    * write it out at.
419    * @param args
420    */

421   public static void main(String JavaDoc[] args)
422   {
423
424     try
425     {
426       HWPFDocument doc = new HWPFDocument(new FileInputStream JavaDoc(args[0]));
427       Range r = doc.getRange();
428       String JavaDoc str = r.text();
429       int x = 0;
430 // CharacterRun run = new CharacterRun();
431
// run.setBold(true);
432
// run.setItalic(true);
433
// run.setCapitalized(true);
434
//
435
// Range range = doc.getRange();
436
// range.insertBefore("Hello World!!! HAHAHAHAHA I DID IT!!!", run);
437
//
438
// OutputStream out = new FileOutputStream(args[1]);
439
// doc.write(out);
440
//
441
// out.flush();
442
// out.close();
443

444
445     }
446     catch (Throwable JavaDoc t)
447     {
448       t.printStackTrace();
449     }
450   }
451
452 // public Object clone()
453
// throws CloneNotSupportedException
454
// {
455
// _tpt;
456
//
457
// _cbt;
458
//
459
// _pbt;
460
//
461
// _st;
462
//
463
// }
464
}
465
Popular Tags