1 16 17 18 package org.apache.poi.hwpf; 19 20 import java.io.InputStream ; 21 import java.io.FileInputStream ; 22 import java.io.IOException ; 23 import java.io.OutputStream ; 24 import java.io.ByteArrayInputStream ; 25 import java.io.FileOutputStream ; 26 27 import java.util.Iterator ; 28 29 import org.apache.poi.poifs.filesystem.POIFSFileSystem; 30 import org.apache.poi.poifs.filesystem.DocumentEntry; 31 import org.apache.poi.poifs.common.POIFSConstants; 32 import org.apache.poi.hwpf.usermodel.CharacterRun; 33 import org.apache.poi.hwpf.usermodel.Paragraph; 34 import org.apache.poi.hwpf.usermodel.TableProperties; 35 import org.apache.poi.hwpf.sprm.TableSprmUncompressor; 36 import org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor; 37 38 import org.apache.poi.hwpf.model.*; 39 import org.apache.poi.hwpf.model.io.*; 40 import org.apache.poi.hwpf.usermodel.*; 41 42 43 50 public class HWPFDocument 51 { 53 54 protected FileInformationBlock _fib; 55 56 57 private byte[] _mainStream; 58 59 60 private byte[] _tableStream; 61 62 63 protected byte[] _dataStream; 64 65 66 protected DocumentProperties _dop; 67 68 70 protected ComplexFileTable _cft; 71 72 protected TextPieceTable _tpt; 73 74 75 protected CHPBinTable _cbt; 76 77 78 protected PAPBinTable _pbt; 79 80 81 protected SectionTable _st; 82 83 84 protected StyleSheet _ss; 85 86 87 protected FontTable _ft; 88 89 90 protected ListTables _lt; 91 92 protected HWPFDocument() 93 { 94 95 } 96 97 104 public HWPFDocument(InputStream istream) throws IOException 105 { 106 POIFSFileSystem filesystem = new POIFSFileSystem(istream); 108 109 DocumentEntry documentProps = 111 (DocumentEntry)filesystem.getRoot().getEntry("WordDocument"); 112 _mainStream = new byte[documentProps.getSize()]; 113 filesystem.createDocumentInputStream("WordDocument").read(_mainStream); 114 115 _fib = new FileInformationBlock(_mainStream); 117 118 String name = "0Table"; 119 if (_fib.isFWhichTblStm()) 120 { 121 name = "1Table"; 122 } 123 124 DocumentEntry tableProps = 126 (DocumentEntry)filesystem.getRoot().getEntry(name); 127 _tableStream = new byte[tableProps.getSize()]; 128 filesystem.createDocumentInputStream(name).read(_tableStream); 129 130 _fib.fillVariableFields(_mainStream, _tableStream); 131 132 try 134 { 135 DocumentEntry dataProps = 136 (DocumentEntry) filesystem.getRoot().getEntry("Data"); 137 _dataStream = new byte[dataProps.getSize()]; 138 filesystem.createDocumentInputStream("Data").read(_dataStream); 139 } 140 catch(java.io.FileNotFoundException e) 141 { 142 _dataStream = new byte[0]; 143 } 144 145 int fcMin = _fib.getFcMin(); 147 148 _dop = new DocumentProperties(_tableStream, _fib.getFcDop()); 150 _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin); 151 _tpt = _cft.getTextPieceTable(); 152 _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin); 153 _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin); 154 155 int cpMin = _tpt.getCpMin(); 158 if (cpMin > 0) 159 { 160 _cbt.adjustForDelete(0, 0, cpMin); 161 _pbt.adjustForDelete(0, 0, cpMin); 162 } 163 164 _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, getTextTable().getTextPieces()); 165 _ss = new StyleSheet(_tableStream, _fib.getFcStshf()); 166 _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn()); 167 168 int listOffset = _fib.getFcPlcfLst(); 169 int lfoOffset = _fib.getFcPlfLfo(); 170 if (listOffset != 0 && _fib.getLcbPlcfLst() != 0) 171 { 172 _lt = new ListTables(_tableStream, _fib.getFcPlcfLst(), _fib.getFcPlfLfo()); 173 } 174 175 PlexOfCps plc = new PlexOfCps(_tableStream, _fib.getFcPlcffldMom(), _fib.getLcbPlcffldMom(), 2); 176 for (int x = 0; x < plc.length(); x++) 177 { 178 GenericPropertyNode node = plc.getProperty(x); 179 byte[] fld = node.getBytes(); 180 int breakpoint = 0; 181 } 182 } 183 184 public StyleSheet getStyleSheet() 185 { 186 return _ss; 187 } 188 189 public FileInformationBlock getFileInformationBlock() 190 { 191 return _fib; 192 } 193 194 public DocumentProperties getDocProperties() 195 { 196 return _dop; 197 } 198 199 public Range getRange() 200 { 201 java.util.List text = _tpt.getTextPieces(); 203 PropertyNode p = (PropertyNode)text.get(text.size() - 1); 204 205 return new Range(0, p.getEnd(), this); 206 } 207 208 212 public int characterLength() 213 { 214 java.util.List textPieces = _tpt.getTextPieces(); 215 Iterator textIt = textPieces.iterator(); 216 217 int length = 0; 218 while(textIt.hasNext()) 219 { 220 TextPiece tp = (TextPiece)textIt.next(); 221 length += tp.characterLength(); 222 } 223 return length; 224 } 225 226 public ListTables getListTables() 227 { 228 return _lt; 229 } 230 237 public void write(OutputStream out) 238 throws IOException 239 { 240 HWPFFileSystem docSys = new HWPFFileSystem(); 242 HWPFOutputStream mainStream = docSys.getStream("WordDocument"); 243 HWPFOutputStream tableStream = docSys.getStream("1Table"); 244 int tableOffset = 0; 246 247 _fib.clearOffsetsSizes(); 250 251 int fibSize = _fib.getSize(); 253 fibSize += POIFSConstants.BIG_BLOCK_SIZE - 254 (fibSize % POIFSConstants.BIG_BLOCK_SIZE); 255 256 byte[] placeHolder = new byte[fibSize]; 259 mainStream.write(placeHolder); 260 int mainOffset = mainStream.getOffset(); 261 262 _fib.setFcStshf(tableOffset); 264 _ss.writeTo(tableStream); 265 _fib.setLcbStshf(tableStream.getOffset() - tableOffset); 266 tableOffset = tableStream.getOffset(); 267 268 int fcMin = mainOffset; 271 272 _fib.setFcClx(tableOffset); 274 _cft.writeTo(docSys); 275 _fib.setLcbClx(tableStream.getOffset() - tableOffset); 276 tableOffset = tableStream.getOffset(); 277 int fcMac = mainStream.getOffset(); 278 279 _fib.setFcPlcfbteChpx(tableOffset); 281 _cbt.writeTo(docSys, fcMin); 282 _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset); 283 tableOffset = tableStream.getOffset(); 284 285 _fib.setFcPlcfbtePapx(tableOffset); 287 _pbt.writeTo(docSys, fcMin); 288 _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset); 289 tableOffset = tableStream.getOffset(); 290 291 _fib.setFcPlcfsed(tableOffset); 293 _st.writeTo(docSys, fcMin); 294 _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset); 295 tableOffset = tableStream.getOffset(); 296 297 if (_lt != null) 299 { 300 _fib.setFcPlcfLst(tableOffset); 301 _lt.writeListDataTo(tableStream); 302 _fib.setLcbPlcfLst(tableStream.getOffset() - tableOffset); 303 304 _fib.setFcPlfLfo(tableStream.getOffset()); 305 _lt.writeListOverridesTo(tableStream); 306 _fib.setLcbPlfLfo(tableStream.getOffset() - tableOffset); 307 tableOffset = tableStream.getOffset(); 308 } 309 310 _fib.setFcSttbfffn(tableOffset); 312 _ft.writeTo(docSys); 313 _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset); 314 tableOffset = tableStream.getOffset(); 315 316 _fib.setFcDop(tableOffset); 318 byte[] buf = new byte[_dop.getSize()]; 319 _fib.setLcbDop(_dop.getSize()); 320 _dop.serialize(buf, 0); 321 tableStream.write(buf); 322 323 _fib.setFcMin(fcMin); 325 _fib.setFcMac(fcMac); 326 _fib.setCbMac(mainStream.getOffset()); 327 328 byte[] mainBuf = mainStream.toByteArray(); 330 if (mainBuf.length < 4096) 331 { 332 byte[] tempBuf = new byte[4096]; 333 System.arraycopy(mainBuf, 0, tempBuf, 0, mainBuf.length); 334 mainBuf = tempBuf; 335 } 336 337 _fib.writeTo(mainBuf, tableStream); 340 341 byte[] tableBuf = tableStream.toByteArray(); 342 if (tableBuf.length < 4096) 343 { 344 byte[] tempBuf = new byte[4096]; 345 System.arraycopy(tableBuf, 0, tempBuf, 0, tableBuf.length); 346 tableBuf = tempBuf; 347 } 348 349 byte[] dataBuf = _dataStream; 350 if (dataBuf == null) 351 { 352 dataBuf = new byte[4096]; 353 } 354 if (dataBuf.length < 4096) 355 { 356 byte[] tempBuf = new byte[4096]; 357 System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length); 358 dataBuf = tempBuf; 359 } 360 361 362 POIFSFileSystem pfs = new POIFSFileSystem(); 364 pfs.createDocument(new ByteArrayInputStream (mainBuf), "WordDocument"); 365 pfs.createDocument(new ByteArrayInputStream (tableBuf), "1Table"); 366 pfs.createDocument(new ByteArrayInputStream (dataBuf), "Data"); 367 368 pfs.writeFilesystem(out); 369 } 370 371 public CHPBinTable getCharacterTable() 372 { 373 return _cbt; 374 } 375 376 public PAPBinTable getParagraphTable() 377 { 378 return _pbt; 379 } 380 381 public SectionTable getSectionTable() 382 { 383 return _st; 384 } 385 386 public TextPieceTable getTextTable() 387 { 388 return _cft.getTextPieceTable(); 389 } 390 391 public byte[] getDataStream() 392 { 393 return _dataStream; 394 } 395 396 public int registerList(HWPFList list) 397 { 398 if (_lt == null) 399 { 400 _lt = new ListTables(); 401 } 402 return _lt.addList(list.getListData(), list.getOverride()); 403 } 404 405 public FontTable getFontTable() 406 { 407 return _ft; 408 } 409 410 public void delete(int start, int length) 411 { 412 Range r = new Range(start, start + length, this); 413 r.delete(); 414 } 415 416 421 public static void main(String [] args) 422 { 423 424 try 425 { 426 HWPFDocument doc = new HWPFDocument(new FileInputStream (args[0])); 427 Range r = doc.getRange(); 428 String str = r.text(); 429 int x = 0; 430 444 445 } 446 catch (Throwable t) 447 { 448 t.printStackTrace(); 449 } 450 } 451 452 } 465 | Popular Tags |