1 23 package org.archive.util.ms; 24 25 26 import java.io.File ; 27 import java.io.IOException ; 28 import java.io.RandomAccessFile ; 29 import java.util.List ; 30 import java.util.logging.Level ; 31 import java.util.logging.Logger ; 32 33 import org.archive.io.Endian; 34 import org.archive.io.RandomAccessInputStream; 35 import org.archive.io.SeekInputStream; 36 import org.archive.io.SeekReader; 37 38 39 44 public class Doc { 45 46 47 final private static Logger LOGGER = Logger.getLogger(Doc.class.getName()); 48 49 50 53 private Doc() { 54 } 55 56 57 64 public static SeekReader getText(String docFilename) throws IOException { 65 return getText(new File (docFilename)); 66 } 67 68 69 76 public static SeekReader getText(File doc) throws IOException { 77 RandomAccessFile raf = new RandomAccessFile (doc, "r"); 78 RandomAccessInputStream rais = new RandomAccessInputStream(raf); 79 return getText(rais); 80 } 81 82 83 90 public static SeekReader getText(SeekInputStream doc) throws IOException { 91 BlockFileSystem bfs = new DefaultBlockFileSystem(doc, 16); 92 return getText(bfs, 20); 93 } 94 95 96 110 public static SeekReader getText(BlockFileSystem wordDoc, int cacheSize) 111 throws IOException { 112 List <Entry> entries = wordDoc.getRoot().list(); 113 Entry main = find(entries, "WordDocument"); 114 SeekInputStream mainStream = main.open(); 115 116 mainStream.position(10); 117 int flags = Endian.littleChar(mainStream); 118 boolean complex = (flags & 0x0004) == 0x0004; 119 boolean tableOne = (flags & 0x0200) == 0x0200; 120 String tableName = tableOne ? "1Table" : "0Table"; 121 Entry table = find(entries, tableName); 122 if (LOGGER.isLoggable(Level.FINEST)) { 123 LOGGER.finest("Main entry: " + main); 124 LOGGER.finest("Table entry: " + table); 125 } 126 SeekInputStream tableStream = table.open(); 127 128 mainStream.position(24); 129 int fcMin = Endian.littleInt(mainStream); 130 int fcMax = Endian.littleInt(mainStream); 131 132 mainStream.position(76); 133 int cppText = Endian.littleInt(mainStream); 134 135 mainStream.position(418); 136 int fcClx = Endian.littleInt(mainStream); 137 int fcSz = Endian.littleInt(mainStream); 138 139 if (LOGGER.isLoggable(Level.FINE)) { 140 LOGGER.fine("fcMin: " + fcMin); 141 LOGGER.fine("fcMax: " + fcMax); 142 LOGGER.fine("FcClx: " + fcClx); 143 LOGGER.fine("szClx: " + fcSz); 144 LOGGER.fine("complex: " + complex); 145 LOGGER.fine("cppText: " + cppText); 146 } 147 PieceTable pt = new PieceTable(tableStream, fcClx, fcMax - fcMin, cacheSize); 148 return new PieceReader(pt, mainStream); 149 } 150 151 152 private static Entry find(List <Entry> entries, String name) { 153 for (Entry e: entries) { 154 if (e.getName().equals(name)) { 155 return e; 156 } 157 } 158 return null; 159 } 160 161 } 162 | Popular Tags |