1 package org.apache.lucene.index; 2 3 18 19 import org.apache.lucene.store.Directory; 20 import org.apache.lucene.store.IndexOutput; 21 import org.apache.lucene.util.StringHelper; 22 23 import java.io.IOException ; 24 import java.util.Vector ; 25 26 52 final class TermVectorsWriter { 53 static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1; 54 static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2; 55 56 static final int FORMAT_VERSION = 2; 57 static final int FORMAT_SIZE = 4; 59 60 static final String TVX_EXTENSION = ".tvx"; 61 static final String TVD_EXTENSION = ".tvd"; 62 static final String TVF_EXTENSION = ".tvf"; 63 64 private IndexOutput tvx = null, tvd = null, tvf = null; 65 private Vector fields = null; 66 private Vector terms = null; 67 private FieldInfos fieldInfos; 68 69 private TVField currentField = null; 70 private long currentDocPointer = -1; 71 72 public TermVectorsWriter(Directory directory, String segment, 73 FieldInfos fieldInfos) 74 throws IOException { 75 tvx = directory.createOutput(segment + TVX_EXTENSION); 77 tvx.writeInt(FORMAT_VERSION); 78 tvd = directory.createOutput(segment + TVD_EXTENSION); 79 tvd.writeInt(FORMAT_VERSION); 80 tvf = directory.createOutput(segment + TVF_EXTENSION); 81 tvf.writeInt(FORMAT_VERSION); 82 83 this.fieldInfos = fieldInfos; 84 fields = new Vector (fieldInfos.size()); 85 terms = new Vector (); 86 } 87 88 89 public final void openDocument() 90 throws IOException { 91 closeDocument(); 92 currentDocPointer = tvd.getFilePointer(); 93 } 94 95 96 public final void closeDocument() 97 throws IOException { 98 if (isDocumentOpen()) { 99 closeField(); 100 writeDoc(); 101 fields.clear(); 102 currentDocPointer = -1; 103 } 104 } 105 106 107 public final boolean isDocumentOpen() { 108 return currentDocPointer != -1; 109 } 110 111 112 117 public final void openField(String field) throws IOException { 118 FieldInfo fieldInfo = fieldInfos.fieldInfo(field); 119 openField(fieldInfo.number, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector); 120 } 121 122 private void openField(int fieldNumber, boolean storePositionWithTermVector, 123 boolean storeOffsetWithTermVector) throws IOException { 124 if (!isDocumentOpen()) 125 throw new IllegalStateException ("Cannot open field when no document is open."); 126 closeField(); 127 currentField = new TVField(fieldNumber, storePositionWithTermVector, storeOffsetWithTermVector); 128 } 129 130 133 public final void closeField() 134 throws IOException { 135 if (isFieldOpen()) { 136 137 139 140 writeField(); 142 fields.add(currentField); 143 terms.clear(); 144 currentField = null; 145 } 146 } 147 148 149 public final boolean isFieldOpen() { 150 return currentField != null; 151 } 152 153 160 public final void addTerm(String termText, int freq) { 161 addTerm(termText, freq, null, null); 162 } 163 164 public final void addTerm(String termText, int freq, int [] positions, TermVectorOffsetInfo [] offsets) 165 { 166 if (!isDocumentOpen()) 167 throw new IllegalStateException ("Cannot add terms when document is not open"); 168 if (!isFieldOpen()) 169 throw new IllegalStateException ("Cannot add terms when field is not open"); 170 171 addTermInternal(termText, freq, positions, offsets); 172 } 173 174 private final void addTermInternal(String termText, int freq, int [] positions, TermVectorOffsetInfo [] offsets) { 175 TVTerm term = new TVTerm(); 176 term.termText = termText; 177 term.freq = freq; 178 term.positions = positions; 179 term.offsets = offsets; 180 terms.add(term); 181 } 182 183 190 public final void addAllDocVectors(TermFreqVector[] vectors) 191 throws IOException { 192 openDocument(); 193 194 if (vectors != null) { 195 for (int i = 0; i < vectors.length; i++) { 196 boolean storePositionWithTermVector = false; 197 boolean storeOffsetWithTermVector = false; 198 199 try { 200 201 TermPositionVector tpVector = (TermPositionVector) vectors[i]; 202 203 if (tpVector.size() > 0 && tpVector.getTermPositions(0) != null) 204 storePositionWithTermVector = true; 205 if (tpVector.size() > 0 && tpVector.getOffsets(0) != null) 206 storeOffsetWithTermVector = true; 207 208 FieldInfo fieldInfo = fieldInfos.fieldInfo(tpVector.getField()); 209 openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector); 210 211 for (int j = 0; j < tpVector.size(); j++) 212 addTermInternal(tpVector.getTerms()[j], tpVector.getTermFrequencies()[j], tpVector.getTermPositions(j), 213 tpVector.getOffsets(j)); 214 215 closeField(); 216 217 } catch (ClassCastException ignore) { 218 219 TermFreqVector tfVector = vectors[i]; 220 221 FieldInfo fieldInfo = fieldInfos.fieldInfo(tfVector.getField()); 222 openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector); 223 224 for (int j = 0; j < tfVector.size(); j++) 225 addTermInternal(tfVector.getTerms()[j], tfVector.getTermFrequencies()[j], null, null); 226 227 closeField(); 228 229 } 230 } 231 } 232 233 closeDocument(); 234 } 235 236 237 final void close() throws IOException { 238 try { 239 closeDocument(); 240 } finally { 241 IOException keep = null; 244 if (tvx != null) 245 try { 246 tvx.close(); 247 } catch (IOException e) { 248 if (keep == null) keep = e; 249 } 250 if (tvd != null) 251 try { 252 tvd.close(); 253 } catch (IOException e) { 254 if (keep == null) keep = e; 255 } 256 if (tvf != null) 257 try { 258 tvf.close(); 259 } catch (IOException e) { 260 if (keep == null) keep = e; 261 } 262 if (keep != null) throw (IOException ) keep.fillInStackTrace(); 263 } 264 } 265 266 267 268 private void writeField() throws IOException { 269 currentField.tvfPointer = tvf.getFilePointer(); 271 273 final int size = terms.size(); 274 tvf.writeVInt(size); 275 276 boolean storePositions = currentField.storePositions; 277 boolean storeOffsets = currentField.storeOffsets; 278 byte bits = 0x0; 279 if (storePositions) 280 bits |= STORE_POSITIONS_WITH_TERMVECTOR; 281 if (storeOffsets) 282 bits |= STORE_OFFSET_WITH_TERMVECTOR; 283 tvf.writeByte(bits); 284 285 String lastTermText = ""; 286 for (int i = 0; i < size; i++) { 287 TVTerm term = (TVTerm) terms.elementAt(i); 288 int start = StringHelper.stringDifference(lastTermText, term.termText); 289 int length = term.termText.length() - start; 290 tvf.writeVInt(start); tvf.writeVInt(length); tvf.writeChars(term.termText, start, length); tvf.writeVInt(term.freq); 294 lastTermText = term.termText; 295 296 if(storePositions){ 297 if(term.positions == null) 298 throw new IllegalStateException ("Trying to write positions that are null!"); 299 300 int position = 0; 302 for (int j = 0; j < term.freq; j++){ 303 tvf.writeVInt(term.positions[j] - position); 304 position = term.positions[j]; 305 } 306 } 307 308 if(storeOffsets){ 309 if(term.offsets == null) 310 throw new IllegalStateException ("Trying to write offsets that are null!"); 311 312 int position = 0; 314 for (int j = 0; j < term.freq; j++) { 315 tvf.writeVInt(term.offsets[j].getStartOffset() - position); 316 tvf.writeVInt(term.offsets[j].getEndOffset() - term.offsets[j].getStartOffset()); position = term.offsets[j].getEndOffset(); 318 } 319 } 320 } 321 } 322 323 private void writeDoc() throws IOException { 324 if (isFieldOpen()) 325 throw new IllegalStateException ("Field is still open while writing document"); 326 tvx.writeLong(currentDocPointer); 329 330 final int size = fields.size(); 332 333 tvd.writeVInt(size); 335 336 for (int i = 0; i < size; i++) { 338 TVField field = (TVField) fields.elementAt(i); 339 tvd.writeVInt(field.number); 340 } 341 342 long lastFieldPointer = 0; 344 for (int i = 0; i < size; i++) { 345 TVField field = (TVField) fields.elementAt(i); 346 tvd.writeVLong(field.tvfPointer - lastFieldPointer); 347 lastFieldPointer = field.tvfPointer; 348 } 349 } 351 352 353 private static class TVField { 354 int number; 355 long tvfPointer = 0; 356 boolean storePositions = false; 357 boolean storeOffsets = false; 358 TVField(int number, boolean storePos, boolean storeOff) { 359 this.number = number; 360 storePositions = storePos; 361 storeOffsets = storeOff; 362 } 363 } 364 365 private static class TVTerm { 366 String termText; 367 int freq = 0; 368 int positions[] = null; 369 TermVectorOffsetInfo [] offsets = null; 370 } 371 372 373 } 374 | Popular Tags |