package org.apache.lucene.index;

import java.util.Vector;
import java.util.Iterator;
import java.util.Collection;
import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;

/**
 * Combines multiple index segments into a single new segment.
 *
 * <p>Usage: construct with the target {@link Directory} and segment name,
 * {@link #add} each source {@link IndexReader}, then call {@link #merge()}
 * to write the merged stored fields, term dictionary/postings, norms and
 * (optionally) term vectors.  Call {@link #closeReaders()} afterwards to
 * release the source readers, and optionally {@link #createCompoundFile}
 * to pack the new segment's files into a compound file.
 *
 * <p>NOTE(review): not thread-safe — all merge state (outputs, skip
 * buffers, queue) lives in instance fields and is mutated without
 * synchronization; presumably one SegmentMerger per merge, single-threaded.
 */
final class SegmentMerger {
  /** Directory the merged segment is written to. */
  private Directory directory;
  /** Name of the new (merged) segment; used as the filename prefix. */
  private String segment;
  /** Interval between indexed terms in the term dictionary (.tii). */
  private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;

  // Source readers to merge, in order.  NOTE(review): raw Vector of
  // IndexReader — pre-generics code; elements are cast on retrieval.
  private Vector readers = new Vector();
  /** Union of field infos across all source readers; built by mergeFields(). */
  private FieldInfos fieldInfos;

  /**
   * Creates a merger that writes segment {@code name} into {@code dir}
   * using the default term index interval.
   */
  SegmentMerger(Directory dir, String name) {
    directory = dir;
    segment = name;
  }

  /**
   * Creates a merger that writes segment {@code name} into the writer's
   * directory, inheriting the writer's term index interval.
   */
  SegmentMerger(IndexWriter writer, String name) {
    directory = writer.getDirectory();
    segment = name;
    termIndexInterval = writer.getTermIndexInterval();
  }

  /** Adds a source reader whose documents will be appended to the merge. */
  final void add(IndexReader reader) {
    readers.addElement(reader);
  }

  /** Returns the i'th source reader previously passed to {@link #add}. */
  final IndexReader segmentReader(int i) {
    return (IndexReader) readers.elementAt(i);
  }

  /**
   * Runs the full merge: stored fields first (which also builds
   * {@link #fieldInfos}), then terms/postings, then norms, then term
   * vectors if any field stores them.
   *
   * @return the number of (non-deleted) documents in the merged segment
   * @throws IOException on any low-level index I/O error
   */
  final int merge() throws IOException {
    int value;

    // mergeFields() must run first: it populates fieldInfos, which the
    // other phases (and the hasVectors() check below) depend on.
    value = mergeFields();
    mergeTerms();
    mergeNorms();

    if (fieldInfos.hasVectors())
      mergeVectors();

    return value;
  }

  /**
   * Closes all source readers.
   *
   * <p>NOTE(review): if one close() throws, the remaining readers are
   * left open — readers after the failing one leak.  Kept as-is to
   * preserve existing error propagation.
   */
  final void closeReaders() throws IOException {
    for (int i = 0; i < readers.size(); i++) {
      IndexReader reader = (IndexReader) readers.elementAt(i);
      reader.close();
    }
  }

  /**
   * Packs the merged segment's files into a single compound file
   * {@code fileName}.
   *
   * @return the Vector of String file names that were bundled (and which
   *         the caller may then delete)
   * @throws IOException on any low-level index I/O error
   */
  final Vector createCompoundFile(String fileName)
          throws IOException {
    CompoundFileWriter cfsWriter =
            new CompoundFileWriter(directory, fileName);

    Vector files =
            new Vector(IndexFileNames.COMPOUND_EXTENSIONS.length + fieldInfos.size());

    // Basic files: one per standard compound extension (.fnm, .frq, ...).
    for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) {
      files.add(segment + "."
              + IndexFileNames.COMPOUND_EXTENSIONS[i]);
    }

    // Field norm files: one ".f<N>" file per indexed field with norms,
    // matching what mergeNorms() wrote.
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed && !fi.omitNorms) {
        files.add(segment + ".f" + i);
      }
    }

    // Term vector files, only if at least one field stores vectors.
    if (fieldInfos.hasVectors()) {
      for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) {
        files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
      }
    }

    // Now copy each collected file into the compound file.
    Iterator it = files.iterator();
    while (it.hasNext()) {
      cfsWriter.addFile((String) it.next());
    }

    // Perform the merge onto disk.  NOTE(review): not in a finally block,
    // so an exception during addFile() leaves cfsWriter unclosed.
    cfsWriter.close();

    return files;
  }

  /**
   * Registers every field name in {@code names} with {@code fieldInfos}
   * as indexed, with the given term-vector options.  Norms are marked
   * omitted when the reader has no norms for the field.
   */
  private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector,
                          boolean storeOffsetWithTermVector) throws IOException {
    Iterator i = names.iterator();
    while (i.hasNext()) {
      String field = (String) i.next();
      fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field));
    }
  }

  /**
   * Builds the merged {@link FieldInfos} (written to "<segment>.fnm")
   * and appends all non-deleted documents' stored fields.
   *
   * @return the number of documents written
   * @throws IOException on any low-level index I/O error
   */
  private final int mergeFields() throws IOException {
    fieldInfos = new FieldInfos();
    int docCount = 0;
    for (int i = 0; i < readers.size(); i++) {
      IndexReader reader = (IndexReader) readers.elementAt(i);
      // Register fields from most-specific vector options down to plain
      // indexed; FieldInfos.add presumably ORs options when a field
      // appears in several categories across readers.
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
      fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false);
    }
    fieldInfos.write(directory, segment + ".fnm");

    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try {
      for (int i = 0; i < readers.size(); i++) {
        IndexReader reader = (IndexReader) readers.elementAt(i);
        int maxDoc = reader.maxDoc();
        for (int j = 0; j < maxDoc; j++)
          if (!reader.isDeleted(j)) {   // skip deleted docs; doc numbers compact
            fieldsWriter.addDocument(reader.document(j));
            docCount++;
          }
      }
    } finally {
      fieldsWriter.close();
    }
    return docCount;
  }

  /**
   * Merges term vectors for all non-deleted documents, in the same
   * reader/doc order as {@link #mergeFields} so merged doc numbers line up.
   *
   * @throws IOException on any low-level index I/O error
   */
  private final void mergeVectors() throws IOException {
    TermVectorsWriter termVectorsWriter =
            new TermVectorsWriter(directory, segment, fieldInfos);

    try {
      for (int r = 0; r < readers.size(); r++) {
        IndexReader reader = (IndexReader) readers.elementAt(r);
        int maxDoc = reader.maxDoc();
        for (int docNum = 0; docNum < maxDoc; docNum++) {
          // skip deleted docs
          if (reader.isDeleted(docNum))
            continue;
          termVectorsWriter.addAllDocVectors(reader.getTermFreqVectors(docNum));
        }
      }
    } finally {
      termVectorsWriter.close();
    }
  }

  // --- state shared by the term/postings merge phase ---
  private IndexOutput freqOutput = null;          // ".frq" output
  private IndexOutput proxOutput = null;          // ".prx" output
  private TermInfosWriter termInfosWriter = null; // term dictionary writer
  private int skipInterval;                       // docs between skip entries
  private SegmentMergeQueue queue = null;         // priority queue ordering terms

  /**
   * Merges the term dictionaries and postings of all readers into the
   * ".frq"/".prx" files and the term infos writer, closing all of them
   * (even on failure).
   *
   * @throws IOException on any low-level index I/O error
   */
  private final void mergeTerms() throws IOException {
    try {
      freqOutput = directory.createOutput(segment + ".frq");
      proxOutput = directory.createOutput(segment + ".prx");
      termInfosWriter =
              new TermInfosWriter(directory, segment, fieldInfos,
                      termIndexInterval);
      skipInterval = termInfosWriter.skipInterval;
      queue = new SegmentMergeQueue(readers.size());

      mergeTermInfos();

    } finally {
      if (freqOutput != null) freqOutput.close();
      if (proxOutput != null) proxOutput.close();
      if (termInfosWriter != null) termInfosWriter.close();
      if (queue != null) queue.close();
    }
  }

  /**
   * K-way merge of the readers' term enumerations: repeatedly pops all
   * queue entries positioned on the smallest term, merges their postings
   * via {@link #mergeTermInfo}, then advances and re-queues them.
   */
  private final void mergeTermInfos() throws IOException {
    int base = 0;
    for (int i = 0; i < readers.size(); i++) {
      IndexReader reader = (IndexReader) readers.elementAt(i);
      TermEnum termEnum = reader.terms();
      SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
      // base offsets each reader's doc numbers into the merged numbering;
      // numDocs() (not maxDoc) because deleted docs are compacted away.
      base += reader.numDocs();
      if (smi.next())
        queue.put(smi);   // initialize queue with first term of each reader
      else
        smi.close();      // reader had no terms at all
    }

    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];

    while (queue.size() > 0) {
      int matchSize = 0;   // pop matching terms
      match[matchSize++] = (SegmentMergeInfo) queue.pop();
      Term term = match[0].term;
      SegmentMergeInfo top = (SegmentMergeInfo) queue.top();

      // Gather every entry positioned on the same (smallest) term.
      while (top != null && term.compareTo(top.term) == 0) {
        match[matchSize++] = (SegmentMergeInfo) queue.pop();
        top = (SegmentMergeInfo) queue.top();
      }

      mergeTermInfo(match, matchSize);   // add new TermInfo

      // Advance each matched entry; re-queue if it has more terms.
      while (matchSize > 0) {
        SegmentMergeInfo smi = match[--matchSize];
        if (smi.next())
          queue.put(smi);
        else
          smi.close();   // done with this reader's terms
      }
    }
  }

  /** Reused scratch TermInfo to avoid allocating one per merged term. */
  private final TermInfo termInfo = new TermInfo();

  /**
   * Merges the postings of one term across the matched readers and, if
   * the term has any surviving documents, records its TermInfo (doc freq,
   * freq/prox file pointers, and skip-data offset within the freq file).
   *
   * @param smis entries all positioned on the same term
   * @param n    number of valid entries in {@code smis}
   */
  private final void mergeTermInfo(SegmentMergeInfo[] smis, int n)
          throws IOException {
    // Capture pointers BEFORE writing: TermInfo must reference the start
    // of this term's postings.
    long freqPointer = freqOutput.getFilePointer();
    long proxPointer = proxOutput.getFilePointer();

    int df = appendPostings(smis, n);   // append posting lists

    long skipPointer = writeSkip();     // skip data follows the postings

    if (df > 0) {
      // df could be 0 if all docs for this term were deleted.
      termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
      termInfosWriter.add(smis[0].term, termInfo);
    }
  }

  /**
   * Appends the merged posting list for one term to the freq/prox
   * outputs, remapping doc numbers (deleted-doc compaction via docMap,
   * then the reader's base offset) and delta-encoding docs and positions.
   *
   * @return the merged document frequency of the term
   * @throws IllegalStateException if remapped docs are not ascending
   *         (would corrupt the delta encoding)
   */
  private final int appendPostings(SegmentMergeInfo[] smis, int n)
          throws IOException {
    int lastDoc = 0;
    int df = 0;         // number of docs w/ term
    resetSkip();
    for (int i = 0; i < n; i++) {
      SegmentMergeInfo smi = smis[i];
      TermPositions postings = smi.getPositions();
      int base = smi.base;
      int[] docMap = smi.getDocMap();
      postings.seek(smi.termEnum);
      while (postings.next()) {
        int doc = postings.doc();
        if (docMap != null)
          doc = docMap[doc];   // map around deletions
        doc += base;           // convert to merged space

        if (doc < lastDoc)
          throw new IllegalStateException("docs out of order");

        df++;

        if ((df % skipInterval) == 0) {
          bufferSkip(lastDoc);   // record a skip point every skipInterval docs
        }

        // Doc delta is shifted left one bit; low bit set means freq==1
        // and the freq VInt is omitted (classic Lucene .frq encoding).
        int docCode = (doc - lastDoc) << 1;   // use low bit to flag freq=1
        lastDoc = doc;

        int freq = postings.freq();
        if (freq == 1) {
          freqOutput.writeVInt(docCode | 1);   // write doc & freq=1
        } else {
          freqOutput.writeVInt(docCode);       // write doc
          freqOutput.writeVInt(freq);          // write frequency in doc
        }

        // Positions are delta-encoded within the document.
        int lastPosition = 0;   // write position deltas
        for (int j = 0; j < freq; j++) {
          int position = postings.nextPosition();
          proxOutput.writeVInt(position - lastPosition);
          lastPosition = position;
        }
      }
    }
    return df;
  }

  // --- skip-list buffering for the current term's postings ---
  private RAMOutputStream skipBuffer = new RAMOutputStream();
  private int lastSkipDoc;
  private long lastSkipFreqPointer;
  private long lastSkipProxPointer;

  /** Resets skip state at the start of a term's postings. */
  private void resetSkip() {
    skipBuffer.reset();
    lastSkipDoc = 0;
    // Skip entries store deltas from these pointers.
    lastSkipFreqPointer = freqOutput.getFilePointer();
    lastSkipProxPointer = proxOutput.getFilePointer();
  }

  /**
   * Buffers one skip entry: delta-encoded doc number plus deltas of the
   * freq and prox file pointers since the previous entry.
   */
  private void bufferSkip(int doc) throws IOException {
    long freqPointer = freqOutput.getFilePointer();
    long proxPointer = proxOutput.getFilePointer();

    skipBuffer.writeVInt(doc - lastSkipDoc);
    skipBuffer.writeVInt((int) (freqPointer - lastSkipFreqPointer));
    skipBuffer.writeVInt((int) (proxPointer - lastSkipProxPointer));

    lastSkipDoc = doc;
    lastSkipFreqPointer = freqPointer;
    lastSkipProxPointer = proxPointer;
  }

  /**
   * Flushes the buffered skip data to the freq output (immediately after
   * the term's postings) and returns the file pointer where it begins.
   */
  private long writeSkip() throws IOException {
    long skipPointer = freqOutput.getFilePointer();
    skipBuffer.writeTo(freqOutput);
    return skipPointer;
  }

  /**
   * Merges norms for every indexed field that has them, writing one
   * "<segment>.f<N>" file per field with the norm bytes of all
   * non-deleted docs in merged doc order.
   *
   * @throws IOException on any low-level index I/O error
   */
  private void mergeNorms() throws IOException {
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed && !fi.omitNorms) {
        IndexOutput output = directory.createOutput(segment + ".f" + i);
        try {
          for (int j = 0; j < readers.size(); j++) {
            IndexReader reader = (IndexReader) readers.elementAt(j);
            int maxDoc = reader.maxDoc();
            byte[] input = new byte[maxDoc];
            reader.norms(fi.name, input, 0);
            for (int k = 0; k < maxDoc; k++) {
              if (!reader.isDeleted(k)) {   // deleted docs' norms are dropped
                output.writeByte(input[k]);
              }
            }
          }
        } finally {
          output.close();
        }
      }
    }
  }

}