KickJava — Java API by example, from geeks to geeks.

Java > Open Source Code > org > apache > lucene > index > IndexWriter


1 package org.apache.lucene.index;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Vector;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.RAMDirectory;
34
35 /**
36   An IndexWriter creates and maintains an index.
37
38   The third argument to the
39   <a HREF="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a>
40   determines whether a new index is created, or whether an existing index is
41   opened for the addition of new documents.
42
43   In either case, documents are added with the <a
44   href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> method.
45   When finished adding documents, <a HREF="#close()"><b>close</b></a> should be called.
46
47   <p>If an index will not have more documents added for a while and optimal search
48   performance is desired, then the <a HREF="#optimize()"><b>optimize</b></a>
49   method should be called before the index is closed.
50   
51   <p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open
52   another IndexWriter on the same directory will lead to an IOException. The IOException
53   is also thrown if an IndexReader on the same directory is used to delete documents
54   from the index.
55   
56   @see IndexModifier IndexModifier supports the important methods of IndexWriter plus deletion
57   */

58
59 public class IndexWriter {
60
61   /**
62    * Default value is 1,000.
63    */

64   public final static long WRITE_LOCK_TIMEOUT = 1000;
65
66   /**
67    * Default value is 10,000.
68    */

69   public final static long COMMIT_LOCK_TIMEOUT = 10000;
70
71   public static final String JavaDoc WRITE_LOCK_NAME = "write.lock";
72   public static final String JavaDoc COMMIT_LOCK_NAME = "commit.lock";
73
74   /**
75    * Default value is 10. Change using {@link #setMergeFactor(int)}.
76    */

77   public final static int DEFAULT_MERGE_FACTOR = 10;
78
79   /**
80    * Default value is 10. Change using {@link #setMaxBufferedDocs(int)}.
81    */

82   public final static int DEFAULT_MAX_BUFFERED_DOCS = 10;
83
84   /**
85    * @deprecated use {@link #DEFAULT_MAX_BUFFERED_DOCS} instead
86    */

87   public final static int DEFAULT_MIN_MERGE_DOCS = DEFAULT_MAX_BUFFERED_DOCS;
88
89   /**
90    * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}.
91    */

92   public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
93
94   /**
95    * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
96    */

97   public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;
98
99   /**
100    * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
101    */

102   public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
103   
104   private Directory directory; // where this index resides
105
private Analyzer analyzer; // how to analyze text
106

107   private Similarity similarity = Similarity.getDefault(); // how to normalize
108

109   private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
110
private final Directory ramDirectory = new RAMDirectory(); // for temp segs
111

112   private Lock writeLock;
113
114   private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
115
116   /** Use compound file setting. Defaults to true, minimizing the number of
117    * files used. Setting this to false may improve indexing performance, but
118    * may also cause file handle problems.
119    */

120   private boolean useCompoundFile = true;
121   
122   private boolean closeDir;
123
124   /** Get the current setting of whether to use the compound file format.
125    * Note that this just returns the value you set with setUseCompoundFile(boolean)
126    * or the default. You cannot use this to query the status of an existing index.
127    * @see #setUseCompoundFile(boolean)
128    */

129   public boolean getUseCompoundFile() {
130     return useCompoundFile;
131   }
132
133   /** Setting to turn on usage of a compound file. When on, multiple files
134    * for each segment are merged into a single file once the segment creation
135    * is finished. This is done regardless of what directory is in use.
136    */

137   public void setUseCompoundFile(boolean value) {
138     useCompoundFile = value;
139   }
140
141   /** Expert: Set the Similarity implementation used by this IndexWriter.
142    *
143    * @see Similarity#setDefault(Similarity)
144    */

145   public void setSimilarity(Similarity similarity) {
146     this.similarity = similarity;
147   }
148
149   /** Expert: Return the Similarity implementation used by this IndexWriter.
150    *
151    * <p>This defaults to the current value of {@link Similarity#getDefault()}.
152    */

153   public Similarity getSimilarity() {
154     return this.similarity;
155   }
156
157   /** Expert: Set the interval between indexed terms. Large values cause less
158    * memory to be used by IndexReader, but slow random-access to terms. Small
159    * values cause more memory to be used by an IndexReader, and speed
160    * random-access to terms.
161    *
162    * This parameter determines the amount of computation required per query
163    * term, regardless of the number of documents that contain that term. In
164    * particular, it is the maximum number of other terms that must be
165    * scanned before a term is located and its frequency and position information
166    * may be processed. In a large index with user-entered query terms, query
167    * processing time is likely to be dominated not by term lookup but rather
168    * by the processing of frequency and positional data. In a small index
169    * or when many uncommon query terms are generated (e.g., by wildcard
170    * queries) term lookup may become a dominant cost.
171    *
172    * In particular, <code>numUniqueTerms/interval</code> terms are read into
173    * memory by an IndexReader, and, on average, <code>interval/2</code> terms
174    * must be scanned for each random term access.
175    *
176    * @see #DEFAULT_TERM_INDEX_INTERVAL
177    */

178   public void setTermIndexInterval(int interval) {
179     this.termIndexInterval = interval;
180   }
181
182   /** Expert: Return the interval between indexed terms.
183    *
184    * @see #setTermIndexInterval(int)
185    */

186   public int getTermIndexInterval() { return termIndexInterval; }
187
188   /**
189    * Constructs an IndexWriter for the index in <code>path</code>.
190    * Text will be analyzed with <code>a</code>. If <code>create</code>
191    * is true, then a new, empty index will be created in
192    * <code>path</code>, replacing the index already there, if any.
193    *
194    * @param path the path to the index directory
195    * @param a the analyzer to use
196    * @param create <code>true</code> to create the index or overwrite
197    * the existing one; <code>false</code> to append to the existing
198    * index
199    * @throws IOException if the directory cannot be read/written to, or
200    * if it does not exist, and <code>create</code> is
201    * <code>false</code>
202    */

203   public IndexWriter(String JavaDoc path, Analyzer a, boolean create)
204        throws IOException JavaDoc {
205     this(FSDirectory.getDirectory(path, create), a, create, true);
206   }
207
208   /**
209    * Constructs an IndexWriter for the index in <code>path</code>.
210    * Text will be analyzed with <code>a</code>. If <code>create</code>
211    * is true, then a new, empty index will be created in
212    * <code>path</code>, replacing the index already there, if any.
213    *
214    * @param path the path to the index directory
215    * @param a the analyzer to use
216    * @param create <code>true</code> to create the index or overwrite
217    * the existing one; <code>false</code> to append to the existing
218    * index
219    * @throws IOException if the directory cannot be read/written to, or
220    * if it does not exist, and <code>create</code> is
221    * <code>false</code>
222    */

223   public IndexWriter(File JavaDoc path, Analyzer a, boolean create)
224        throws IOException JavaDoc {
225     this(FSDirectory.getDirectory(path, create), a, create, true);
226   }
227
228   /**
229    * Constructs an IndexWriter for the index in <code>d</code>.
230    * Text will be analyzed with <code>a</code>. If <code>create</code>
231    * is true, then a new, empty index will be created in
232    * <code>d</code>, replacing the index already there, if any.
233    *
234    * @param d the index directory
235    * @param a the analyzer to use
236    * @param create <code>true</code> to create the index or overwrite
237    * the existing one; <code>false</code> to append to the existing
238    * index
239    * @throws IOException if the directory cannot be read/written to, or
240    * if it does not exist, and <code>create</code> is
241    * <code>false</code>
242    */

243   public IndexWriter(Directory d, Analyzer a, boolean create)
244        throws IOException JavaDoc {
245     this(d, a, create, false);
246   }
247   
248   private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir)
249     throws IOException JavaDoc {
250       this.closeDir = closeDir;
251       directory = d;
252       analyzer = a;
253
254       Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
255       if (!writeLock.obtain(WRITE_LOCK_TIMEOUT)) // obtain write lock
256
throw new IOException JavaDoc("Index locked for write: " + writeLock);
257       this.writeLock = writeLock; // save it
258

259       synchronized (directory) { // in- & inter-process sync
260
new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) {
261             public Object JavaDoc doBody() throws IOException JavaDoc {
262               if (create)
263                 segmentInfos.write(directory);
264               else
265                 segmentInfos.read(directory);
266               return null;
267             }
268           }.run();
269       }
270   }
271
272   /** Determines the largest number of documents ever merged by addDocument().
273    * Small values (e.g., less than 10,000) are best for interactive indexing,
274    * as this limits the length of pauses while indexing to a few seconds.
275    * Larger values are best for batched indexing and speedier searches.
276    *
277    * <p>The default value is {@link Integer#MAX_VALUE}.
278    */

279   public void setMaxMergeDocs(int maxMergeDocs) {
280     this.maxMergeDocs = maxMergeDocs;
281   }
282
283   /**
284    * @see #setMaxMergeDocs
285    */

286   public int getMaxMergeDocs() {
287     return maxMergeDocs;
288   }
289
290   /**
291    * The maximum number of terms that will be indexed for a single field in a
292    * document. This limits the amount of memory required for indexing, so that
293    * collections with very large files will not crash the indexing process by
294    * running out of memory.<p/>
295    * Note that this effectively truncates large documents, excluding from the
296    * index terms that occur further in the document. If you know your source
297    * documents are large, be sure to set this value high enough to accomodate
298    * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
299    * is your memory, but you should anticipate an OutOfMemoryError.<p/>
300    * By default, no more than 10,000 terms will be indexed for a field.
301    */

302   public void setMaxFieldLength(int maxFieldLength) {
303     this.maxFieldLength = maxFieldLength;
304   }
305   
306   /**
307    * @see #setMaxFieldLength
308    */

309   public int getMaxFieldLength() {
310     return maxFieldLength;
311   }
312
313   /** Determines the minimal number of documents required before the buffered
314    * in-memory documents are merging and a new Segment is created.
315    * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
316    * large value gives faster indexing. At the same time, mergeFactor limits
317    * the number of files open in a FSDirectory.
318    *
319    * <p> The default value is 10.
320    *
321    * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
322    */

323   public void setMaxBufferedDocs(int maxBufferedDocs) {
324     if (maxBufferedDocs < 2)
325       throw new IllegalArgumentException JavaDoc("maxBufferedDocs must at least be 2");
326     this.minMergeDocs = maxBufferedDocs;
327   }
328
329   /**
330    * @see #setMaxBufferedDocs
331    */

332   public int getMaxBufferedDocs() {
333     return minMergeDocs;
334   }
335
336   /** Determines how often segment indices are merged by addDocument(). With
337    * smaller values, less RAM is used while indexing, and searches on
338    * unoptimized indices are faster, but indexing speed is slower. With larger
339    * values, more RAM is used during indexing, and while searches on unoptimized
340    * indices are slower, indexing is faster. Thus larger values (> 10) are best
341    * for batch index creation, and smaller values (< 10) for indices that are
342    * interactively maintained.
343    *
344    * <p>This must never be less than 2. The default value is 10.
345    */

346   public void setMergeFactor(int mergeFactor) {
347     if (mergeFactor < 2)
348       throw new IllegalArgumentException JavaDoc("mergeFactor cannot be less than 2");
349     this.mergeFactor = mergeFactor;
350   }
351
352   /**
353    * @see #setMergeFactor
354    */

355   public int getMergeFactor() {
356     return mergeFactor;
357   }
358
359   /** If non-null, information about merges and a message when
360    * maxFieldLength is reached will be printed to this.
361    */

362   public void setInfoStream(PrintStream JavaDoc infoStream) {
363     this.infoStream = infoStream;
364   }
365
366   /**
367    * @see #setInfoStream
368    */

369   public PrintStream JavaDoc getInfoStream() {
370     return infoStream;
371   }
372
373   /** Flushes all changes to an index and closes all associated files. */
374   public synchronized void close() throws IOException JavaDoc {
375     flushRamSegments();
376     ramDirectory.close();
377     if (writeLock != null) {
378       writeLock.release(); // release write lock
379
writeLock = null;
380     }
381     if(closeDir)
382       directory.close();
383   }
384
385   /** Release the write lock, if needed. */
386   protected void finalize() throws IOException JavaDoc {
387     if (writeLock != null) {
388       writeLock.release(); // release write lock
389
writeLock = null;
390     }
391   }
392
393   /** Returns the Directory used by this index. */
394   public Directory getDirectory() {
395       return directory;
396   }
397
398   /** Returns the analyzer used by this index. */
399   public Analyzer getAnalyzer() {
400       return analyzer;
401   }
402
403
404   /** Returns the number of documents currently in this index. */
405   public synchronized int docCount() {
406     int count = 0;
407     for (int i = 0; i < segmentInfos.size(); i++) {
408       SegmentInfo si = segmentInfos.info(i);
409       count += si.docCount;
410     }
411     return count;
412   }
413
414   /**
415    * The maximum number of terms that will be indexed for a single field in a
416    * document. This limits the amount of memory required for indexing, so that
417    * collections with very large files will not crash the indexing process by
418    * running out of memory.<p/>
419    * Note that this effectively truncates large documents, excluding from the
420    * index terms that occur further in the document. If you know your source
421    * documents are large, be sure to set this value high enough to accomodate
422    * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
423    * is your memory, but you should anticipate an OutOfMemoryError.<p/>
424    * By default, no more than 10,000 terms will be indexed for a field.
425    *
426    * @deprecated use {@link #setMaxFieldLength} instead
427    */

428   public int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
429
430   /**
431    * Adds a document to this index. If the document contains more than
432    * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
433    * discarded.
434    */

435   public void addDocument(Document doc) throws IOException JavaDoc {
436     addDocument(doc, analyzer);
437   }
438
439   /**
440    * Adds a document to this index, using the provided analyzer instead of the
441    * value of {@link #getAnalyzer()}. If the document contains more than
442    * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
443    * discarded.
444    */

445   public void addDocument(Document doc, Analyzer analyzer) throws IOException JavaDoc {
446     DocumentWriter dw =
447       new DocumentWriter(ramDirectory, analyzer, this);
448     dw.setInfoStream(infoStream);
449     String JavaDoc segmentName = newSegmentName();
450     dw.addDocument(segmentName, doc);
451     synchronized (this) {
452       segmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory));
453       maybeMergeSegments();
454     }
455   }
456
457   final int getSegmentsCounter(){
458     return segmentInfos.counter;
459   }
460   
461   private final synchronized String JavaDoc newSegmentName() {
462     return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
463   }
464
465   /** Determines how often segment indices are merged by addDocument(). With
466    * smaller values, less RAM is used while indexing, and searches on
467    * unoptimized indices are faster, but indexing speed is slower. With larger
468    * values, more RAM is used during indexing, and while searches on unoptimized
469    * indices are slower, indexing is faster. Thus larger values (> 10) are best
470    * for batch index creation, and smaller values (< 10) for indices that are
471    * interactively maintained.
472    *
473    * <p>This must never be less than 2. The default value is 10.
474    * @deprecated use {@link #setMergeFactor} instead
475    */

476   public int mergeFactor = DEFAULT_MERGE_FACTOR;
477
478   /** Determines the minimal number of documents required before the buffered
479    * in-memory documents are merging and a new Segment is created.
480    * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
481    * large value gives faster indexing. At the same time, mergeFactor limits
482    * the number of files open in a FSDirectory.
483    *
484    * <p> The default value is 10.
485    * @deprecated use {@link #setMaxBufferedDocs} instead
486    */

487   public int minMergeDocs = DEFAULT_MIN_MERGE_DOCS;
488
489
490   /** Determines the largest number of documents ever merged by addDocument().
491    * Small values (e.g., less than 10,000) are best for interactive indexing,
492    * as this limits the length of pauses while indexing to a few seconds.
493    * Larger values are best for batched indexing and speedier searches.
494    *
495    * <p>The default value is {@link Integer#MAX_VALUE}.
496    * @deprecated use {@link #setMaxMergeDocs} instead
497    */

498   public int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
499
500   /** If non-null, information about merges will be printed to this.
501    * @deprecated use {@link #setInfoStream} instead
502    */

503   public PrintStream JavaDoc infoStream = null;
504
505   /** Merges all segments together into a single segment, optimizing an index
506       for search. */

507   public synchronized void optimize() throws IOException JavaDoc {
508     flushRamSegments();
509     while (segmentInfos.size() > 1 ||
510            (segmentInfos.size() == 1 &&
511             (SegmentReader.hasDeletions(segmentInfos.info(0)) ||
512              segmentInfos.info(0).dir != directory ||
513              (useCompoundFile &&
514               (!SegmentReader.usesCompoundFile(segmentInfos.info(0)) ||
515                 SegmentReader.hasSeparateNorms(segmentInfos.info(0))))))) {
516       int minSegment = segmentInfos.size() - mergeFactor;
517       mergeSegments(minSegment < 0 ? 0 : minSegment);
518     }
519   }
520
521   /** Merges all segments from an array of indexes into this index.
522    *
523    * <p>This may be used to parallelize batch indexing. A large document
524    * collection can be broken into sub-collections. Each sub-collection can be
525    * indexed in parallel, on a different thread, process or machine. The
526    * complete index can then be created by merging sub-collection indexes
527    * with this method.
528    *
529    * <p>After this completes, the index is optimized. */

530   public synchronized void addIndexes(Directory[] dirs)
531       throws IOException JavaDoc {
532     optimize(); // start with zero or 1 seg
533

534     int start = segmentInfos.size();
535
536     for (int i = 0; i < dirs.length; i++) {
537       SegmentInfos sis = new SegmentInfos(); // read infos from dir
538
sis.read(dirs[i]);
539       for (int j = 0; j < sis.size(); j++) {
540         segmentInfos.addElement(sis.info(j)); // add each info
541
}
542     }
543     
544     // merge newly added segments in log(n) passes
545
while (segmentInfos.size() > start+mergeFactor) {
546       for (int base = start+1; base < segmentInfos.size(); base++) {
547         int end = Math.min(segmentInfos.size(), base+mergeFactor);
548         if (end-base > 1)
549           mergeSegments(base, end);
550       }
551     }
552
553     optimize(); // final cleanup
554
}
555
556   /** Merges the provided indexes into this index.
557    * <p>After this completes, the index is optimized. </p>
558    * <p>The provided IndexReaders are not closed.</p>
559    */

560   public synchronized void addIndexes(IndexReader[] readers)
561     throws IOException JavaDoc {
562
563     optimize(); // start with zero or 1 seg
564

565     final String JavaDoc mergedName = newSegmentName();
566     SegmentMerger merger = new SegmentMerger(this, mergedName);
567
568     final Vector JavaDoc segmentsToDelete = new Vector JavaDoc();
569     IndexReader sReader = null;
570     if (segmentInfos.size() == 1){ // add existing index, if any
571
sReader = SegmentReader.get(segmentInfos.info(0));
572         merger.add(sReader);
573         segmentsToDelete.addElement(sReader); // queue segment for deletion
574
}
575       
576     for (int i = 0; i < readers.length; i++) // add new indexes
577
merger.add(readers[i]);
578
579     int docCount = merger.merge(); // merge 'em
580

581     segmentInfos.setSize(0); // pop old infos & add new
582
segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory));
583     
584     if(sReader != null)
585         sReader.close();
586
587     synchronized (directory) { // in- & inter-process sync
588
new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) {
589       public Object JavaDoc doBody() throws IOException JavaDoc {
590         segmentInfos.write(directory); // commit changes
591
deleteSegments(segmentsToDelete); // delete now-unused segments
592
return null;
593       }
594     }.run();
595     }
596     
597     if (useCompoundFile) {
598       final Vector JavaDoc filesToDelete = merger.createCompoundFile(mergedName + ".tmp");
599       synchronized (directory) { // in- & inter-process sync
600
new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) {
601           public Object JavaDoc doBody() throws IOException JavaDoc {
602             // make compound file visible for SegmentReaders
603
directory.renameFile(mergedName + ".tmp", mergedName + ".cfs");
604             // delete now unused files of segment
605
deleteFiles(filesToDelete);
606             return null;
607           }
608         }.run();
609       }
610     }
611   }
612
613   /** Merges all RAM-resident segments. */
614   private final void flushRamSegments() throws IOException JavaDoc {
615     int minSegment = segmentInfos.size()-1;
616     int docCount = 0;
617     while (minSegment >= 0 &&
618            (segmentInfos.info(minSegment)).dir == ramDirectory) {
619       docCount += segmentInfos.info(minSegment).docCount;
620       minSegment--;
621     }
622     if (minSegment < 0 || // add one FS segment?
623
(docCount + segmentInfos.info(minSegment).docCount) > mergeFactor ||
624         !(segmentInfos.info(segmentInfos.size()-1).dir == ramDirectory))
625       minSegment++;
626     if (minSegment >= segmentInfos.size())
627       return; // none to merge
628
mergeSegments(minSegment);
629   }
630
631   /** Incremental segment merger. */
632   private final void maybeMergeSegments() throws IOException JavaDoc {
633     long targetMergeDocs = minMergeDocs;
634     while (targetMergeDocs <= maxMergeDocs) {
635       // find segments smaller than current target size
636
int minSegment = segmentInfos.size();
637       int mergeDocs = 0;
638       while (--minSegment >= 0) {
639         SegmentInfo si = segmentInfos.info(minSegment);
640         if (si.docCount >= targetMergeDocs)
641           break;
642         mergeDocs += si.docCount;
643       }
644
645       if (mergeDocs >= targetMergeDocs) // found a merge to do
646
mergeSegments(minSegment+1);
647       else
648         break;
649
650       targetMergeDocs *= mergeFactor; // increase target size
651
}
652   }
653
654   /** Pops segments off of segmentInfos stack down to minSegment, merges them,
655     and pushes the merged index onto the top of the segmentInfos stack. */

656   private final void mergeSegments(int minSegment)
657       throws IOException JavaDoc {
658     mergeSegments(minSegment, segmentInfos.size());
659   }
660
661   /** Merges the named range of segments, replacing them in the stack with a
662    * single segment. */

663   private final void mergeSegments(int minSegment, int end)
664     throws IOException JavaDoc {
665     final String JavaDoc mergedName = newSegmentName();
666     if (infoStream != null) infoStream.print("merging segments");
667     SegmentMerger merger = new SegmentMerger(this, mergedName);
668
669     final Vector JavaDoc segmentsToDelete = new Vector JavaDoc();
670     for (int i = minSegment; i < end; i++) {
671       SegmentInfo si = segmentInfos.info(i);
672       if (infoStream != null)
673         infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
674       IndexReader reader = SegmentReader.get(si);
675       merger.add(reader);
676       if ((reader.directory() == this.directory) || // if we own the directory
677
(reader.directory() == this.ramDirectory))
678         segmentsToDelete.addElement(reader); // queue segment for deletion
679
}
680
681     int mergedDocCount = merger.merge();
682
683     if (infoStream != null) {
684       infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
685     }
686
687     for (int i = end-1; i >= minSegment; i--) // remove old infos & add new
688
segmentInfos.remove(i);
689     segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
690                                             directory));
691
692     // close readers before we attempt to delete now-obsolete segments
693
merger.closeReaders();
694
695     synchronized (directory) { // in- & inter-process sync
696
new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) {
697           public Object JavaDoc doBody() throws IOException JavaDoc {
698             segmentInfos.write(directory); // commit before deleting
699
deleteSegments(segmentsToDelete); // delete now-unused segments
700
return null;
701           }
702         }.run();
703     }
704     
705     if (useCompoundFile) {
706       final Vector JavaDoc filesToDelete = merger.createCompoundFile(mergedName + ".tmp");
707       synchronized (directory) { // in- & inter-process sync
708
new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) {
709           public Object JavaDoc doBody() throws IOException JavaDoc {
710             // make compound file visible for SegmentReaders
711
directory.renameFile(mergedName + ".tmp", mergedName + ".cfs");
712             // delete now unused files of segment
713
deleteFiles(filesToDelete);
714             return null;
715           }
716         }.run();
717       }
718     }
719   }
720
721   /*
722    * Some operating systems (e.g. Windows) don't permit a file to be deleted
723    * while it is opened for read (e.g. by another process or thread). So we
724    * assume that when a delete fails it is because the file is open in another
725    * process, and queue the file for subsequent deletion.
726    */

727
728   private final void deleteSegments(Vector JavaDoc segments) throws IOException JavaDoc {
729     Vector JavaDoc deletable = new Vector JavaDoc();
730
731     deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable
732

733     for (int i = 0; i < segments.size(); i++) {
734       SegmentReader reader = (SegmentReader)segments.elementAt(i);
735       if (reader.directory() == this.directory)
736         deleteFiles(reader.files(), deletable); // try to delete our files
737
else
738         deleteFiles(reader.files(), reader.directory()); // delete other files
739
}
740
741     writeDeleteableFiles(deletable); // note files we can't delete
742
}
743   
744   private final void deleteFiles(Vector JavaDoc files) throws IOException JavaDoc {
745     Vector JavaDoc deletable = new Vector JavaDoc();
746     deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable
747
deleteFiles(files, deletable); // try to delete our files
748
writeDeleteableFiles(deletable); // note files we can't delete
749
}
750
751   private final void deleteFiles(Vector JavaDoc files, Directory directory)
752        throws IOException JavaDoc {
753     for (int i = 0; i < files.size(); i++)
754       directory.deleteFile((String JavaDoc)files.elementAt(i));
755   }
756
757   private final void deleteFiles(Vector JavaDoc files, Vector JavaDoc deletable)
758        throws IOException JavaDoc {
759     for (int i = 0; i < files.size(); i++) {
760       String JavaDoc file = (String JavaDoc)files.elementAt(i);
761       try {
762         directory.deleteFile(file); // try to delete each file
763
} catch (IOException JavaDoc e) { // if delete fails
764
if (directory.fileExists(file)) {
765           if (infoStream != null)
766             infoStream.println(e.toString() + "; Will re-try later.");
767           deletable.addElement(file); // add to deletable
768
}
769       }
770     }
771   }
772
773   private final Vector JavaDoc readDeleteableFiles() throws IOException JavaDoc {
774     Vector JavaDoc result = new Vector JavaDoc();
775     if (!directory.fileExists(IndexFileNames.DELETABLE))
776       return result;
777
778     IndexInput input = directory.openInput(IndexFileNames.DELETABLE);
779     try {
780       for (int i = input.readInt(); i > 0; i--) // read file names
781
result.addElement(input.readString());
782     } finally {
783       input.close();
784     }
785     return result;
786   }
787
788   private final void writeDeleteableFiles(Vector JavaDoc files) throws IOException JavaDoc {
789     IndexOutput output = directory.createOutput("deleteable.new");
790     try {
791       output.writeInt(files.size());
792       for (int i = 0; i < files.size(); i++)
793         output.writeString((String JavaDoc)files.elementAt(i));
794     } finally {
795       output.close();
796     }
797     directory.renameFile("deleteable.new", IndexFileNames.DELETABLE);
798   }
799 }
800
Popular Tags