KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > index > IndexModifier


1 package org.apache.lucene.index;
2
3 /**
4  * Copyright 2005 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
27
28 /**
29  * A class to modify an index, i.e. to delete and add documents. This
30  * class hides {@link IndexReader} and {@link IndexWriter} so that you
31  * do not need to care about implementation details such as that adding
32  * documents is done via IndexWriter and deletion is done via IndexReader.
33  *
34  * <p>Note that you cannot create more than one <code>IndexModifier</code> object
35  * on the same directory at the same time.
36  *
37  * <p>Example usage:
38  *
39 <!-- ======================================================== -->
40 <!-- = Java Sourcecode to HTML automatically converted code = -->
41 <!-- = Java2Html Converter V4.1 2004 by Markus Gebhard markus@jave.de = -->
42 <!-- = Further information: http://www.java2html.de = -->
43 <div align="left" class="java">
44 <table border="0" cellpadding="3" cellspacing="0" bgcolor="#ffffff">
45    <tr>
46   <!-- start source code -->
47    <td nowrap="nowrap" valign="top" align="left">
48     <code>
49 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">Analyzer&nbsp;analyzer&nbsp;=&nbsp;</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">StandardAnalyzer</font><font color="#000000">()</font><font color="#000000">;</font><br/>
50 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#3f7f5f">//&nbsp;create&nbsp;an&nbsp;index&nbsp;in&nbsp;/tmp/index,&nbsp;overwriting&nbsp;an&nbsp;existing&nbsp;one:</font><br/>
51 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">IndexModifier&nbsp;indexModifier&nbsp;=&nbsp;</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">IndexModifier</font><font color="#000000">(</font><font color="#2a00ff">&#34;/tmp/index&#34;</font><font color="#000000">,&nbsp;analyzer,&nbsp;</font><font color="#7f0055"><b>true</b></font><font color="#000000">)</font><font color="#000000">;</font><br/>
52 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">Document&nbsp;doc&nbsp;=&nbsp;</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">Document</font><font color="#000000">()</font><font color="#000000">;</font><br/>
53 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">&#34;id&#34;</font><font color="#000000">,&nbsp;</font><font color="#2a00ff">&#34;1&#34;</font><font color="#000000">,&nbsp;Field.Store.YES,&nbsp;Field.Index.UN_TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
54 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">&#34;body&#34;</font><font color="#000000">,&nbsp;</font><font color="#2a00ff">&#34;a&nbsp;simple&nbsp;test&#34;</font><font color="#000000">,&nbsp;Field.Store.YES,&nbsp;Field.Index.TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
55 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">indexModifier.addDocument</font><font color="#000000">(</font><font color="#000000">doc</font><font color="#000000">)</font><font color="#000000">;</font><br/>
56 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#7f0055"><b>int&nbsp;</b></font><font color="#000000">deleted&nbsp;=&nbsp;indexModifier.delete</font><font color="#000000">(</font><font color="#7f0055"><b>new&nbsp;</b></font><font color="#000000">Term</font><font color="#000000">(</font><font color="#2a00ff">&#34;id&#34;</font><font color="#000000">,&nbsp;</font><font color="#2a00ff">&#34;1&#34;</font><font color="#000000">))</font><font color="#000000">;</font><br/>
57 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#2a00ff">&#34;Deleted&nbsp;&#34;&nbsp;</font><font color="#000000">+&nbsp;deleted&nbsp;+&nbsp;</font><font color="#2a00ff">&#34;&nbsp;document&#34;</font><font color="#000000">)</font><font color="#000000">;</font><br/>
58 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">indexModifier.flush</font><font color="#000000">()</font><font color="#000000">;</font><br/>
59 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">indexModifier.docCount</font><font color="#000000">()&nbsp;</font><font color="#000000">+&nbsp;</font><font color="#2a00ff">&#34;&nbsp;docs&nbsp;in&nbsp;index&#34;</font><font color="#000000">)</font><font color="#000000">;</font><br/>
60 <font color="#ffffff">&nbsp;&nbsp;&nbsp;&nbsp;</font><font color="#000000">indexModifier.close</font><font color="#000000">()</font><font color="#000000">;</font></code>
61
62    </td>
63   <!-- end source code -->
64    </tr>
65 </table>
66 </div>
67 <!-- = END of automatically generated HTML code = -->
68 <!-- ======================================================== -->
69  *
70  * <p>Not all methods of IndexReader and IndexWriter are offered by this
71  * class. If you need access to additional methods, either use those classes
72  * directly or implement your own class that extends <code>IndexModifier</code>.
73  *
74  * <p>Although an instance of this class can be used from more than one
75  * thread, you will not get the best performance. You might want to use
76  * IndexReader and IndexWriter directly for that (but you will need to
77  * care about synchronization yourself then).
78  *
79  * <p>While you can freely mix calls to add() and delete() using this class,
80  * you should batch your calls for best performance. For example, if you
81  * want to update 20 documents, you should first delete all those documents,
82  * then add all the new documents.
83  *
84  * @author Daniel Naber
85  */

86 public class IndexModifier {
87
88   protected IndexWriter indexWriter = null;
89   protected IndexReader indexReader = null;
90
91   protected Directory directory = null;
92   protected Analyzer analyzer = null;
93   protected boolean open = false;
94
95   // Lucene defaults:
96
protected PrintStream JavaDoc infoStream = null;
97   protected boolean useCompoundFile = true;
98   protected int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
99   protected int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
100   protected int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;
101
102   /**
103    * Open an index with write access.
104    *
105    * @param directory the index directory
106    * @param analyzer the analyzer to use for adding new documents
107    * @param create <code>true</code> to create the index or overwrite the existing one;
108    * <code>false</code> to append to the existing index
109    */

110   public IndexModifier(Directory directory, Analyzer analyzer, boolean create) throws IOException JavaDoc {
111     init(directory, analyzer, create);
112   }
113
114   /**
115    * Open an index with write access.
116    *
117    * @param dirName the index directory
118    * @param analyzer the analyzer to use for adding new documents
119    * @param create <code>true</code> to create the index or overwrite the existing one;
120    * <code>false</code> to append to the existing index
121    */

122   public IndexModifier(String JavaDoc dirName, Analyzer analyzer, boolean create) throws IOException JavaDoc {
123     Directory dir = FSDirectory.getDirectory(dirName, create);
124     init(dir, analyzer, create);
125   }
126
127   /**
128    * Open an index with write access.
129    *
130    * @param file the index directory
131    * @param analyzer the analyzer to use for adding new documents
132    * @param create <code>true</code> to create the index or overwrite the existing one;
133    * <code>false</code> to append to the existing index
134    */

135   public IndexModifier(File JavaDoc file, Analyzer analyzer, boolean create) throws IOException JavaDoc {
136     Directory dir = FSDirectory.getDirectory(file, create);
137     init(dir, analyzer, create);
138   }
139
140   /**
141    * Initialize an IndexWriter.
142    * @throws IOException
143    */

144   protected void init(Directory directory, Analyzer analyzer, boolean create) throws IOException JavaDoc {
145     this.directory = directory;
146     synchronized(this.directory) {
147       this.analyzer = analyzer;
148       indexWriter = new IndexWriter(directory, analyzer, create);
149       open = true;
150     }
151   }
152
153   /**
154    * Throw an IllegalStateException if the index is closed.
155    * @throws IllegalStateException
156    */

157   protected void assureOpen() {
158     if (!open) {
159       throw new IllegalStateException JavaDoc("Index is closed");
160     }
161   }
162
163   /**
164    * Close the IndexReader and open an IndexWriter.
165    * @throws IOException
166    */

167   protected void createIndexWriter() throws IOException JavaDoc {
168     if (indexWriter == null) {
169       if (indexReader != null) {
170         indexReader.close();
171         indexReader = null;
172       }
173       indexWriter = new IndexWriter(directory, analyzer, false);
174       indexWriter.setInfoStream(infoStream);
175       indexWriter.setUseCompoundFile(useCompoundFile);
176       indexWriter.setMaxBufferedDocs(maxBufferedDocs);
177       indexWriter.setMaxFieldLength(maxFieldLength);
178       indexWriter.setMergeFactor(mergeFactor);
179     }
180   }
181
182   /**
183    * Close the IndexWriter and open an IndexReader.
184    * @throws IOException
185    */

186   protected void createIndexReader() throws IOException JavaDoc {
187     if (indexReader == null) {
188       if (indexWriter != null) {
189         indexWriter.close();
190         indexWriter = null;
191       }
192       indexReader = IndexReader.open(directory);
193     }
194   }
195
196   /**
197    * Make sure all changes are written to disk.
198    * @throws IOException
199    */

200   public void flush() throws IOException JavaDoc {
201     synchronized(directory) {
202       assureOpen();
203       if (indexWriter != null) {
204         indexWriter.close();
205         indexWriter = null;
206         createIndexWriter();
207       } else {
208         indexReader.close();
209         indexReader = null;
210         createIndexReader();
211       }
212     }
213   }
214
215   /**
216    * Adds a document to this index, using the provided analyzer instead of the
217    * one specific in the constructor. If the document contains more than
218    * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
219    * discarded.
220    * @see IndexWriter#addDocument(Document, Analyzer)
221    * @throws IllegalStateException if the index is closed
222    */

223   public void addDocument(Document doc, Analyzer docAnalyzer) throws IOException JavaDoc {
224     synchronized(directory) {
225       assureOpen();
226       createIndexWriter();
227       if (docAnalyzer != null)
228         indexWriter.addDocument(doc, docAnalyzer);
229       else
230         indexWriter.addDocument(doc);
231     }
232   }
233
234   /**
235    * Adds a document to this index. If the document contains more than
236    * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
237    * discarded.
238    * @see IndexWriter#addDocument(Document)
239    * @throws IllegalStateException if the index is closed
240    */

241   public void addDocument(Document doc) throws IOException JavaDoc {
242     addDocument(doc, null);
243   }
244
245   /**
246    * Deletes all documents containing <code>term</code>.
247    * This is useful if one uses a document field to hold a unique ID string for
248    * the document. Then to delete such a document, one merely constructs a
249    * term with the appropriate field and the unique ID string as its text and
250    * passes it to this method. Returns the number of documents deleted.
251    * @return the number of documents deleted
252    * @see IndexReader#deleteDocuments(Term)
253    * @throws IllegalStateException if the index is closed
254    */

255   public int deleteDocuments(Term term) throws IOException JavaDoc {
256     synchronized(directory) {
257       assureOpen();
258       createIndexReader();
259       return indexReader.deleteDocuments(term);
260     }
261   }
262
263   /**
264    * Deletes all documents containing <code>term</code>.
265    * This is useful if one uses a document field to hold a unique ID string for
266    * the document. Then to delete such a document, one merely constructs a
267    * term with the appropriate field and the unique ID string as its text and
268    * passes it to this method. Returns the number of documents deleted.
269    * @return the number of documents deleted
270    * @see IndexReader#deleteDocuments(Term)
271    * @throws IllegalStateException if the index is closed
272    * @deprecated Use {@link #deleteDocuments(Term)} instead.
273    */

274   public int delete(Term term) throws IOException JavaDoc {
275     return deleteDocuments(term);
276   }
277
278   /**
279    * Deletes the document numbered <code>docNum</code>.
280    * @see IndexReader#deleteDocument(int)
281    * @throws IllegalStateException if the index is closed
282    */

283   public void deleteDocument(int docNum) throws IOException JavaDoc {
284     synchronized(directory) {
285       assureOpen();
286       createIndexReader();
287       indexReader.deleteDocument(docNum);
288     }
289   }
290
291   /**
292    * Deletes the document numbered <code>docNum</code>.
293    * @see IndexReader#deleteDocument(int)
294    * @throws IllegalStateException if the index is closed
295    * @deprecated Use {@link #deleteDocument(int)} instead.
296    */

297   public void delete(int docNum) throws IOException JavaDoc {
298     deleteDocument(docNum);
299   }
300   
301   /**
302    * Returns the number of documents currently in this index.
303    * @see IndexWriter#docCount()
304    * @see IndexReader#numDocs()
305    * @throws IllegalStateException if the index is closed
306    */

307   public int docCount() {
308     synchronized(directory) {
309       assureOpen();
310       if (indexWriter != null) {
311         return indexWriter.docCount();
312       } else {
313         return indexReader.numDocs();
314       }
315     }
316   }
317
318   /**
319    * Merges all segments together into a single segment, optimizing an index
320    * for search.
321    * @see IndexWriter#optimize()
322    * @throws IllegalStateException if the index is closed
323    */

324   public void optimize() throws IOException JavaDoc {
325     synchronized(directory) {
326       assureOpen();
327       createIndexWriter();
328       indexWriter.optimize();
329     }
330   }
331
332   /**
333    * If non-null, information about merges and a message when
334    * {@link #getMaxFieldLength()} is reached will be printed to this.
335    * <p>Example: <tt>index.setInfoStream(System.err);</tt>
336    * @see IndexWriter#setInfoStream(PrintStream)
337    * @throws IllegalStateException if the index is closed
338    */

339   public void setInfoStream(PrintStream JavaDoc infoStream) {
340     synchronized(directory) {
341       assureOpen();
342       if (indexWriter != null) {
343         indexWriter.setInfoStream(infoStream);
344       }
345       this.infoStream = infoStream;
346     }
347   }
348
349   /**
350    * @throws IOException
351    * @see IndexModifier#setInfoStream(PrintStream)
352    */

353   public PrintStream JavaDoc getInfoStream() throws IOException JavaDoc {
354     synchronized(directory) {
355       assureOpen();
356       createIndexWriter();
357       return indexWriter.getInfoStream();
358     }
359   }
360
361   /**
362    * Setting to turn on usage of a compound file. When on, multiple files
363    * for each segment are merged into a single file once the segment creation
364    * is finished. This is done regardless of what directory is in use.
365    * @see IndexWriter#setUseCompoundFile(boolean)
366    * @throws IllegalStateException if the index is closed
367    */

368   public void setUseCompoundFile(boolean useCompoundFile) {
369     synchronized(directory) {
370       assureOpen();
371       if (indexWriter != null) {
372         indexWriter.setUseCompoundFile(useCompoundFile);
373       }
374       this.useCompoundFile = useCompoundFile;
375     }
376   }
377
378   /**
379    * @throws IOException
380    * @see IndexModifier#setUseCompoundFile(boolean)
381    */

382   public boolean getUseCompoundFile() throws IOException JavaDoc {
383     synchronized(directory) {
384       assureOpen();
385       createIndexWriter();
386       return indexWriter.getUseCompoundFile();
387     }
388   }
389
390   /**
391    * The maximum number of terms that will be indexed for a single field in a
392    * document. This limits the amount of memory required for indexing, so that
393    * collections with very large files will not crash the indexing process by
394    * running out of memory.<p/>
395    * Note that this effectively truncates large documents, excluding from the
396    * index terms that occur further in the document. If you know your source
397    * documents are large, be sure to set this value high enough to accomodate
398    * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
399    * is your memory, but you should anticipate an OutOfMemoryError.<p/>
400    * By default, no more than 10,000 terms will be indexed for a field.
401    * @see IndexWriter#setMaxFieldLength(int)
402    * @throws IllegalStateException if the index is closed
403    */

404   public void setMaxFieldLength(int maxFieldLength) {
405     synchronized(directory) {
406       assureOpen();
407       if (indexWriter != null) {
408         indexWriter.setMaxFieldLength(maxFieldLength);
409       }
410       this.maxFieldLength = maxFieldLength;
411     }
412   }
413
414   /**
415    * @throws IOException
416    * @see IndexModifier#setMaxFieldLength(int)
417    */

418   public int getMaxFieldLength() throws IOException JavaDoc {
419     synchronized(directory) {
420       assureOpen();
421       createIndexWriter();
422       return indexWriter.getMaxFieldLength();
423     }
424   }
425
426   /**
427    * The maximum number of terms that will be indexed for a single field in a
428    * document. This limits the amount of memory required for indexing, so that
429    * collections with very large files will not crash the indexing process by
430    * running out of memory.<p/>
431    * Note that this effectively truncates large documents, excluding from the
432    * index terms that occur further in the document. If you know your source
433    * documents are large, be sure to set this value high enough to accomodate
434    * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
435    * is your memory, but you should anticipate an OutOfMemoryError.<p/>
436    * By default, no more than 10,000 terms will be indexed for a field.
437    * @see IndexWriter#setMaxBufferedDocs(int)
438    * @throws IllegalStateException if the index is closed
439    */

440   public void setMaxBufferedDocs(int maxBufferedDocs) {
441     synchronized(directory) {
442       assureOpen();
443       if (indexWriter != null) {
444         indexWriter.setMaxBufferedDocs(maxBufferedDocs);
445       }
446       this.maxBufferedDocs = maxBufferedDocs;
447     }
448   }
449
450   /**
451    * @throws IOException
452    * @see IndexModifier#setMaxBufferedDocs(int)
453    */

454   public int getMaxBufferedDocs() throws IOException JavaDoc {
455     synchronized(directory) {
456       assureOpen();
457       createIndexWriter();
458       return indexWriter.getMaxBufferedDocs();
459     }
460   }
461
462   /**
463    * Determines how often segment indices are merged by addDocument(). With
464    * smaller values, less RAM is used while indexing, and searches on
465    * unoptimized indices are faster, but indexing speed is slower. With larger
466    * values, more RAM is used during indexing, and while searches on unoptimized
467    * indices are slower, indexing is faster. Thus larger values (&gt; 10) are best
468    * for batch index creation, and smaller values (&lt; 10) for indices that are
469    * interactively maintained.
470    * <p>This must never be less than 2. The default value is 10.
471    *
472    * @see IndexWriter#setMergeFactor(int)
473    * @throws IllegalStateException if the index is closed
474    */

475   public void setMergeFactor(int mergeFactor) {
476     synchronized(directory) {
477       assureOpen();
478       if (indexWriter != null) {
479         indexWriter.setMergeFactor(mergeFactor);
480       }
481       this.mergeFactor = mergeFactor;
482     }
483   }
484
485   /**
486    * @throws IOException
487    * @see IndexModifier#setMergeFactor(int)
488    */

489   public int getMergeFactor() throws IOException JavaDoc {
490     synchronized(directory) {
491       assureOpen();
492       createIndexWriter();
493       return indexWriter.getMergeFactor();
494     }
495   }
496
497   /**
498    * Close this index, writing all pending changes to disk.
499    *
500    * @throws IllegalStateException if the index has been closed before already
501    */

502   public void close() throws IOException JavaDoc {
503     synchronized(directory) {
504       if (!open)
505         throw new IllegalStateException JavaDoc("Index is closed already");
506       if (indexWriter != null) {
507         indexWriter.close();
508         indexWriter = null;
509       } else {
510         indexReader.close();
511         indexReader = null;
512       }
513       open = false;
514     }
515   }
516
517   public String JavaDoc toString() {
518     return "Index@" + directory;
519   }
520
521   /*
522   // used as an example in the javadoc:
523   public static void main(String[] args) throws IOException {
524     Analyzer analyzer = new StandardAnalyzer();
525     // create an index in /tmp/index, overwriting an existing one:
526     IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
527     Document doc = new Document();
528     doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
529     doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
530     indexModifier.addDocument(doc);
531     int deleted = indexModifier.delete(new Term("id", "1"));
532     System.out.println("Deleted " + deleted + " document");
533     indexModifier.flush();
534     System.out.println(indexModifier.docCount() + " docs in index");
535     indexModifier.close();
536   }*/

537   
538 }
539
Popular Tags