KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > index > IndexReader


1 package org.apache.lucene.index;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.document.Document;
20 import org.apache.lucene.document.Field;
21 import org.apache.lucene.search.Similarity;
22 import org.apache.lucene.store.Directory;
23 import org.apache.lucene.store.FSDirectory;
24 import org.apache.lucene.store.IndexInput;
25 import org.apache.lucene.store.Lock;
26
27 import java.io.File JavaDoc;
28 import java.io.FileOutputStream JavaDoc;
29 import java.io.IOException JavaDoc;
30 import java.util.Arrays JavaDoc;
31 import java.util.Collection JavaDoc;
32 import java.util.HashSet JavaDoc;
33 import java.util.Set JavaDoc;
34
35 /** IndexReader is an abstract class, providing an interface for accessing an
36  index. Search of an index is done entirely through this abstract interface,
37  so that any subclass which implements it is searchable.
38
39  <p> Concrete subclasses of IndexReader are usually constructed with a call to
40  one of the static <code>open()</code> methods, e.g. {@link #open(String)}.
41
42  <p> For efficiency, in this API documents are often referred to via
43  <i>document numbers</i>, non-negative integers which each name a unique
44  document in the index. These document numbers are ephemeral--they may change
45  as documents are added to and deleted from an index. Clients should thus not
46  rely on a given document having the same number between sessions.
47  
48  <p> An IndexReader can be opened on a directory for which an IndexWriter is
49  opened already, but it cannot be used to delete documents from the index then.
50
51  @author Doug Cutting
52  @version $Id: IndexReader.java 358685 2005-12-23 02:38:23Z yonik $
53 */

54 public abstract class IndexReader {
55   
56   public static final class FieldOption {
57     private String JavaDoc option;
58     private FieldOption() { }
59     private FieldOption(String JavaDoc option) {
60       this.option = option;
61     }
62     public String JavaDoc toString() {
63       return this.option;
64     }
65     // all fields
66
public static final FieldOption ALL = new FieldOption ("ALL");
67     // all indexed fields
68
public static final FieldOption INDEXED = new FieldOption ("INDEXED");
69     // all fields which are not indexed
70
public static final FieldOption UNINDEXED = new FieldOption ("UNINDEXED");
71     // all fields which are indexed with termvectors enables
72
public static final FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption ("INDEXED_WITH_TERMVECTOR");
73     // all fields which are indexed but don't have termvectors enabled
74
public static final FieldOption INDEXED_NO_TERMVECTOR = new FieldOption ("INDEXED_NO_TERMVECTOR");
75     // all fields where termvectors are enabled. Please note that only standard termvector fields are returned
76
public static final FieldOption TERMVECTOR = new FieldOption ("TERMVECTOR");
77     // all field with termvectors wiht positions enabled
78
public static final FieldOption TERMVECTOR_WITH_POSITION = new FieldOption ("TERMVECTOR_WITH_POSITION");
79     // all fields where termvectors with offset position are set
80
public static final FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption ("TERMVECTOR_WITH_OFFSET");
81     // all fields where termvectors with offset and position values set
82
public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET");
83   }
84   
85   /**
86    * Constructor used if IndexReader is not owner of its directory.
87    * This is used for IndexReaders that are used within other IndexReaders that take care of locking directories.
88    *
89    * @param directory Directory where IndexReader files reside.
90    */

91   protected IndexReader(Directory directory) {
92     this.directory = directory;
93   }
94   
95   /**
96    * Constructor used if IndexReader is owner of its directory.
97    * If IndexReader is owner of its directory, it locks its directory in case of write operations.
98    *
99    * @param directory Directory where IndexReader files reside.
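100    * @param segmentInfos Segment metadata of the index; its version is used for staleness checks and it is written back on commit.
101    * @param closeDirectory If <code>true</code>, the directory is closed when this reader is closed.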
102    */

103   IndexReader(Directory directory, SegmentInfos segmentInfos, boolean closeDirectory) {
104     init(directory, segmentInfos, closeDirectory, true);
105   }
106
107   void init(Directory directory, SegmentInfos segmentInfos, boolean closeDirectory, boolean directoryOwner) {
108     this.directory = directory;
109     this.segmentInfos = segmentInfos;
110     this.directoryOwner = directoryOwner;
111     this.closeDirectory = closeDirectory;
112   }
113
  // Directory holding the index files; returned by directory().
114   private Directory directory;
  // True when this reader acquires the write lock and the commit lock itself
  // (see setNorm, deleteDocument, undeleteAll and commit).
115   private boolean directoryOwner;
  // True when close() must also close the directory.
116   private boolean closeDirectory;
117
  // Segment metadata; supplies the version for getVersion()/isCurrent() and
  // is written by commit() when directoryOwner is set.
118   private SegmentInfos segmentInfos;
  // Write lock obtained lazily by aquireWriteLock(); released by commit() and finalize().
119   private Lock writeLock;
  // Set once aquireWriteLock() finds a newer index version on disk; later
  // write attempts then fail immediately.
120   private boolean stale;
  // Set by setNorm, deleteDocument and undeleteAll; cleared by commit().
121   private boolean hasChanges;
122   
123
124   /** Returns an IndexReader reading the index in an FSDirectory in the named
125    path. */

126   public static IndexReader open(String JavaDoc path) throws IOException JavaDoc {
127     return open(FSDirectory.getDirectory(path, false), true);
128   }
129
130   /** Returns an IndexReader reading the index in an FSDirectory in the named
131    path. */

132   public static IndexReader open(File JavaDoc path) throws IOException JavaDoc {
133     return open(FSDirectory.getDirectory(path, false), true);
134   }
135   
136   /** Returns an IndexReader reading the index in the given Directory. */
137   public static IndexReader open(final Directory directory) throws IOException JavaDoc {
138     return open(directory, false);
139   }
140
141   private static IndexReader open(final Directory directory, final boolean closeDirectory) throws IOException JavaDoc {
142     synchronized (directory) { // in- & inter-process sync
143
return (IndexReader)new Lock.With(
144           directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
145           IndexWriter.COMMIT_LOCK_TIMEOUT) {
146           public Object JavaDoc doBody() throws IOException JavaDoc {
147             SegmentInfos infos = new SegmentInfos();
148             infos.read(directory);
149             if (infos.size() == 1) { // index is optimized
150
return SegmentReader.get(infos, infos.info(0), closeDirectory);
151             }
152             IndexReader[] readers = new IndexReader[infos.size()];
153             for (int i = 0; i < infos.size(); i++)
154               readers[i] = SegmentReader.get(infos.info(i));
155             return new MultiReader(directory, infos, closeDirectory, readers);
156             
157           }
158         }.run();
159     }
160   }
161
162   /** Returns the directory this index resides in. */
163   public Directory directory() { return directory; }
164
165   /**
166    * Returns the time the index in the named directory was last modified.
167    * Do not use this to check whether the reader is still up-to-date, use
168    * {@link #isCurrent()} instead.
169    */

170   public static long lastModified(String JavaDoc directory) throws IOException JavaDoc {
171     return lastModified(new File JavaDoc(directory));
172   }
173
174   /**
175    * Returns the time the index in the named directory was last modified.
176    * Do not use this to check whether the reader is still up-to-date, use
177    * {@link #isCurrent()} instead.
178    */

179   public static long lastModified(File JavaDoc directory) throws IOException JavaDoc {
180     return FSDirectory.fileModified(directory, IndexFileNames.SEGMENTS);
181   }
182
183   /**
184    * Returns the time the index in the named directory was last modified.
185    * Do not use this to check whether the reader is still up-to-date, use
186    * {@link #isCurrent()} instead.
187    */

188   public static long lastModified(Directory directory) throws IOException JavaDoc {
189     return directory.fileModified(IndexFileNames.SEGMENTS);
190   }
191
192   /**
193    * Reads version number from segments files. The version number is
194    * initialized with a timestamp and then increased by one for each change of
195    * the index.
196    *
197    * @param directory where the index resides.
198    * @return version number.
199    * @throws IOException if segments file cannot be read
200    */

201   public static long getCurrentVersion(String JavaDoc directory) throws IOException JavaDoc {
202     return getCurrentVersion(new File JavaDoc(directory));
203   }
204
205   /**
206    * Reads version number from segments files. The version number is
207    * initialized with a timestamp and then increased by one for each change of
208    * the index.
209    *
210    * @param directory where the index resides.
211    * @return version number.
212    * @throws IOException if segments file cannot be read
213    */

214   public static long getCurrentVersion(File JavaDoc directory) throws IOException JavaDoc {
215     Directory dir = FSDirectory.getDirectory(directory, false);
216     long version = getCurrentVersion(dir);
217     dir.close();
218     return version;
219   }
220
221   /**
222    * Reads version number from segments files. The version number is
223    * initialized with a timestamp and then increased by one for each change of
224    * the index.
225    *
226    * @param directory where the index resides.
227    * @return version number.
228    * @throws IOException if segments file cannot be read.
229    */

230   public static long getCurrentVersion(Directory directory) throws IOException JavaDoc {
231     synchronized (directory) { // in- & inter-process sync
232
Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);
233       
234       boolean locked=false;
235       
236       try {
237          locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);
238          
239          return SegmentInfos.readCurrentVersion(directory);
240       } finally {
241         if (locked) {
242           commitLock.release();
243         }
244       }
245     }
246   }
247   
248   /**
249    * Version number when this IndexReader was opened.
250    */

251   public long getVersion() {
252     return segmentInfos.getVersion();
253   }
254
255   /**
256    * Check whether this IndexReader still works on a current version of the index.
257    * If this is not the case you will need to re-open the IndexReader to
258    * make sure you see the latest changes made to the index.
259    *
260    * @throws IOException
261    */

262   public boolean isCurrent() throws IOException JavaDoc {
263     synchronized (directory) { // in- & inter-process sync
264
Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);
265       
266       boolean locked=false;
267       
268       try {
269          locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);
270          
271          return SegmentInfos.readCurrentVersion(directory) == segmentInfos.getVersion();
272       } finally {
273         if (locked) {
274           commitLock.release();
275         }
276       }
277     }
278   }
279
280   /**
281    * Return an array of term frequency vectors for the specified document.
282    * The array contains a vector for each vectorized field in the document.
283    * Each vector contains terms and frequencies for all terms in a given vectorized field.
284    * If no such fields existed, the method returns null. The term vectors that are
285    * returned may either be of type TermFreqVector or of type TermPositionsVector if
286    * positions or offsets have been stored.
287    *
288    * @param docNumber document for which term frequency vectors are returned
289    * @return array of term frequency vectors. May be null if no term vectors have been
290    * stored for the specified document.
291    * @throws IOException if index cannot be accessed
292    * @see org.apache.lucene.document.Field.TermVector
293    */

294   abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
295           throws IOException JavaDoc;
296
297   
298   /**
299    * Return a term frequency vector for the specified document and field. The
300    * returned vector contains terms and frequencies for the terms in
301    * the specified field of this document, if the field had the storeTermVector
302    * flag set. If termvectors had been stored with positions or offsets, a
303    * TermPositionsVector is returned.
304    *
305    * @param docNumber document for which the term frequency vector is returned
306    * @param field field for which the term frequency vector is returned.
307    * @return term frequency vector May be null if field does not exist in the specified
308    * document or term vector was not stored.
309    * @throws IOException if index cannot be accessed
310    * @see org.apache.lucene.document.Field.TermVector
311    */

312   abstract public TermFreqVector getTermFreqVector(int docNumber, String JavaDoc field)
313           throws IOException JavaDoc;
314  
315   /**
316    * Returns <code>true</code> if an index exists at the specified directory.
317    * If the directory does not exist or if there is no index in it,
318    * <code>false</code> is returned.
319    * @param directory the directory to check for an index
320    * @return <code>true</code> if an index exists; <code>false</code> otherwise
321    */

322   public static boolean indexExists(String JavaDoc directory) {
323     return (new File JavaDoc(directory, IndexFileNames.SEGMENTS)).exists();
324   }
325
326   /**
327    * Returns <code>true</code> if an index exists at the specified directory.
328    * If the directory does not exist or if there is no index in it, <code>false</code> is returned.
329    * @param directory the directory to check for an index
330    * @return <code>true</code> if an index exists; <code>false</code> otherwise
331    */

332   public static boolean indexExists(File JavaDoc directory) {
333     return (new File JavaDoc(directory, IndexFileNames.SEGMENTS)).exists();
334   }
335
336   /**
337    * Returns <code>true</code> if an index exists at the specified directory.
338    * If the directory does not exist or if there is no index in it, <code>false</code> is returned.
339    * @param directory the directory to check for an index
340    * @return <code>true</code> if an index exists; <code>false</code> otherwise
341    * @throws IOException if there is a problem with accessing the index
342    */

343   public static boolean indexExists(Directory directory) throws IOException JavaDoc {
344     return directory.fileExists(IndexFileNames.SEGMENTS);
345   }
346
347   /** Returns the number of documents in this index. */
348   public abstract int numDocs();
349
350   /** Returns one greater than the largest possible document number.
351    * This may be used to, e.g., determine how big to allocate an array which
352    * will have an element for every document number in an index.
353    */

354   public abstract int maxDoc();
355
356   /** Returns the stored fields of the <code>n</code><sup>th</sup>
357    <code>Document</code> in this index. */

358   public abstract Document document(int n) throws IOException JavaDoc;
359
360   /** Returns true if document <i>n</i> has been deleted */
361   public abstract boolean isDeleted(int n);
362
363   /** Returns true if any documents have been deleted */
364   public abstract boolean hasDeletions();
365   
366   /** Returns true if there are norms stored for this field. */
367   public boolean hasNorms(String JavaDoc field) throws IOException JavaDoc {
368     // backward compatible implementation.
369
// SegmentReader has an efficient implementation.
370
return norms(field) != null;
371   }
372
373   /** Returns the byte-encoded normalization factor for the named field of
374    * every document. This is used by the search code to score documents.
375    *
376    * @see Field#setBoost(float)
377    */

378   public abstract byte[] norms(String JavaDoc field) throws IOException JavaDoc;
379
380   /** Reads the byte-encoded normalization factor for the named field of every
381    * document. This is used by the search code to score documents.
382    *
383    * @see Field#setBoost(float)
384    */

385   public abstract void norms(String JavaDoc field, byte[] bytes, int offset)
386     throws IOException JavaDoc;
387
388   /** Expert: Resets the normalization factor for the named field of the named
389    * document. The norm represents the product of the field's {@link
390    * Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
391    * int) length normalization}. Thus, to preserve the length normalization
392    * values when resetting this, one should base the new value upon the old.
393    *
394    * @see #norms(String)
395    * @see Similarity#decodeNorm(byte)
396    */

397   public final synchronized void setNorm(int doc, String JavaDoc field, byte value)
398           throws IOException JavaDoc{
399     if(directoryOwner)
400       aquireWriteLock();
401     doSetNorm(doc, field, value);
402     hasChanges = true;
403   }
404           
405   /** Implements setNorm in subclass.*/
406   protected abstract void doSetNorm(int doc, String JavaDoc field, byte value)
407           throws IOException JavaDoc;
408
409   /** Expert: Resets the normalization factor for the named field of the named
410    * document.
411    *
412    * @see #norms(String)
413    * @see Similarity#decodeNorm(byte)
414    */

415   public void setNorm(int doc, String JavaDoc field, float value)
416           throws IOException JavaDoc {
417     setNorm(doc, field, Similarity.encodeNorm(value));
418   }
419
420   /** Returns an enumeration of all the terms in the index.
421    * The enumeration is ordered by Term.compareTo(). Each term
422    * is greater than all that precede it in the enumeration.
423    */

424   public abstract TermEnum terms() throws IOException JavaDoc;
425
426   /** Returns an enumeration of all terms after a given term.
427    * The enumeration is ordered by Term.compareTo(). Each term
428    * is greater than all that precede it in the enumeration.
429    */

430   public abstract TermEnum terms(Term t) throws IOException JavaDoc;
431
432   /** Returns the number of documents containing the term <code>t</code>. */
433   public abstract int docFreq(Term t) throws IOException JavaDoc;
434
435   /** Returns an enumeration of all the documents which contain
436    * <code>term</code>. For each document, the document number, the frequency of
437    * the term in that document is also provided, for use in search scoring.
438    * Thus, this method implements the mapping:
439    * <p><ul>
440    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
441    * </ul>
442    * <p>The enumeration is ordered by document number. Each document number
443    * is greater than all that precede it in the enumeration.
444    */

445   public TermDocs termDocs(Term term) throws IOException JavaDoc {
446     TermDocs termDocs = termDocs();
447     termDocs.seek(term);
448     return termDocs;
449   }
450
451   /** Returns an unpositioned {@link TermDocs} enumerator. */
452   public abstract TermDocs termDocs() throws IOException JavaDoc;
453
454   /** Returns an enumeration of all the documents which contain
455    * <code>term</code>. For each document, in addition to the document number
456    * and frequency of the term in that document, a list of all of the ordinal
457    * positions of the term in the document is available. Thus, this method
458    * implements the mapping:
459    *
460    * <p><ul>
461    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
462    * &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
463    * pos<sub>freq-1</sub>&gt;
464    * &gt;<sup>*</sup>
465    * </ul>
466    * <p> This positional information facilitates phrase and proximity searching.
467    * <p>The enumeration is ordered by document number. Each document number is
468    * greater than all that precede it in the enumeration.
469    */

470   public TermPositions termPositions(Term term) throws IOException JavaDoc {
471     TermPositions termPositions = termPositions();
472     termPositions.seek(term);
473     return termPositions;
474   }
475
476   /** Returns an unpositioned {@link TermPositions} enumerator. */
477   public abstract TermPositions termPositions() throws IOException JavaDoc;
478
479   /**
480    * Tries to acquire the WriteLock on this directory.
481    * this method is only valid if this IndexReader is directory owner.
482    *
483    * @throws IOException If WriteLock cannot be acquired.
484    */

485   private void aquireWriteLock() throws IOException JavaDoc {
486     if (stale)
487       throw new IOException JavaDoc("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
488
489     if (writeLock == null) {
490       Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
491       if (!writeLock.obtain(IndexWriter.WRITE_LOCK_TIMEOUT)) // obtain write lock
492
throw new IOException JavaDoc("Index locked for write: " + writeLock);
493       this.writeLock = writeLock;
494
495       // we have to check whether index has changed since this reader was opened.
496
// if so, this reader is no longer valid for deletion
497
if (SegmentInfos.readCurrentVersion(directory) > segmentInfos.getVersion()) {
498         stale = true;
499         this.writeLock.release();
500         this.writeLock = null;
501         throw new IOException JavaDoc("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
502       }
503     }
504   }
505
506   /** Deletes the document numbered <code>docNum</code>. Once a document is
507    * deleted it will not appear in TermDocs or TermPositions enumerations.
508    * Attempts to read its field with the {@link #document}
509    * method will result in an error. The presence of this document may still be
510    * reflected in the {@link #docFreq} statistic, though
511    * this will be corrected eventually as the index is further modified.
512    *
513    * @deprecated Use {@link #deleteDocument(int docNum)} instead.
514    */

515   public final synchronized void delete(int docNum) throws IOException JavaDoc {
516     deleteDocument(docNum);
517   }
518
519   /** Deletes the document numbered <code>docNum</code>. Once a document is
520    * deleted it will not appear in TermDocs or TermPositions enumerations.
521    * Attempts to read its field with the {@link #document}
522    * method will result in an error. The presence of this document may still be
523    * reflected in the {@link #docFreq} statistic, though
524    * this will be corrected eventually as the index is further modified.
525    */

526   public final synchronized void deleteDocument(int docNum) throws IOException JavaDoc {
527     if(directoryOwner)
528       aquireWriteLock();
529     doDelete(docNum);
530     hasChanges = true;
531   }
532
533
534   /** Implements deletion of the document numbered <code>docNum</code>.
535    * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}.
536    */

537   protected abstract void doDelete(int docNum) throws IOException JavaDoc;
538
539   /** Deletes all documents containing <code>term</code>.
540    * This is useful if one uses a document field to hold a unique ID string for
541    * the document. Then to delete such a document, one merely constructs a
542    * term with the appropriate field and the unique ID string as its text and
543    * passes it to this method.
544    * See {@link #delete(int)} for information about when this deletion will
545    * become effective.
546    * @return the number of documents deleted
547    *
548    * @deprecated Use {@link #deleteDocuments(Term term)} instead.
549    */

550   public final int delete(Term term) throws IOException JavaDoc {
551     return deleteDocuments(term);
552   }
553
554   /** Deletes all documents containing <code>term</code>.
555    * This is useful if one uses a document field to hold a unique ID string for
556    * the document. Then to delete such a document, one merely constructs a
557    * term with the appropriate field and the unique ID string as its text and
558    * passes it to this method.
559    * See {@link #deleteDocument(int)} for information about when this deletion will
560    * become effective.
561    * @return the number of documents deleted
562    */

563   public final int deleteDocuments(Term term) throws IOException JavaDoc {
564     TermDocs docs = termDocs(term);
565     if (docs == null) return 0;
566     int n = 0;
567     try {
568       while (docs.next()) {
569         deleteDocument(docs.doc());
570         n++;
571       }
572     } finally {
573       docs.close();
574     }
575     return n;
576   }
577
578   /** Undeletes all documents currently marked as deleted in this index.*/
579   public final synchronized void undeleteAll() throws IOException JavaDoc{
580     if(directoryOwner)
581       aquireWriteLock();
582     doUndeleteAll();
583     hasChanges = true;
584   }
585   
586   /** Implements actual undeleteAll() in subclass. */
587   protected abstract void doUndeleteAll() throws IOException JavaDoc;
588
589   /**
590    * Commit changes resulting from delete, undeleteAll, or setNorm operations
591    *
592    * @throws IOException
593    */

594   protected final synchronized void commit() throws IOException JavaDoc{
595     if(hasChanges){
596       if(directoryOwner){
597         synchronized (directory) { // in- & inter-process sync
598
new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
599                    IndexWriter.COMMIT_LOCK_TIMEOUT) {
600              public Object JavaDoc doBody() throws IOException JavaDoc {
601                doCommit();
602                segmentInfos.write(directory);
603                return null;
604              }
605            }.run();
606          }
607         if (writeLock != null) {
608           writeLock.release(); // release write lock
609
writeLock = null;
610         }
611       }
612       else
613         doCommit();
614     }
615     hasChanges = false;
616   }
617   
618   /** Implements commit. */
619   protected abstract void doCommit() throws IOException JavaDoc;
620   
621   /**
622    * Closes files associated with this index.
623    * Also saves any new deletions to disk.
624    * No other methods should be called after this has been called.
625    */

626   public final synchronized void close() throws IOException JavaDoc {
627     commit();
628     doClose();
629     if(closeDirectory)
630       directory.close();
631   }
632
633   /** Implements close. */
634   protected abstract void doClose() throws IOException JavaDoc;
635
636   /** Release the write lock, if needed. */
637   protected void finalize() {
638     if (writeLock != null) {
639       writeLock.release(); // release write lock
640
writeLock = null;
641     }
642   }
643   
644   /**
645    * Returns a list of all unique field names that exist in the index pointed
646    * to by this IndexReader.
647    * @return Collection of Strings indicating the names of the fields
648    * @throws IOException if there is a problem with accessing the index
649    *
650    * @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
651    */

652   public abstract Collection JavaDoc getFieldNames() throws IOException JavaDoc;
653
654   /**
655    * Returns a list of all unique field names that exist in the index pointed
656    * to by this IndexReader. The boolean argument specifies whether the fields
657    * returned are indexed or not.
658    * @param indexed <code>true</code> if only indexed fields should be returned;
659    * <code>false</code> if only unindexed fields should be returned.
660    * @return Collection of Strings indicating the names of the fields
661    * @throws IOException if there is a problem with accessing the index
662    *
663    * @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
664    */

665   public abstract Collection JavaDoc getFieldNames(boolean indexed) throws IOException JavaDoc;
666
667   /**
668    *
669    * @param storedTermVector if true, returns only Indexed fields that have term vector info,
670    * else only indexed fields without term vector info
671    * @return Collection of Strings indicating the names of the fields
672    *
673    * @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
674    */

675   public Collection JavaDoc getIndexedFieldNames(boolean storedTermVector){
676     if(storedTermVector){
677       Set JavaDoc fieldSet = new HashSet JavaDoc();
678       fieldSet.addAll(getIndexedFieldNames(Field.TermVector.YES));
679       fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_POSITIONS));
680       fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_OFFSETS));
681       fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_POSITIONS_OFFSETS));
682       return fieldSet;
683     }
684     else
685       return getIndexedFieldNames(Field.TermVector.NO);
686   }
687   
688   /**
689    * Get a list of unique field names that exist in this index, are indexed, and have
690    * the specified term vector information.
691    *
692    * @param tvSpec specifies which term vector information should be available for the fields
693    * @return Collection of Strings indicating the names of the fields
694    *
695    * @deprecated Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
696    */

697   public abstract Collection JavaDoc getIndexedFieldNames(Field.TermVector tvSpec);
698   
699   /**
700    * Get a list of unique field names that exist in this index and have the specified
701    * field option information.
702    * @param fldOption specifies which field option should be available for the returned fields
703    * @return Collection of Strings indicating the names of the fields.
704    * @see IndexReader.FieldOption
705    */

706   public abstract Collection JavaDoc getFieldNames(FieldOption fldOption);
707
708   /**
709    * Returns <code>true</code> iff the index in the named directory is
710    * currently locked.
711    * @param directory the directory to check for a lock
712    * @throws IOException if there is a problem with accessing the index
713    */

714   public static boolean isLocked(Directory directory) throws IOException JavaDoc {
715     return
716             directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() ||
717             directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked();
718   }
719
720   /**
721    * Returns <code>true</code> iff the index in the named directory is
722    * currently locked.
723    * @param directory the directory to check for a lock
724    * @throws IOException if there is a problem with accessing the index
725    */

726   public static boolean isLocked(String JavaDoc directory) throws IOException JavaDoc {
727     Directory dir = FSDirectory.getDirectory(directory, false);
728     boolean result = isLocked(dir);
729     dir.close();
730     return result;
731   }
732
733   /**
734    * Forcibly unlocks the index in the named directory.
735    * <P>
736    * Caution: this should only be used by failure recovery code,
737    * when it is known that no other process nor thread is in fact
738    * currently accessing this index.
739    */

740   public static void unlock(Directory directory) throws IOException JavaDoc {
741     directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
742     directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release();
743   }
744   
745   /**
746    * Prints the filename and size of each file within a given compound file.
747    * Add the -extract flag to extract files to the current working directory.
748    * In order to make the extracted version of the index work, you have to copy
749    * the segments file from the compound index into the directory where the extracted files are stored.
750    * @param args Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt;
751    */

752   public static void main(String JavaDoc [] args) {
753     String JavaDoc filename = null;
754     boolean extract = false;
755
756     for (int i = 0; i < args.length; ++i) {
757       if (args[i].equals("-extract")) {
758         extract = true;
759       } else if (filename == null) {
760         filename = args[i];
761       }
762     }
763
764     if (filename == null) {
765       System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
766       return;
767     }
768
769     Directory dir = null;
770     CompoundFileReader cfr = null;
771       
772     try {
773       File JavaDoc file = new File JavaDoc(filename);
774       String JavaDoc dirname = file.getAbsoluteFile().getParent();
775       filename = file.getName();
776       dir = FSDirectory.getDirectory(dirname, false);
777       cfr = new CompoundFileReader(dir, filename);
778
779       String JavaDoc [] files = cfr.list();
780       Arrays.sort(files); // sort the array of filename so that the output is more readable
781

782       for (int i = 0; i < files.length; ++i) {
783         long len = cfr.fileLength(files[i]);
784
785         if (extract) {
786           System.out.println("extract " + files[i] + " with " + len + " bytes to local directory...");
787           IndexInput ii = cfr.openInput(files[i]);
788
789           FileOutputStream JavaDoc f = new FileOutputStream JavaDoc(files[i]);
790           
791           // read and write with a small buffer, which is more effectiv than reading byte by byte
792
byte[] buffer = new byte[1024];
793           int chunk = buffer.length;
794           while(len > 0) {
795             final int bufLen = (int) Math.min(chunk, len);
796             ii.readBytes(buffer, 0, bufLen);
797             f.write(buffer, 0, bufLen);
798             len -= bufLen;
799           }
800           
801           f.close();
802           ii.close();
803         }
804         else
805           System.out.println(files[i] + ": " + len + " bytes");
806       }
807     } catch (IOException JavaDoc ioe) {
808       ioe.printStackTrace();
809     }
810     finally {
811       try {
812         if (dir != null)
813           dir.close();
814         if (cfr != null)
815           cfr.close();
816       }
817       catch (IOException JavaDoc ioe) {
818         ioe.printStackTrace();
819       }
820     }
821   }
822 }
823
Popular Tags