IndexReader


1   package org.apache.lucene.index;
2   
3   /**
4    * Copyright 2004 The Apache Software Foundation
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import org.apache.lucene.document.Document;
20  import org.apache.lucene.document.Field;
21  import org.apache.lucene.search.Similarity;
22  import org.apache.lucene.store.Directory;
23  import org.apache.lucene.store.FSDirectory;
24  import org.apache.lucene.store.IndexInput;
25  import org.apache.lucene.store.Lock;
26  
27  import java.io.File  ;
28  import java.io.FileOutputStream  ;
29  import java.io.IOException  ;
30  import java.util.Arrays  ;
31  import java.util.Collection  ;
32  import java.util.HashSet  ;
33  import java.util.Set  ;
34  
35  /** IndexReader is an abstract class, providing an interface for accessing an
36   index.  Search of an index is done entirely through this abstract interface,
37   so that any subclass which implements it is searchable.
38  
39   <p> Concrete subclasses of IndexReader are usually constructed with a call to
40   one of the static <code>open()</code> methods, e.g. {@link #open(String)}.
41  
42   <p> For efficiency, in this API documents are often referred to via
43   <i>document numbers</i>, non-negative integers which each name a unique
44   document in the index.  These document numbers are ephemeral--they may change
45   as documents are added to and deleted from an index.  Clients should thus not
46   rely on a given document having the same number between sessions.
47   
48   <p> An IndexReader can be opened on a directory for which an IndexWriter is
49   opened already, but it cannot be used to delete documents from the index then.
50  
51   @author Doug Cutting
52   @version $Id: IndexReader.java 358685 2005-12-23 02:38:23Z yonik $
53  */
54  public abstract class IndexReader {
55    
56    public static final class FieldOption {
57      private String   option;
58      private FieldOption() { }
59      private FieldOption(String   option) {
60        this.option = option;
61      } 
62      public String   toString() {
63        return this.option;
64      }
65      // all fields
66      public static final FieldOption ALL = new FieldOption ("ALL");
67      // all indexed fields
68      public static final FieldOption INDEXED = new FieldOption ("INDEXED");
69      // all fields which are not indexed
70      public static final FieldOption UNINDEXED = new FieldOption ("UNINDEXED");
71      // all fields which are indexed with termvectors enables
72      public static final FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption ("INDEXED_WITH_TERMVECTOR");
73      // all fields which are indexed but don't have termvectors enabled
74      public static final FieldOption INDEXED_NO_TERMVECTOR = new FieldOption ("INDEXED_NO_TERMVECTOR");
75      // all fields where termvectors are enabled. Please note that only standard termvector fields are returned
76      public static final FieldOption TERMVECTOR = new FieldOption ("TERMVECTOR");
77      // all field with termvectors wiht positions enabled
78      public static final FieldOption TERMVECTOR_WITH_POSITION = new FieldOption ("TERMVECTOR_WITH_POSITION");
79      // all fields where termvectors with offset position are set
80      public static final FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption ("TERMVECTOR_WITH_OFFSET");
81      // all fields where termvectors with offset and position values set
82      public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET");
83    }
84    
85    /**
86     * Constructor used if IndexReader is not owner of its directory. 
87     * This is used for IndexReaders that are used within other IndexReaders that take care or locking directories.
88     * 
89     * @param directory Directory where IndexReader files reside.
90     */
91    protected IndexReader(Directory directory) {
92      this.directory = directory;
93    }
94    
95    /**
96     * Constructor used if IndexReader is owner of its directory.
97     * If IndexReader is owner of its directory, it locks its directory in case of write operations.
98     * 
99     * @param directory Directory where IndexReader files reside.
100    * @param segmentInfos Used for write-l
101    * @param closeDirectory
102    */
103   IndexReader(Directory directory, SegmentInfos segmentInfos, boolean closeDirectory) {
104     init(directory, segmentInfos, closeDirectory, true);
105   }
106 
107   void init(Directory directory, SegmentInfos segmentInfos, boolean closeDirectory, boolean directoryOwner) {
108     this.directory = directory;
109     this.segmentInfos = segmentInfos;
110     this.directoryOwner = directoryOwner;
111     this.closeDirectory = closeDirectory;
112   }
113 
114   private Directory directory;
115   private boolean directoryOwner;
116   private boolean closeDirectory;
117 
118   private SegmentInfos segmentInfos;
119   private Lock writeLock;
120   private boolean stale;
121   private boolean hasChanges;
122   
123 
124   /** Returns an IndexReader reading the index in an FSDirectory in the named
125    path. */
126   public static IndexReader open(String   path) throws IOException   {
127     return open(FSDirectory.getDirectory(path, false), true);
128   }
129 
130   /** Returns an IndexReader reading the index in an FSDirectory in the named
131    path. */
132   public static IndexReader open(File   path) throws IOException   {
133     return open(FSDirectory.getDirectory(path, false), true);
134   }
135   
136   /** Returns an IndexReader reading the index in the given Directory. */
137   public static IndexReader open(final Directory directory) throws IOException   {
138     return open(directory, false);
139   }
140 
141   private static IndexReader open(final Directory directory, final boolean closeDirectory) throws IOException   {
142     synchronized (directory) {            // in- & inter-process sync
143       return (IndexReader)new Lock.With(
144           directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
145           IndexWriter.COMMIT_LOCK_TIMEOUT) {
146           public Object   doBody() throws IOException   {
147             SegmentInfos infos = new SegmentInfos();
148             infos.read(directory);
149             if (infos.size() == 1) {          // index is optimized
150               return SegmentReader.get(infos, infos.info(0), closeDirectory);
151             }
152             IndexReader[] readers = new IndexReader[infos.size()];
153             for (int i = 0; i < infos.size(); i++)
154               readers[i] = SegmentReader.get(infos.info(i));
155             return new MultiReader(directory, infos, closeDirectory, readers);
156             
157           }
158         }.run();
159     }
160   }
161 
162   /** Returns the directory this index resides in. */
163   public Directory directory() { return directory; }
164 
165   /** 
166    * Returns the time the index in the named directory was last modified.
167    * Do not use this to check whether the reader is still up-to-date, use
168    * {@link #isCurrent()} instead. 
169    */
170   public static long lastModified(String   directory) throws IOException   {
171     return lastModified(new File  (directory));
172   }
173 
174   /** 
175    * Returns the time the index in the named directory was last modified. 
176    * Do not use this to check whether the reader is still up-to-date, use
177    * {@link #isCurrent()} instead. 
178    */
179   public static long lastModified(File   directory) throws IOException   {
180     return FSDirectory.fileModified(directory, IndexFileNames.SEGMENTS);
181   }
182 
183   /** 
184    * Returns the time the index in the named directory was last modified. 
185    * Do not use this to check whether the reader is still up-to-date, use
186    * {@link #isCurrent()} instead. 
187    */
188   public static long lastModified(Directory directory) throws IOException   {
189     return directory.fileModified(IndexFileNames.SEGMENTS);
190   }
191 
192   /**
193    * Reads version number from segments files. The version number is
194    * initialized with a timestamp and then increased by one for each change of
195    * the index.
196    * 
197    * @param directory where the index resides.
198    * @return version number.
199    * @throws IOException if segments file cannot be read
200    */
201   public static long getCurrentVersion(String   directory) throws IOException   {
202     return getCurrentVersion(new File  (directory));
203   }
204 
205   /**
206    * Reads version number from segments files. The version number is
207    * initialized with a timestamp and then increased by one for each change of
208    * the index.
209    * 
210    * @param directory where the index resides.
211    * @return version number.
212    * @throws IOException if segments file cannot be read
213    */
214   public static long getCurrentVersion(File   directory) throws IOException   {
215     Directory dir = FSDirectory.getDirectory(directory, false);
216     long version = getCurrentVersion(dir);
217     dir.close();
218     return version;
219   }
220 
221   /**
222    * Reads version number from segments files. The version number is
223    * initialized with a timestamp and then increased by one for each change of
224    * the index.
225    * 
226    * @param directory where the index resides.
227    * @return version number.
228    * @throws IOException if segments file cannot be read.
229    */
230   public static long getCurrentVersion(Directory directory) throws IOException   {
231     synchronized (directory) {                 // in- & inter-process sync
232       Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);
233       
234       boolean locked=false;
235       
236       try {
237          locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);
238          
239          return SegmentInfos.readCurrentVersion(directory);
240       } finally {
241         if (locked) {
242           commitLock.release();
243         }
244       }
245     }
246   }
247   
248   /**
249    * Version number when this IndexReader was opened.
250    */
251   public long getVersion() {
252     return segmentInfos.getVersion();
253   }
254 
255   /**
256    * Check whether this IndexReader still works on a current version of the index.
257    * If this is not the case you will need to re-open the IndexReader to
258    * make sure you see the latest changes made to the index.
259    * 
260    * @throws IOException
261    */
262   public boolean isCurrent() throws IOException   {
263     synchronized (directory) {                 // in- & inter-process sync
264       Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);
265       
266       boolean locked=false;
267       
268       try {
269          locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);
270          
271          return SegmentInfos.readCurrentVersion(directory) == segmentInfos.getVersion();
272       } finally {
273         if (locked) {
274           commitLock.release();
275         }
276       }
277     }
278   }
279 
280   /**
281    *  Return an array of term frequency vectors for the specified document.
282    *  The array contains a vector for each vectorized field in the document.
283    *  Each vector contains terms and frequencies for all terms in a given vectorized field.
284    *  If no such fields existed, the method returns null. The term vectors that are
285    * returned my either be of type TermFreqVector or of type TermPositionsVector if
286    * positions or offsets have been stored.
287    * 
288    * @param docNumber document for which term frequency vectors are returned
289    * @return array of term frequency vectors. May be null if no term vectors have been
290    *  stored for the specified document.
291    * @throws IOException if index cannot be accessed
292    * @see org.apache.lucene.document.Field.TermVector
293    */
294   abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
295           throws IOException  ;
296 
297   
298   /**
299    *  Return a term frequency vector for the specified document and field. The
300    *  returned vector contains terms and frequencies for the terms in
301    *  the specified field of this document, if the field had the storeTermVector
302    *  flag set. If termvectors had been stored with positions or offsets, a 
303    *  TermPositionsVector is returned.
304    * 
305    * @param docNumber document for which the term frequency vector is returned
306    * @param field field for which the term frequency vector is returned.
307    * @return term frequency vector May be null if field does not exist in the specified
308    * document or term vector was not stored.
309    * @throws IOException if index cannot be accessed
310    * @see org.apache.lucene.document.Field.TermVector
311    */
312   abstract public TermFreqVector getTermFreqVector(int docNumber, String   field)
313           throws IOException  ;
314  
315   /**
316    * Returns <code>true</code> if an index exists at the specified directory.
317    * If the directory does not exist or if there is no index in it.
318    * <code>false</code> is returned.
319    * @param  directory the directory to check for an index
320    * @return <code>true</code> if an index exists; <code>false</code> otherwise
321    */
322   public static boolean indexExists(String   directory) {
323     return (new File  (directory, IndexFileNames.SEGMENTS)).exists();
324   }
325 
326   /**
327    * Returns <code>true</code> if an index exists at the specified directory.
328    * If the directory does not exist or if there is no index in it.
329    * @param  directory the directory to check for an index
330    * @return <code>true</code> if an index exists; <code>false</code> otherwise
331    */
332   public static boolean indexExists(File   directory) {
333     return (new File  (directory, IndexFileNames.SEGMENTS)).exists();
334   }
335 
336   /**
337    * Returns <code>true</code> if an index exists at the specified directory.
338    * If the directory does not exist or if there is no index in it.
339    * @param  directory the directory to check for an index
340    * @return <code>true</code> if an index exists; <code>false</code> otherwise
341    * @throws IOException if there is a problem with accessing the index
342    */
343   public static boolean indexExists(Directory directory) throws IOException   {
344     return directory.fileExists(IndexFileNames.SEGMENTS);
345   }
346 
347   /** Returns the number of documents in this index. */
348   public abstract int numDocs();
349 
350   /** Returns one greater than the largest possible document number.
351    * This may be used to, e.g., determine how big to allocate an array which
352    * will have an element for every document number in an index.
353    */
354   public abstract int maxDoc();
355 
356   /** Returns the stored fields of the <code>n</code><sup>th</sup>
357    <code>Document</code> in this index. */
358   public abstract Document document(int n) throws IOException  ;
359 
360   /** Returns true if document <i>n</i> has been deleted */
361   public abstract boolean isDeleted(int n);
362 
363   /** Returns true if any documents have been deleted */
364   public abstract boolean hasDeletions();
365   
366   /** Returns true if there are norms stored for this field. */
367   public boolean hasNorms(String   field) throws IOException   {
368     // backward compatible implementation.
369     // SegmentReader has an efficient implementation.
370     return norms(field) != null;
371   }
372 
373   /** Returns the byte-encoded normalization factor for the named field of
374    * every document.  This is used by the search code to score documents.
375    *
376    * @see Field#setBoost(float)
377    */
378   public abstract byte[] norms(String   field) throws IOException  ;
379 
380   /** Reads the byte-encoded normalization factor for the named field of every
381    *  document.  This is used by the search code to score documents.
382    *
383    * @see Field#setBoost(float)
384    */
385   public abstract void norms(String   field, byte[] bytes, int offset)
386     throws IOException  ;
387 
388   /** Expert: Resets the normalization factor for the named field of the named
389    * document.  The norm represents the product of the field's {@link
390    * Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
391    * int) length normalization}.  Thus, to preserve the length normalization
392    * values when resetting this, one should base the new value upon the old.
393    *
394    * @see #norms(String)
395    * @see Similarity#decodeNorm(byte)
396    */
397   public final synchronized  void setNorm(int doc, String   field, byte value)
398           throws IOException  {
399     if(directoryOwner)
400       aquireWriteLock();
401     doSetNorm(doc, field, value);
402     hasChanges = true;
403   }
404           
405   /** Implements setNorm in subclass.*/
406   protected abstract void doSetNorm(int doc, String   field, byte value) 
407           throws IOException  ;
408 
409   /** Expert: Resets the normalization factor for the named field of the named
410    * document.
411    *
412    * @see #norms(String)
413    * @see Similarity#decodeNorm(byte)
414    */
415   public void setNorm(int doc, String   field, float value)
416           throws IOException   {
417     setNorm(doc, field, Similarity.encodeNorm(value));
418   }
419 
420   /** Returns an enumeration of all the terms in the index.
421    * The enumeration is ordered by Term.compareTo().  Each term
422    * is greater than all that precede it in the enumeration.
423    */
424   public abstract TermEnum terms() throws IOException  ;
425 
426   /** Returns an enumeration of all terms after a given term.
427    * The enumeration is ordered by Term.compareTo().  Each term
428    * is greater than all that precede it in the enumeration.
429    */
430   public abstract TermEnum terms(Term t) throws IOException  ;
431 
432   /** Returns the number of documents containing the term <code>t</code>. */
433   public abstract int docFreq(Term t) throws IOException  ;
434 
435   /** Returns an enumeration of all the documents which contain
436    * <code>term</code>. For each document, the document number, the frequency of
437    * the term in that document is also provided, for use in search scoring.
438    * Thus, this method implements the mapping:
439    * <p><ul>
440    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
441    * </ul>
442    * <p>The enumeration is ordered by document number.  Each document number
443    * is greater than all that precede it in the enumeration.
444    */
445   public TermDocs termDocs(Term term) throws IOException   {
446     TermDocs termDocs = termDocs();
447     termDocs.seek(term);
448     return termDocs;
449   }
450 
451   /** Returns an unpositioned {@link TermDocs} enumerator. */
452   public abstract TermDocs termDocs() throws IOException  ;
453 
454   /** Returns an enumeration of all the documents which contain
455    * <code>term</code>.  For each document, in addition to the document number
456    * and frequency of the term in that document, a list of all of the ordinal
457    * positions of the term in the document is available.  Thus, this method
458    * implements the mapping:
459    *
460    * <p><ul>
461    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
462    * &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
463    * pos<sub>freq-1</sub>&gt;
464    * &gt;<sup>*</sup>
465    * </ul>
466    * <p> This positional information faciliates phrase and proximity searching.
467    * <p>The enumeration is ordered by document number.  Each document number is
468    * greater than all that precede it in the enumeration.
469    */
470   public TermPositions termPositions(Term term) throws IOException   {
471     TermPositions termPositions = termPositions();
472     termPositions.seek(term);
473     return termPositions;
474   }
475 
476   /** Returns an unpositioned {@link TermPositions} enumerator. */
477   public abstract TermPositions termPositions() throws IOException  ;
478 
479   /**
480    * Tries to acquire the WriteLock on this directory.
481    * this method is only valid if this IndexReader is directory owner.
482    * 
483    * @throws IOException If WriteLock cannot be acquired.
484    */
485   private void aquireWriteLock() throws IOException   {
486     if (stale)
487       throw new IOException  ("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
488 
489     if (writeLock == null) {
490       Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
491       if (!writeLock.obtain(IndexWriter.WRITE_LOCK_TIMEOUT)) // obtain write lock
492         throw new IOException  ("Index locked for write: " + writeLock);
493       this.writeLock = writeLock;
494 
495       // we have to check whether index has changed since this reader was opened.
496       // if so, this reader is no longer valid for deletion
497       if (SegmentInfos.readCurrentVersion(directory) > segmentInfos.getVersion()) {
498         stale = true;
499         this.writeLock.release();
500         this.writeLock = null;
501         throw new IOException  ("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
502       }
503     }
504   }
505 
506   /** Deletes the document numbered <code>docNum</code>.  Once a document is
507    * deleted it will not appear in TermDocs or TermPostitions enumerations.
508    * Attempts to read its field with the {@link #document}
509    * method will result in an error.  The presence of this document may still be
510    * reflected in the {@link #docFreq} statistic, though
511    * this will be corrected eventually as the index is further modified.
512    *
513    * @deprecated Use {@link #deleteDocument(int docNum)} instead.
514    */
515   public final synchronized void delete(int docNum) throws IOException   {
516     deleteDocument(docNum);
517   }
518 
519   /** Deletes the document numbered <code>docNum</code>.  Once a document is
520    * deleted it will not appear in TermDocs or TermPostitions enumerations.
521    * Attempts to read its field with the {@link #document}
522    * method will result in an error.  The presence of this document may still be
523    * reflected in the {@link #docFreq} statistic, though
524    * this will be corrected eventually as the index is further modified.
525    */
526   public final synchronized void deleteDocument(int docNum) throws IOException   {
527     if(directoryOwner)
528       aquireWriteLock();
529     doDelete(docNum);
530     hasChanges = true;
531   }
532 
533 
534   /** Implements deletion of the document numbered <code>docNum</code>.
535    * Applications should call {@link #delete(int)} or {@link #delete(Term)}.
536    */
537   protected abstract void doDelete(int docNum) throws IOException  ;
538 
539   /** Deletes all documents containing <code>term</code>.
540    * This is useful if one uses a document field to hold a unique ID string for
541    * the document.  Then to delete such a document, one merely constructs a
542    * term with the appropriate field and the unique ID string as its text and
543    * passes it to this method.
544    * See {@link #delete(int)} for information about when this deletion will
545    * become effective.
546    * @return the number of documents deleted
547    * 
548    * @deprecated Use {@link #deleteDocuments(Term term)} instead.
549    */
550   public final int delete(Term term) throws IOException   {
551     return deleteDocuments(term);
552   }
553 
554   /** Deletes all documents containing <code>term</code>.
555    * This is useful if one uses a document field to hold a unique ID string for
556    * the document.  Then to delete such a document, one merely constructs a
557    * term with the appropriate field and the unique ID string as its text and
558    * passes it to this method.
559    * See {@link #delete(int)} for information about when this deletion will 
560    * become effective.
561    * @return the number of documents deleted
562    */
563   public final int deleteDocuments(Term term) throws IOException   {
564     TermDocs docs = termDocs(term);
565     if (docs == null) return 0;
566     int n = 0;
567     try {
568       while (docs.next()) {
569         deleteDocument(docs.doc());
570         n++;
571       }
572     } finally {
573       docs.close();
574     }
575     return n;
576   }
577 
578   /** Undeletes all documents currently marked as deleted in this index.*/
579   public final synchronized void undeleteAll() throws IOException  {
580     if(directoryOwner)
581       aquireWriteLock();
582     doUndeleteAll();
583     hasChanges = true;
584   }
585   
586   /** Implements actual undeleteAll() in subclass. */
587   protected abstract void doUndeleteAll() throws IOException  ;
588 
589   /**
590    * Commit changes resulting from delete, undeleteAll, or setNorm operations
591    * 
592    * @throws IOException
593    */
594   protected final synchronized void commit() throws IOException  {
595     if(hasChanges){
596       if(directoryOwner){
597         synchronized (directory) {      // in- & inter-process sync
598            new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
599                    IndexWriter.COMMIT_LOCK_TIMEOUT) {
600              public Object   doBody() throws IOException   {
601                doCommit();
602                segmentInfos.write(directory);
603                return null;
604              }
605            }.run();
606          }
607         if (writeLock != null) {
608           writeLock.release();  // release write lock
609           writeLock = null;
610         }
611       }
612       else
613         doCommit();
614     }
615     hasChanges = false;
616   }
617   
618   /** Implements commit. */
619   protected abstract void doCommit() throws IOException  ;
620   
621   /**
622    * Closes files associated with this index.
623    * Also saves any new deletions to disk.
624    * No other methods should be called after this has been called.
625    */
626   public final synchronized void close() throws IOException   {
627     commit();
628     doClose();
629     if(closeDirectory)
630       directory.close();
631   }
632 
633   /** Implements close. */
634   protected abstract void doClose() throws IOException  ;
635 
636   /** Release the write lock, if needed. */
637   protected void finalize() {
638     if (writeLock != null) {
639       writeLock.release();                        // release write lock
640       writeLock = null;
641     }
642   }
643   
644   /**
645    * Returns a list of all unique field names that exist in the index pointed
646    * to by this IndexReader.
647    * @return Collection of Strings indicating the names of the fields
648    * @throws IOException if there is a problem with accessing the index
649    * 
650    * @deprecated  Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
651    */
652   public abstract Collection   getFieldNames() throws IOException  ;
653 
654   /**
655    * Returns a list of all unique field names that exist in the index pointed
656    * to by this IndexReader.  The boolean argument specifies whether the fields
657    * returned are indexed or not.
658    * @param indexed <code>true</code> if only indexed fields should be returned;
659    *                <code>false</code> if only unindexed fields should be returned.
660    * @return Collection of Strings indicating the names of the fields
661    * @throws IOException if there is a problem with accessing the index
662    * 
663    * @deprecated  Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
664    */
665   public abstract Collection   getFieldNames(boolean indexed) throws IOException  ;
666 
667   /**
668    * 
669    * @param storedTermVector if true, returns only Indexed fields that have term vector info, 
670    *                        else only indexed fields without term vector info 
671    * @return Collection of Strings indicating the names of the fields
672    * 
673    * @deprecated  Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
674    */ 
675   public Collection   getIndexedFieldNames(boolean storedTermVector){
676     if(storedTermVector){
677       Set   fieldSet = new HashSet  ();
678       fieldSet.addAll(getIndexedFieldNames(Field.TermVector.YES));
679       fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_POSITIONS));
680       fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_OFFSETS));
681       fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_POSITIONS_OFFSETS));
682       return fieldSet;
683     }
684     else
685       return getIndexedFieldNames(Field.TermVector.NO);
686   }
687   
688   /**
689    * Get a list of unique field names that exist in this index, are indexed, and have
690    * the specified term vector information.
691    * 
692    * @param tvSpec specifies which term vector information should be available for the fields
693    * @return Collection of Strings indicating the names of the fields
694    * 
695    * @deprecated  Replaced by {@link #getFieldNames(IndexReader.FieldOption)}
696    */
697   public abstract Collection   getIndexedFieldNames(Field.TermVector tvSpec);
698   
699   /**
700    * Get a list of unique field names that exist in this index and have the specified
701    * field option information.
702    * @param fldOption specifies which field option should be available for the returned fields
703    * @return Collection of Strings indicating the names of the fields.
704    * @see IndexReader.FieldOption
705    */
706   public abstract Collection   getFieldNames(FieldOption fldOption);
707 
708   /**
709    * Returns <code>true</code> iff the index in the named directory is
710    * currently locked.
711    * @param directory the directory to check for a lock
712    * @throws IOException if there is a problem with accessing the index
713    */
714   public static boolean isLocked(Directory directory) throws IOException   {
715     return
716             directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() ||
717             directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked();
718   }
719 
720   /**
721    * Returns <code>true</code> iff the index in the named directory is
722    * currently locked.
723    * @param directory the directory to check for a lock
724    * @throws IOException if there is a problem with accessing the index
725    */
726   public static boolean isLocked(String   directory) throws IOException   {
727     Directory dir = FSDirectory.getDirectory(directory, false);
728     boolean result = isLocked(dir);
729     dir.close();
730     return result;
731   }
732 
733   /**
734    * Forcibly unlocks the index in the named directory.
735    * <P>
736    * Caution: this should only be used by failure recovery code,
737    * when it is known that no other process nor thread is in fact
738    * currently accessing this index.
739    */
740   public static void unlock(Directory directory) throws IOException   {
741     directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
742     directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release();
743   }
744   
745   /**
746    * Prints the filename and size of each file within a given compound file.
747    * Add the -extract flag to extract files to the current working directory.
748    * In order to make the extracted version of the index work, you have to copy
749    * the segments file from the compound index into the directory where the extracted files are stored.
750    * @param args Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt;
751    */
752   public static void main(String   [] args) {
753     String   filename = null;
754     boolean extract = false;
755 
756     for (int i = 0; i < args.length; ++i) {
757       if (args[i].equals("-extract")) {
758         extract = true;
759       } else if (filename == null) {
760         filename = args[i];
761       }
762     }
763 
764     if (filename == null) {
765       System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
766       return;
767     }
768 
769     Directory dir = null;
770     CompoundFileReader cfr = null;
771       
772     try {
773       File   file = new File  (filename);
774       String   dirname = file.getAbsoluteFile().getParent();
775       filename = file.getName();
776       dir = FSDirectory.getDirectory(dirname, false);
777       cfr = new CompoundFileReader(dir, filename);
778 
779       String   [] files = cfr.list();
780       Arrays.sort(files);   // sort the array of filename so that the output is more readable
781       
782       for (int i = 0; i < files.length; ++i) {
783         long len = cfr.fileLength(files[i]);
784 
785         if (extract) {
786           System.out.println("extract " + files[i] + " with " + len + " bytes to local directory...");
787           IndexInput ii = cfr.openInput(files[i]);
788 
789           FileOutputStream   f = new FileOutputStream  (files[i]);
790           
791           // read and write with a small buffer, which is more effectiv than reading byte by byte
792           byte[] buffer = new byte[1024];
793           int chunk = buffer.length;
794           while(len > 0) {
795             final int bufLen = (int) Math.min(chunk, len);
796             ii.readBytes(buffer, 0, bufLen);
797             f.write(buffer, 0, bufLen);
798             len -= bufLen;
799           }
800           
801           f.close();
802           ii.close();
803         }
804         else
805           System.out.println(files[i] + ": " + len + " bytes");
806       }
807     } catch (IOException   ioe) {
808       ioe.printStackTrace();
809     }
810     finally {
811       try {
812         if (dir != null)
813           dir.close();
814         if (cfr != null)
815           cfr.close();
816       }
817       catch (IOException   ioe) {
818         ioe.printStackTrace();
819       }
820     }
821   }
822 }
823
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags