SegmentsReader


1   package org.apache.lucene.index;
2   
3   /* ====================================================================
4    * The Apache Software License, Version 1.1
5    *
6    * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.
7    * All rights reserved.
8    *
9    * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   *
13   * 1. Redistributions of source code must retain the above copyright
14   *    notice, this list of conditions and the following disclaimer.
15   *
16   * 2. Redistributions in binary form must reproduce the above copyright
17   *    notice, this list of conditions and the following disclaimer in
18   *    the documentation and/or other materials provided with the
19   *    distribution.
20   *
21   * 3. The end-user documentation included with the redistribution,
22   *    if any, must include the following acknowledgment:
23   *       "This product includes software developed by the
24   *        Apache Software Foundation (http://www.apache.org/)."
25   *    Alternately, this acknowledgment may appear in the software itself,
26   *    if and wherever such third-party acknowledgments normally appear.
27   *
28   * 4. The names "Apache" and "Apache Software Foundation" and
29   *    "Apache Lucene" must not be used to endorse or promote products
30   *    derived from this software without prior written permission. For
31   *    written permission, please contact apache@apache.org.
32   *
33   * 5. Products derived from this software may not be called "Apache",
34   *    "Apache Lucene", nor may "Apache" appear in their name, without
35   *    prior written permission of the Apache Software Foundation.
36   *
37   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48   * SUCH DAMAGE.
49   * ====================================================================
50   *
51   * This software consists of voluntary contributions made by many
52   * individuals on behalf of the Apache Software Foundation.  For more
53   * information on the Apache Software Foundation, please see
54   * <http://www.apache.org/>.
55   */
56  
57  import java.io.IOException  ;
58  import java.util.Collection  ;
59  import java.util.HashSet  ;
60  import java.util.Hashtable  ;
61  import java.util.Iterator  ;
62  import java.util.Set  ;
63  
64  import org.apache.lucene.document.Document;
65  import org.apache.lucene.store.Directory;
66  
67  /**
68   * FIXME: Describe class <code>SegmentsReader</code> here.
69   *
70   * @version $Id: SegmentsReader.java,v 1.17 2003/12/15 23:04:42 cutting Exp $
71   */
72  final class SegmentsReader extends IndexReader
73  {
74    protected SegmentReader[] readers;
75    protected int[] starts;             // 1st docno for each segment
76    private Hashtable   normsCache = new Hashtable  ();
77    private int maxDoc = 0;
78    private int numDocs = -1;
79    private boolean hasDeletions = false;
80    
81    SegmentsReader(SegmentInfos sis, Directory directory, SegmentReader[] r) throws IOException   {
82      super(directory);
83      segmentInfos = sis;
84      readers = r;
85      starts = new int[readers.length + 1];     // build starts array
86      for (int i = 0; i < readers.length; i++) {
87        starts[i] = maxDoc;
88        maxDoc += readers[i].maxDoc();          // compute maxDocs
89  
90        if (readers[i].hasDeletions())
91          hasDeletions = true;
92      }
93      starts[readers.length] = maxDoc;
94    }
95  
96    public final synchronized int numDocs() {
97      if (numDocs == -1) {              // check cache
98        int n = 0;                  // cache miss--recompute
99        for (int i = 0; i < readers.length; i++)
100     n += readers[i].numDocs();        // sum from readers
101       numDocs = n;
102     }
103     return numDocs;
104   }
105 
106   public final int maxDoc() {
107     return maxDoc;
108   }
109 
110   public final Document document(int n) throws IOException   {
111     int i = readerIndex(n);           // find segment num
112     return readers[i].document(n - starts[i]);    // dispatch to segment reader
113   }
114 
115   public final boolean isDeleted(int n) {
116     int i = readerIndex(n);           // find segment num
117     return readers[i].isDeleted(n - starts[i]);   // dispatch to segment reader
118   }
119 
120   public boolean hasDeletions() { return hasDeletions; }
121 
122   protected final synchronized void doDelete(int n) throws IOException   {
123     numDocs = -1;                 // invalidate cache
124     int i = readerIndex(n);           // find segment num
125     readers[i].doDelete(n - starts[i]);       // dispatch to segment reader
126     hasDeletions = true;
127   }
128 
129   public void undeleteAll() throws IOException   {
130     for (int i = 0; i < readers.length; i++)
131       readers[i].undeleteAll();
132     hasDeletions = false;
133   }
134 
135   private final int readerIndex(int n) {      // find reader for doc n:
136     int lo = 0;                   // search starts array
137     int hi = readers.length - 1;                  // for first element less
138 
139     while (hi >= lo) {
140       int mid = (lo + hi) >> 1;
141       int midValue = starts[mid];
142       if (n < midValue)
143     hi = mid - 1;
144       else if (n > midValue)
145     lo = mid + 1;
146       else {                                      // found a match
147         while (mid+1 < readers.length && starts[mid+1] == midValue) {
148           mid++;                                  // scan to last match
149         }
150     return mid;
151       }
152     }
153     return hi;
154   }
155 
156   public final synchronized byte[] norms(String   field) throws IOException   {
157     byte[] bytes = (byte[])normsCache.get(field);
158     if (bytes != null)
159       return bytes;               // cache hit
160 
161     bytes = new byte[maxDoc()];
162     for (int i = 0; i < readers.length; i++)
163       readers[i].norms(field, bytes, starts[i]);
164     normsCache.put(field, bytes);         // update cache
165     return bytes;
166   }
167 
168   public synchronized void setNorm(int n, String   field, byte value)
169     throws IOException   {
170     normsCache.remove(field);                     // clear cache
171     int i = readerIndex(n);           // find segment num
172     readers[i].setNorm(n-starts[i], field, value); // dispatch
173   }
174 
175   public final TermEnum terms() throws IOException   {
176     return new SegmentsTermEnum(readers, starts, null);
177   }
178 
179   public final TermEnum terms(Term term) throws IOException   {
180     return new SegmentsTermEnum(readers, starts, term);
181   }
182 
183   public final int docFreq(Term t) throws IOException   {
184     int total = 0;                // sum freqs in segments
185     for (int i = 0; i < readers.length; i++)
186       total += readers[i].docFreq(t);
187     return total;
188   }
189 
190   public final TermDocs termDocs() throws IOException   {
191     return new SegmentsTermDocs(readers, starts);
192   }
193 
194   public final TermPositions termPositions() throws IOException   {
195     return new SegmentsTermPositions(readers, starts);
196   }
197 
198   protected final synchronized void doClose() throws IOException   {
199     for (int i = 0; i < readers.length; i++)
200       readers[i].close();
201   }
202 
203   /**
204    * @see IndexReader#getFieldNames()
205    */
206   public Collection   getFieldNames() throws IOException   {
207     // maintain a unique set of field names
208     Set   fieldSet = new HashSet  ();
209     for (int i = 0; i < readers.length; i++) {
210         SegmentReader reader = readers[i];
211         Collection   names = reader.getFieldNames();
212         // iterate through the field names and add them to the set
213         for (Iterator   iterator = names.iterator(); iterator.hasNext();) {
214             String   s = (String  ) iterator.next();
215             fieldSet.add(s);
216         }
217     }
218     return fieldSet;
219   }
220 
221   /**
222    * @see IndexReader#getFieldNames(boolean)
223    */
224   public Collection   getFieldNames(boolean indexed) throws IOException   {
225     // maintain a unique set of field names
226     Set   fieldSet = new HashSet  ();
227     for (int i = 0; i < readers.length; i++) {
228         SegmentReader reader = readers[i];
229         Collection   names = reader.getFieldNames(indexed);
230         fieldSet.addAll(names);
231     }
232     return fieldSet;
233   }
234 }
235 
236 class SegmentsTermEnum extends TermEnum {
237   private SegmentMergeQueue queue;
238 
239   private Term term;
240   private int docFreq;
241 
242   SegmentsTermEnum(SegmentReader[] readers, int[] starts, Term t)
243        throws IOException   {
244     queue = new SegmentMergeQueue(readers.length);
245     for (int i = 0; i < readers.length; i++) {
246       SegmentReader reader = readers[i];
247       SegmentTermEnum termEnum;
248 
249       if (t != null) {
250     termEnum = (SegmentTermEnum)reader.terms(t);
251       } else
252     termEnum = (SegmentTermEnum)reader.terms();
253 
254       SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
255       if (t == null ? smi.next() : termEnum.term() != null)
256     queue.put(smi);               // initialize queue
257       else
258     smi.close();
259     }
260 
261     if (t != null && queue.size() > 0) {
262       next();
263     }
264   }
265 
266   public final boolean next() throws IOException   {
267     SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
268     if (top == null) {
269       term = null;
270       return false;
271     }
272 
273     term = top.term;
274     docFreq = 0;
275 
276     while (top != null && term.compareTo(top.term) == 0) {
277       queue.pop();
278       docFreq += top.termEnum.docFreq();      // increment freq
279       if (top.next())
280     queue.put(top);               // restore queue
281       else
282     top.close();                  // done with a segment
283       top = (SegmentMergeInfo)queue.top();
284     }
285     return true;
286   }
287 
288   public final Term term() {
289     return term;
290   }
291 
292   public final int docFreq() {
293     return docFreq;
294   }
295 
296   public final void close() throws IOException   {
297     queue.close();
298   }
299 }
300 
301 class SegmentsTermDocs implements TermDocs {
302   protected SegmentReader[] readers;
303   protected int[] starts;
304   protected Term term;
305 
306   protected int base = 0;
307   protected int pointer = 0;
308 
309   private SegmentTermDocs[] segTermDocs;
310   protected SegmentTermDocs current;              // == segTermDocs[pointer]
311 
312   SegmentsTermDocs(SegmentReader[] r, int[] s) {
313     readers = r;
314     starts = s;
315 
316     segTermDocs = new SegmentTermDocs[r.length];
317   }
318 
319   public final int doc() {
320     return base + current.doc;
321   }
322   public final int freq() {
323     return current.freq;
324   }
325 
326   public final void seek(Term term) {
327     this.term = term;
328     this.base = 0;
329     this.pointer = 0;
330     this.current = null;
331   }
332 
333   public void seek(TermEnum termEnum) throws IOException   {
334     seek(termEnum.term());
335   }
336 
337   public final boolean next() throws IOException   {
338     if (current != null && current.next()) {
339       return true;
340     } else if (pointer < readers.length) {
341       base = starts[pointer];
342       current = termDocs(pointer++);
343       return next();
344     } else
345       return false;
346   }
347 
348   /** Optimized implementation. */
349   public final int read(final int[] docs, final int[] freqs)
350       throws IOException   {
351     while (true) {
352       while (current == null) {
353     if (pointer < readers.length) {       // try next segment
354       base = starts[pointer];
355       current = termDocs(pointer++);
356     } else {
357       return 0;
358     }
359       }
360       int end = current.read(docs, freqs);
361       if (end == 0) {                 // none left in segment
362     current = null;
363       } else {                    // got some
364     final int b = base;           // adjust doc numbers
365     for (int i = 0; i < end; i++)
366       docs[i] += b;
367     return end;
368       }
369     }
370   }
371 
372   /** As yet unoptimized implementation. */
373   public boolean skipTo(int target) throws IOException   {
374     do {
375       if (!next())
376     return false;
377     } while (target > doc());
378     return true;
379   }
380 
381   private SegmentTermDocs termDocs(int i) throws IOException   {
382     if (term == null)
383       return null;
384     SegmentTermDocs result = segTermDocs[i];
385     if (result == null)
386       result = segTermDocs[i] = termDocs(readers[i]);
387     result.seek(term);
388     return result;
389   }
390 
391   protected SegmentTermDocs termDocs(SegmentReader reader)
392     throws IOException   {
393     return (SegmentTermDocs)reader.termDocs();
394   }
395 
396   public final void close() throws IOException   {
397     for (int i = 0; i < segTermDocs.length; i++) {
398       if (segTermDocs[i] != null)
399         segTermDocs[i].close();
400     }
401   }
402 }
403 
404 class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
405   SegmentsTermPositions(SegmentReader[] r, int[] s) {
406     super(r,s);
407   }
408 
409   protected final SegmentTermDocs termDocs(SegmentReader reader)
410        throws IOException   {
411     return (SegmentTermDocs)reader.termPositions();
412   }
413 
414   public final int nextPosition() throws IOException   {
415     return ((SegmentTermPositions)current).nextPosition();
416   }
417 
418 }
419
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags