KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > index > SegmentsReader


1 package org.apache.lucene.index;
2
3 /* ====================================================================
4  * The Apache Software License, Version 1.1
5  *
6  * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in
18  * the documentation and/or other materials provided with the
19  * distribution.
20  *
21  * 3. The end-user documentation included with the redistribution,
22  * if any, must include the following acknowledgment:
23  * "This product includes software developed by the
24  * Apache Software Foundation (http://www.apache.org/)."
25  * Alternately, this acknowledgment may appear in the software itself,
26  * if and wherever such third-party acknowledgments normally appear.
27  *
28  * 4. The names "Apache" and "Apache Software Foundation" and
29  * "Apache Lucene" must not be used to endorse or promote products
30  * derived from this software without prior written permission. For
31  * written permission, please contact apache@apache.org.
32  *
33  * 5. Products derived from this software may not be called "Apache",
34  * "Apache Lucene", nor may "Apache" appear in their name, without
35  * prior written permission of the Apache Software Foundation.
36  *
37  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  * ====================================================================
50  *
51  * This software consists of voluntary contributions made by many
52  * individuals on behalf of the Apache Software Foundation. For more
53  * information on the Apache Software Foundation, please see
54  * <http://www.apache.org/>.
55  */

56
57 import java.io.IOException JavaDoc;
58 import java.util.Collection JavaDoc;
59 import java.util.HashSet JavaDoc;
60 import java.util.Hashtable JavaDoc;
61 import java.util.Iterator JavaDoc;
62 import java.util.Set JavaDoc;
63
64 import org.apache.lucene.document.Document;
65 import org.apache.lucene.store.Directory;
66
67 /**
68  * FIXME: Describe class <code>SegmentsReader</code> here.
69  *
70  * @version $Id: SegmentsReader.java,v 1.17 2003/12/15 23:04:42 cutting Exp $
71  */

72 final class SegmentsReader extends IndexReader
73 {
74   protected SegmentReader[] readers;
75   protected int[] starts; // 1st docno for each segment
76
private Hashtable JavaDoc normsCache = new Hashtable JavaDoc();
77   private int maxDoc = 0;
78   private int numDocs = -1;
79   private boolean hasDeletions = false;
80   
81   SegmentsReader(SegmentInfos sis, Directory directory, SegmentReader[] r) throws IOException JavaDoc {
82     super(directory);
83     segmentInfos = sis;
84     readers = r;
85     starts = new int[readers.length + 1]; // build starts array
86
for (int i = 0; i < readers.length; i++) {
87       starts[i] = maxDoc;
88       maxDoc += readers[i].maxDoc(); // compute maxDocs
89

90       if (readers[i].hasDeletions())
91         hasDeletions = true;
92     }
93     starts[readers.length] = maxDoc;
94   }
95
96   public final synchronized int numDocs() {
97     if (numDocs == -1) { // check cache
98
int n = 0; // cache miss--recompute
99
for (int i = 0; i < readers.length; i++)
100     n += readers[i].numDocs(); // sum from readers
101
numDocs = n;
102     }
103     return numDocs;
104   }
105
106   public final int maxDoc() {
107     return maxDoc;
108   }
109
110   public final Document document(int n) throws IOException JavaDoc {
111     int i = readerIndex(n); // find segment num
112
return readers[i].document(n - starts[i]); // dispatch to segment reader
113
}
114
115   public final boolean isDeleted(int n) {
116     int i = readerIndex(n); // find segment num
117
return readers[i].isDeleted(n - starts[i]); // dispatch to segment reader
118
}
119
120   public boolean hasDeletions() { return hasDeletions; }
121
122   protected final synchronized void doDelete(int n) throws IOException JavaDoc {
123     numDocs = -1; // invalidate cache
124
int i = readerIndex(n); // find segment num
125
readers[i].doDelete(n - starts[i]); // dispatch to segment reader
126
hasDeletions = true;
127   }
128
129   public void undeleteAll() throws IOException JavaDoc {
130     for (int i = 0; i < readers.length; i++)
131       readers[i].undeleteAll();
132     hasDeletions = false;
133   }
134
135   private final int readerIndex(int n) { // find reader for doc n:
136
int lo = 0; // search starts array
137
int hi = readers.length - 1; // for first element less
138

139     while (hi >= lo) {
140       int mid = (lo + hi) >> 1;
141       int midValue = starts[mid];
142       if (n < midValue)
143     hi = mid - 1;
144       else if (n > midValue)
145     lo = mid + 1;
146       else { // found a match
147
while (mid+1 < readers.length && starts[mid+1] == midValue) {
148           mid++; // scan to last match
149
}
150     return mid;
151       }
152     }
153     return hi;
154   }
155
156   public final synchronized byte[] norms(String JavaDoc field) throws IOException JavaDoc {
157     byte[] bytes = (byte[])normsCache.get(field);
158     if (bytes != null)
159       return bytes; // cache hit
160

161     bytes = new byte[maxDoc()];
162     for (int i = 0; i < readers.length; i++)
163       readers[i].norms(field, bytes, starts[i]);
164     normsCache.put(field, bytes); // update cache
165
return bytes;
166   }
167
168   public synchronized void setNorm(int n, String JavaDoc field, byte value)
169     throws IOException JavaDoc {
170     normsCache.remove(field); // clear cache
171
int i = readerIndex(n); // find segment num
172
readers[i].setNorm(n-starts[i], field, value); // dispatch
173
}
174
175   public final TermEnum terms() throws IOException JavaDoc {
176     return new SegmentsTermEnum(readers, starts, null);
177   }
178
179   public final TermEnum terms(Term term) throws IOException JavaDoc {
180     return new SegmentsTermEnum(readers, starts, term);
181   }
182
183   public final int docFreq(Term t) throws IOException JavaDoc {
184     int total = 0; // sum freqs in segments
185
for (int i = 0; i < readers.length; i++)
186       total += readers[i].docFreq(t);
187     return total;
188   }
189
190   public final TermDocs termDocs() throws IOException JavaDoc {
191     return new SegmentsTermDocs(readers, starts);
192   }
193
194   public final TermPositions termPositions() throws IOException JavaDoc {
195     return new SegmentsTermPositions(readers, starts);
196   }
197
198   protected final synchronized void doClose() throws IOException JavaDoc {
199     for (int i = 0; i < readers.length; i++)
200       readers[i].close();
201   }
202
203   /**
204    * @see IndexReader#getFieldNames()
205    */

206   public Collection JavaDoc getFieldNames() throws IOException JavaDoc {
207     // maintain a unique set of field names
208
Set JavaDoc fieldSet = new HashSet JavaDoc();
209     for (int i = 0; i < readers.length; i++) {
210         SegmentReader reader = readers[i];
211         Collection JavaDoc names = reader.getFieldNames();
212         // iterate through the field names and add them to the set
213
for (Iterator JavaDoc iterator = names.iterator(); iterator.hasNext();) {
214             String JavaDoc s = (String JavaDoc) iterator.next();
215             fieldSet.add(s);
216         }
217     }
218     return fieldSet;
219   }
220
221   /**
222    * @see IndexReader#getFieldNames(boolean)
223    */

224   public Collection JavaDoc getFieldNames(boolean indexed) throws IOException JavaDoc {
225     // maintain a unique set of field names
226
Set JavaDoc fieldSet = new HashSet JavaDoc();
227     for (int i = 0; i < readers.length; i++) {
228         SegmentReader reader = readers[i];
229         Collection JavaDoc names = reader.getFieldNames(indexed);
230         fieldSet.addAll(names);
231     }
232     return fieldSet;
233   }
234 }
235
236 class SegmentsTermEnum extends TermEnum {
237   private SegmentMergeQueue queue;
238
239   private Term term;
240   private int docFreq;
241
242   SegmentsTermEnum(SegmentReader[] readers, int[] starts, Term t)
243        throws IOException JavaDoc {
244     queue = new SegmentMergeQueue(readers.length);
245     for (int i = 0; i < readers.length; i++) {
246       SegmentReader reader = readers[i];
247       SegmentTermEnum termEnum;
248
249       if (t != null) {
250     termEnum = (SegmentTermEnum)reader.terms(t);
251       } else
252     termEnum = (SegmentTermEnum)reader.terms();
253
254       SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
255       if (t == null ? smi.next() : termEnum.term() != null)
256     queue.put(smi); // initialize queue
257
else
258     smi.close();
259     }
260
261     if (t != null && queue.size() > 0) {
262       next();
263     }
264   }
265
266   public final boolean next() throws IOException JavaDoc {
267     SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
268     if (top == null) {
269       term = null;
270       return false;
271     }
272
273     term = top.term;
274     docFreq = 0;
275
276     while (top != null && term.compareTo(top.term) == 0) {
277       queue.pop();
278       docFreq += top.termEnum.docFreq(); // increment freq
279
if (top.next())
280     queue.put(top); // restore queue
281
else
282     top.close(); // done with a segment
283
top = (SegmentMergeInfo)queue.top();
284     }
285     return true;
286   }
287
288   public final Term term() {
289     return term;
290   }
291
292   public final int docFreq() {
293     return docFreq;
294   }
295
296   public final void close() throws IOException JavaDoc {
297     queue.close();
298   }
299 }
300
301 class SegmentsTermDocs implements TermDocs {
302   protected SegmentReader[] readers;
303   protected int[] starts;
304   protected Term term;
305
306   protected int base = 0;
307   protected int pointer = 0;
308
309   private SegmentTermDocs[] segTermDocs;
310   protected SegmentTermDocs current; // == segTermDocs[pointer]
311

312   SegmentsTermDocs(SegmentReader[] r, int[] s) {
313     readers = r;
314     starts = s;
315
316     segTermDocs = new SegmentTermDocs[r.length];
317   }
318
319   public final int doc() {
320     return base + current.doc;
321   }
322   public final int freq() {
323     return current.freq;
324   }
325
326   public final void seek(Term term) {
327     this.term = term;
328     this.base = 0;
329     this.pointer = 0;
330     this.current = null;
331   }
332
333   public void seek(TermEnum termEnum) throws IOException JavaDoc {
334     seek(termEnum.term());
335   }
336
337   public final boolean next() throws IOException JavaDoc {
338     if (current != null && current.next()) {
339       return true;
340     } else if (pointer < readers.length) {
341       base = starts[pointer];
342       current = termDocs(pointer++);
343       return next();
344     } else
345       return false;
346   }
347
348   /** Optimized implementation. */
349   public final int read(final int[] docs, final int[] freqs)
350       throws IOException JavaDoc {
351     while (true) {
352       while (current == null) {
353     if (pointer < readers.length) { // try next segment
354
base = starts[pointer];
355       current = termDocs(pointer++);
356     } else {
357       return 0;
358     }
359       }
360       int end = current.read(docs, freqs);
361       if (end == 0) { // none left in segment
362
current = null;
363       } else { // got some
364
final int b = base; // adjust doc numbers
365
for (int i = 0; i < end; i++)
366       docs[i] += b;
367     return end;
368       }
369     }
370   }
371
372   /** As yet unoptimized implementation. */
373   public boolean skipTo(int target) throws IOException JavaDoc {
374     do {
375       if (!next())
376     return false;
377     } while (target > doc());
378     return true;
379   }
380
381   private SegmentTermDocs termDocs(int i) throws IOException JavaDoc {
382     if (term == null)
383       return null;
384     SegmentTermDocs result = segTermDocs[i];
385     if (result == null)
386       result = segTermDocs[i] = termDocs(readers[i]);
387     result.seek(term);
388     return result;
389   }
390
391   protected SegmentTermDocs termDocs(SegmentReader reader)
392     throws IOException JavaDoc {
393     return (SegmentTermDocs)reader.termDocs();
394   }
395
396   public final void close() throws IOException JavaDoc {
397     for (int i = 0; i < segTermDocs.length; i++) {
398       if (segTermDocs[i] != null)
399         segTermDocs[i].close();
400     }
401   }
402 }
403
404 class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
405   SegmentsTermPositions(SegmentReader[] r, int[] s) {
406     super(r,s);
407   }
408
409   protected final SegmentTermDocs termDocs(SegmentReader reader)
410        throws IOException JavaDoc {
411     return (SegmentTermDocs)reader.termPositions();
412   }
413
414   public final int nextPosition() throws IOException JavaDoc {
415     return ((SegmentTermPositions)current).nextPosition();
416   }
417
418 }
419
Popular Tags