KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > index > MultiReader


1 package org.apache.lucene.index;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.document.Document;
20 import org.apache.lucene.document.Field;
21 import org.apache.lucene.store.Directory;
22
23 import java.io.IOException JavaDoc;
24 import java.util.*;
25
26 /** An IndexReader which reads multiple indexes, appending their content.
27  *
28  * @version $Id: MultiReader.java 355181 2005-12-08 19:53:06Z cutting $
29  */

30 public class MultiReader extends IndexReader {
31   private IndexReader[] subReaders;
32   private int[] starts; // 1st docno for each segment
33
private Hashtable normsCache = new Hashtable();
34   private int maxDoc = 0;
35   private int numDocs = -1;
36   private boolean hasDeletions = false;
37
38  /**
39   * <p>Construct a MultiReader aggregating the named set of (sub)readers.
40   * Directory locking for delete, undeleteAll, and setNorm operations is
41   * left to the subreaders. </p>
42   * <p>Note that all subreaders are closed if this Multireader is closed.</p>
43   * @param subReaders set of (sub)readers
44   * @throws IOException
45   */

46   public MultiReader(IndexReader[] subReaders) throws IOException JavaDoc {
47     super(subReaders.length == 0 ? null : subReaders[0].directory());
48     initialize(subReaders);
49   }
50
51   /** Construct reading the named set of readers. */
52   MultiReader(Directory directory, SegmentInfos sis, boolean closeDirectory, IndexReader[] subReaders) {
53     super(directory, sis, closeDirectory);
54     initialize(subReaders);
55   }
56
57   private void initialize(IndexReader[] subReaders) {
58     this.subReaders = subReaders;
59     starts = new int[subReaders.length + 1]; // build starts array
60
for (int i = 0; i < subReaders.length; i++) {
61       starts[i] = maxDoc;
62       maxDoc += subReaders[i].maxDoc(); // compute maxDocs
63

64       if (subReaders[i].hasDeletions())
65         hasDeletions = true;
66     }
67     starts[subReaders.length] = maxDoc;
68   }
69
70
71   /** Return an array of term frequency vectors for the specified document.
72    * The array contains a vector for each vectorized field in the document.
73    * Each vector vector contains term numbers and frequencies for all terms
74    * in a given vectorized field.
75    * If no such fields existed, the method returns null.
76    */

77   public TermFreqVector[] getTermFreqVectors(int n) throws IOException JavaDoc {
78     int i = readerIndex(n); // find segment num
79
return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
80
}
81
82   public TermFreqVector getTermFreqVector(int n, String JavaDoc field)
83       throws IOException JavaDoc {
84     int i = readerIndex(n); // find segment num
85
return subReaders[i].getTermFreqVector(n - starts[i], field);
86   }
87
88   public synchronized int numDocs() {
89     if (numDocs == -1) { // check cache
90
int n = 0; // cache miss--recompute
91
for (int i = 0; i < subReaders.length; i++)
92         n += subReaders[i].numDocs(); // sum from readers
93
numDocs = n;
94     }
95     return numDocs;
96   }
97
98   public int maxDoc() {
99     return maxDoc;
100   }
101
102   public Document document(int n) throws IOException JavaDoc {
103     int i = readerIndex(n); // find segment num
104
return subReaders[i].document(n - starts[i]); // dispatch to segment reader
105
}
106
107   public boolean isDeleted(int n) {
108     int i = readerIndex(n); // find segment num
109
return subReaders[i].isDeleted(n - starts[i]); // dispatch to segment reader
110
}
111
112   public boolean hasDeletions() { return hasDeletions; }
113
114   protected void doDelete(int n) throws IOException JavaDoc {
115     numDocs = -1; // invalidate cache
116
int i = readerIndex(n); // find segment num
117
subReaders[i].delete(n - starts[i]); // dispatch to segment reader
118
hasDeletions = true;
119   }
120
121   protected void doUndeleteAll() throws IOException JavaDoc {
122     for (int i = 0; i < subReaders.length; i++)
123       subReaders[i].undeleteAll();
124     hasDeletions = false;
125     numDocs = -1; // invalidate cache
126
}
127
128   private int readerIndex(int n) { // find reader for doc n:
129
int lo = 0; // search starts array
130
int hi = subReaders.length - 1; // for first element less
131

132     while (hi >= lo) {
133       int mid = (lo + hi) >> 1;
134       int midValue = starts[mid];
135       if (n < midValue)
136         hi = mid - 1;
137       else if (n > midValue)
138         lo = mid + 1;
139       else { // found a match
140
while (mid+1 < subReaders.length && starts[mid+1] == midValue) {
141           mid++; // scan to last match
142
}
143         return mid;
144       }
145     }
146     return hi;
147   }
148
149   public boolean hasNorms(String JavaDoc field) throws IOException JavaDoc {
150     for (int i = 0; i < subReaders.length; i++) {
151       if (subReaders[i].hasNorms(field)) return true;
152     }
153     return false;
154   }
155
156   private byte[] ones;
157   private byte[] fakeNorms() {
158     if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
159     return ones;
160   }
161
162   public synchronized byte[] norms(String JavaDoc field) throws IOException JavaDoc {
163     byte[] bytes = (byte[])normsCache.get(field);
164     if (bytes != null)
165       return bytes; // cache hit
166
if (!hasNorms(field))
167       return fakeNorms();
168
169     bytes = new byte[maxDoc()];
170     for (int i = 0; i < subReaders.length; i++)
171       subReaders[i].norms(field, bytes, starts[i]);
172     normsCache.put(field, bytes); // update cache
173
return bytes;
174   }
175
176   public synchronized void norms(String JavaDoc field, byte[] result, int offset)
177     throws IOException JavaDoc {
178     byte[] bytes = (byte[])normsCache.get(field);
179     if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
180     if (bytes != null) // cache hit
181
System.arraycopy(bytes, 0, result, offset, maxDoc());
182
183     for (int i = 0; i < subReaders.length; i++) // read from segments
184
subReaders[i].norms(field, result, offset + starts[i]);
185   }
186
187   protected void doSetNorm(int n, String JavaDoc field, byte value)
188     throws IOException JavaDoc {
189     normsCache.remove(field); // clear cache
190
int i = readerIndex(n); // find segment num
191
subReaders[i].setNorm(n-starts[i], field, value); // dispatch
192
}
193
194   public TermEnum terms() throws IOException JavaDoc {
195     return new MultiTermEnum(subReaders, starts, null);
196   }
197
198   public TermEnum terms(Term term) throws IOException JavaDoc {
199     return new MultiTermEnum(subReaders, starts, term);
200   }
201
202   public int docFreq(Term t) throws IOException JavaDoc {
203     int total = 0; // sum freqs in segments
204
for (int i = 0; i < subReaders.length; i++)
205       total += subReaders[i].docFreq(t);
206     return total;
207   }
208
209   public TermDocs termDocs() throws IOException JavaDoc {
210     return new MultiTermDocs(subReaders, starts);
211   }
212
213   public TermPositions termPositions() throws IOException JavaDoc {
214     return new MultiTermPositions(subReaders, starts);
215   }
216
217   protected void doCommit() throws IOException JavaDoc {
218     for (int i = 0; i < subReaders.length; i++)
219       subReaders[i].commit();
220   }
221
222   protected synchronized void doClose() throws IOException JavaDoc {
223     for (int i = 0; i < subReaders.length; i++)
224       subReaders[i].close();
225   }
226
227   /**
228    * @see IndexReader#getFieldNames()
229    */

230   public Collection getFieldNames() throws IOException JavaDoc {
231     // maintain a unique set of field names
232
Set fieldSet = new HashSet();
233     for (int i = 0; i < subReaders.length; i++) {
234       IndexReader reader = subReaders[i];
235       Collection names = reader.getFieldNames();
236       fieldSet.addAll(names);
237     }
238     return fieldSet;
239   }
240
241   /**
242    * @see IndexReader#getFieldNames(boolean)
243    */

244   public Collection getFieldNames(boolean indexed) throws IOException JavaDoc {
245     // maintain a unique set of field names
246
Set fieldSet = new HashSet();
247     for (int i = 0; i < subReaders.length; i++) {
248       IndexReader reader = subReaders[i];
249       Collection names = reader.getFieldNames(indexed);
250       fieldSet.addAll(names);
251     }
252     return fieldSet;
253   }
254
255   public Collection getIndexedFieldNames (Field.TermVector tvSpec){
256     // maintain a unique set of field names
257
Set fieldSet = new HashSet();
258     for (int i = 0; i < subReaders.length; i++) {
259       IndexReader reader = subReaders[i];
260       Collection names = reader.getIndexedFieldNames(tvSpec);
261       fieldSet.addAll(names);
262     }
263     return fieldSet;
264   }
265
266   /**
267    * @see IndexReader#getFieldNames(IndexReader.FieldOption)
268    */

269   public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
270     // maintain a unique set of field names
271
Set fieldSet = new HashSet();
272     for (int i = 0; i < subReaders.length; i++) {
273       IndexReader reader = subReaders[i];
274       Collection names = reader.getFieldNames(fieldNames);
275       fieldSet.addAll(names);
276     }
277     return fieldSet;
278   }
279 }
280
281 class MultiTermEnum extends TermEnum {
282   private SegmentMergeQueue queue;
283
284   private Term term;
285   private int docFreq;
286
287   public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
288     throws IOException JavaDoc {
289     queue = new SegmentMergeQueue(readers.length);
290     for (int i = 0; i < readers.length; i++) {
291       IndexReader reader = readers[i];
292       TermEnum termEnum;
293
294       if (t != null) {
295         termEnum = reader.terms(t);
296       } else
297         termEnum = reader.terms();
298
299       SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
300       if (t == null ? smi.next() : termEnum.term() != null)
301         queue.put(smi); // initialize queue
302
else
303         smi.close();
304     }
305
306     if (t != null && queue.size() > 0) {
307       next();
308     }
309   }
310
311   public boolean next() throws IOException JavaDoc {
312     SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
313     if (top == null) {
314       term = null;
315       return false;
316     }
317
318     term = top.term;
319     docFreq = 0;
320
321     while (top != null && term.compareTo(top.term) == 0) {
322       queue.pop();
323       docFreq += top.termEnum.docFreq(); // increment freq
324
if (top.next())
325         queue.put(top); // restore queue
326
else
327         top.close(); // done with a segment
328
top = (SegmentMergeInfo)queue.top();
329     }
330     return true;
331   }
332
333   public Term term() {
334     return term;
335   }
336
337   public int docFreq() {
338     return docFreq;
339   }
340
341   public void close() throws IOException JavaDoc {
342     queue.close();
343   }
344 }
345
346 class MultiTermDocs implements TermDocs {
347   protected IndexReader[] readers;
348   protected int[] starts;
349   protected Term term;
350
351   protected int base = 0;
352   protected int pointer = 0;
353
354   private TermDocs[] readerTermDocs;
355   protected TermDocs current; // == readerTermDocs[pointer]
356

357   public MultiTermDocs(IndexReader[] r, int[] s) {
358     readers = r;
359     starts = s;
360
361     readerTermDocs = new TermDocs[r.length];
362   }
363
364   public int doc() {
365     return base + current.doc();
366   }
367   public int freq() {
368     return current.freq();
369   }
370
371   public void seek(Term term) {
372     this.term = term;
373     this.base = 0;
374     this.pointer = 0;
375     this.current = null;
376   }
377
378   public void seek(TermEnum termEnum) throws IOException JavaDoc {
379     seek(termEnum.term());
380   }
381
382   public boolean next() throws IOException JavaDoc {
383     if (current != null && current.next()) {
384       return true;
385     } else if (pointer < readers.length) {
386       base = starts[pointer];
387       current = termDocs(pointer++);
388       return next();
389     } else
390       return false;
391   }
392
393   /** Optimized implementation. */
394   public int read(final int[] docs, final int[] freqs) throws IOException JavaDoc {
395     while (true) {
396       while (current == null) {
397         if (pointer < readers.length) { // try next segment
398
base = starts[pointer];
399           current = termDocs(pointer++);
400         } else {
401           return 0;
402         }
403       }
404       int end = current.read(docs, freqs);
405       if (end == 0) { // none left in segment
406
current = null;
407       } else { // got some
408
final int b = base; // adjust doc numbers
409
for (int i = 0; i < end; i++)
410          docs[i] += b;
411         return end;
412       }
413     }
414   }
415
416   /** As yet unoptimized implementation. */
417   public boolean skipTo(int target) throws IOException JavaDoc {
418     do {
419       if (!next())
420         return false;
421     } while (target > doc());
422       return true;
423   }
424
425   private TermDocs termDocs(int i) throws IOException JavaDoc {
426     if (term == null)
427       return null;
428     TermDocs result = readerTermDocs[i];
429     if (result == null)
430       result = readerTermDocs[i] = termDocs(readers[i]);
431     result.seek(term);
432     return result;
433   }
434
435   protected TermDocs termDocs(IndexReader reader)
436     throws IOException JavaDoc {
437     return reader.termDocs();
438   }
439
440   public void close() throws IOException JavaDoc {
441     for (int i = 0; i < readerTermDocs.length; i++) {
442       if (readerTermDocs[i] != null)
443         readerTermDocs[i].close();
444     }
445   }
446 }
447
448 class MultiTermPositions extends MultiTermDocs implements TermPositions {
449   public MultiTermPositions(IndexReader[] r, int[] s) {
450     super(r,s);
451   }
452
453   protected TermDocs termDocs(IndexReader reader) throws IOException JavaDoc {
454     return (TermDocs)reader.termPositions();
455   }
456
457   public int nextPosition() throws IOException JavaDoc {
458     return ((TermPositions)current).nextPosition();
459   }
460
461 }
462
Popular Tags