KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > index > ParallelReader


1 package org.apache.lucene.index;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import java.io.IOException JavaDoc;
20 import java.util.ArrayList JavaDoc;
21 import java.util.Collection JavaDoc;
22 import java.util.Enumeration JavaDoc;
23 import java.util.HashSet JavaDoc;
24 import java.util.Iterator JavaDoc;
25 import java.util.List JavaDoc;
26 import java.util.Map JavaDoc;
27 import java.util.Set JavaDoc;
28 import java.util.SortedMap JavaDoc;
29 import java.util.TreeMap JavaDoc;
30
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.Field;
33
34 /** An IndexReader which reads multiple, parallel indexes. Each index added
35  * must have the same number of documents, but typically each contains
36  * different fields. Each document contains the union of the fields of all
37  * documents with the same document number. When searching, matches for a
38  * query term are from the first index added that has the field.
39  *
40  * <p>This is useful, e.g., with collections that have large fields which
41  * change rarely and small fields that change more frequently. The smaller
42  * fields may be re-indexed in a new index and both indexes may be searched
43  * together.
44  *
45  * <p><strong>Warning:</strong> It is up to you to make sure all indexes
46  * are created and modified the same way. For example, if you add
47  * documents to one index, you need to add the same documents in the
48  * same order to the other indexes. <em>Failure to do so will result in
49  * undefined behavior</em>.
50  */

51 public class ParallelReader extends IndexReader {
52   private List JavaDoc readers = new ArrayList JavaDoc();
53   private SortedMap JavaDoc fieldToReader = new TreeMap JavaDoc();
54   private List JavaDoc storedFieldReaders = new ArrayList JavaDoc();
55
56   private int maxDoc;
57   private int numDocs;
58   private boolean hasDeletions;
59
60  /** Construct a ParallelReader. */
61   public ParallelReader() throws IOException JavaDoc { super(null); }
62     
63  /** Add an IndexReader. */
64   public void add(IndexReader reader) throws IOException JavaDoc {
65     add(reader, false);
66   }
67
68  /** Add an IndexReader whose stored fields will not be returned. This can
69   * accellerate search when stored fields are only needed from a subset of
70   * the IndexReaders.
71   *
72   * @throws IllegalArgumentException if not all indexes contain the same number
73   * of documents
74   * @throws IllegalArgumentException if not all indexes have the same value
75   * of {@link IndexReader#maxDoc()}
76   */

77   public void add(IndexReader reader, boolean ignoreStoredFields)
78     throws IOException JavaDoc {
79
80     if (readers.size() == 0) {
81       this.maxDoc = reader.maxDoc();
82       this.numDocs = reader.numDocs();
83       this.hasDeletions = reader.hasDeletions();
84     }
85
86     if (reader.maxDoc() != maxDoc) // check compatibility
87
throw new IllegalArgumentException JavaDoc
88         ("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
89     if (reader.numDocs() != numDocs)
90       throw new IllegalArgumentException JavaDoc
91         ("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs());
92     
93     Iterator JavaDoc i = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator();
94     while (i.hasNext()) { // update fieldToReader map
95
String JavaDoc field = (String JavaDoc)i.next();
96       if (fieldToReader.get(field) == null)
97         fieldToReader.put(field, reader);
98     }
99
100     if (!ignoreStoredFields)
101       storedFieldReaders.add(reader); // add to storedFieldReaders
102
readers.add(reader);
103   }
104
105   public int numDocs() { return numDocs; }
106
107   public int maxDoc() { return maxDoc; }
108
109   public boolean hasDeletions() { return hasDeletions; }
110
111   // check first reader
112
public boolean isDeleted(int n) {
113     if (readers.size() > 0)
114       return ((IndexReader)readers.get(0)).isDeleted(n);
115     return false;
116   }
117
118   // delete in all readers
119
protected void doDelete(int n) throws IOException JavaDoc {
120     for (int i = 0; i < readers.size(); i++) {
121       ((IndexReader)readers.get(i)).doDelete(n);
122     }
123     hasDeletions = true;
124   }
125
126   // undeleteAll in all readers
127
protected void doUndeleteAll() throws IOException JavaDoc {
128     for (int i = 0; i < readers.size(); i++) {
129       ((IndexReader)readers.get(i)).doUndeleteAll();
130     }
131     hasDeletions = false;
132   }
133
134   // append fields from storedFieldReaders
135
public Document document(int n) throws IOException JavaDoc {
136     Document result = new Document();
137     for (int i = 0; i < storedFieldReaders.size(); i++) {
138       IndexReader reader = (IndexReader)storedFieldReaders.get(i);
139       Enumeration JavaDoc fields = reader.document(n).fields();
140       while (fields.hasMoreElements()) {
141         result.add((Field)fields.nextElement());
142       }
143     }
144     return result;
145   }
146
147   // get all vectors
148
public TermFreqVector[] getTermFreqVectors(int n) throws IOException JavaDoc {
149     ArrayList JavaDoc results = new ArrayList JavaDoc();
150     Iterator JavaDoc i = fieldToReader.entrySet().iterator();
151     while (i.hasNext()) {
152       Map.Entry JavaDoc e = (Map.Entry JavaDoc)i.next();
153       IndexReader reader = (IndexReader)e.getKey();
154       String JavaDoc field = (String JavaDoc)e.getValue();
155       TermFreqVector vector = reader.getTermFreqVector(n, field);
156       if (vector != null)
157         results.add(vector);
158     }
159     return (TermFreqVector[])
160       results.toArray(new TermFreqVector[results.size()]);
161   }
162
163   public TermFreqVector getTermFreqVector(int n, String JavaDoc field)
164     throws IOException JavaDoc {
165     return ((IndexReader)fieldToReader.get(field)).getTermFreqVector(n, field);
166   }
167
168   public boolean hasNorms(String JavaDoc field) throws IOException JavaDoc {
169     return ((IndexReader)fieldToReader.get(field)).hasNorms(field);
170   }
171
172   public byte[] norms(String JavaDoc field) throws IOException JavaDoc {
173     return ((IndexReader)fieldToReader.get(field)).norms(field);
174   }
175
176   public void norms(String JavaDoc field, byte[] result, int offset)
177     throws IOException JavaDoc {
178      ((IndexReader)fieldToReader.get(field)).norms(field, result, offset);
179   }
180
181   protected void doSetNorm(int n, String JavaDoc field, byte value)
182     throws IOException JavaDoc {
183     ((IndexReader)fieldToReader.get(field)).doSetNorm(n, field, value);
184   }
185
186   public TermEnum terms() throws IOException JavaDoc {
187     return new ParallelTermEnum();
188   }
189
190   public TermEnum terms(Term term) throws IOException JavaDoc {
191     return new ParallelTermEnum(term);
192   }
193
194   public int docFreq(Term term) throws IOException JavaDoc {
195     return ((IndexReader)fieldToReader.get(term.field())).docFreq(term);
196   }
197
198   public TermDocs termDocs(Term term) throws IOException JavaDoc {
199     return new ParallelTermDocs(term);
200   }
201
202   public TermDocs termDocs() throws IOException JavaDoc {
203     return new ParallelTermDocs();
204   }
205
206   public TermPositions termPositions(Term term) throws IOException JavaDoc {
207     return new ParallelTermPositions(term);
208   }
209
210   public TermPositions termPositions() throws IOException JavaDoc {
211     return new ParallelTermPositions();
212   }
213
214   protected void doCommit() throws IOException JavaDoc {
215     for (int i = 0; i < readers.size(); i++)
216       ((IndexReader)readers.get(i)).commit();
217   }
218
219   protected synchronized void doClose() throws IOException JavaDoc {
220     for (int i = 0; i < readers.size(); i++)
221       ((IndexReader)readers.get(i)).close();
222   }
223
224   public Collection JavaDoc getFieldNames() throws IOException JavaDoc {
225     return fieldToReader.keySet();
226   }
227
228   public Collection JavaDoc getFieldNames(boolean indexed) throws IOException JavaDoc {
229     Set JavaDoc fieldSet = new HashSet JavaDoc();
230     for (int i = 0; i < readers.size(); i++) {
231       IndexReader reader = ((IndexReader)readers.get(i));
232       Collection JavaDoc names = reader.getFieldNames(indexed);
233       fieldSet.addAll(names);
234     }
235     return fieldSet;
236   }
237
238   public Collection JavaDoc getIndexedFieldNames (Field.TermVector tvSpec){
239     Set JavaDoc fieldSet = new HashSet JavaDoc();
240     for (int i = 0; i < readers.size(); i++) {
241       IndexReader reader = ((IndexReader)readers.get(i));
242       Collection JavaDoc names = reader.getIndexedFieldNames(tvSpec);
243       fieldSet.addAll(names);
244     }
245     return fieldSet;
246   }
247
248   public Collection JavaDoc getFieldNames (IndexReader.FieldOption fieldNames) {
249     Set JavaDoc fieldSet = new HashSet JavaDoc();
250     for (int i = 0; i < readers.size(); i++) {
251       IndexReader reader = ((IndexReader)readers.get(i));
252       Collection JavaDoc names = reader.getFieldNames(fieldNames);
253       fieldSet.addAll(names);
254     }
255     return fieldSet;
256   }
257
258   private class ParallelTermEnum extends TermEnum {
259     private String JavaDoc field;
260     private TermEnum termEnum;
261
262     public ParallelTermEnum() throws IOException JavaDoc {
263       field = (String JavaDoc)fieldToReader.firstKey();
264       if (field != null)
265         termEnum = ((IndexReader)fieldToReader.get(field)).terms();
266     }
267     
268     public ParallelTermEnum(Term term) throws IOException JavaDoc {
269       field = term.field();
270       termEnum = ((IndexReader)fieldToReader.get(field)).terms(term);
271     }
272     
273     public boolean next() throws IOException JavaDoc {
274       if (field == null)
275         return false;
276
277       boolean next = termEnum.next();
278
279       // still within field?
280
if (next && termEnum.term().field() == field)
281         return true; // yes, keep going
282

283       termEnum.close(); // close old termEnum
284

285       // find the next field, if any
286
field = (String JavaDoc)fieldToReader.tailMap(field).firstKey();
287       if (field != null) {
288         termEnum = ((IndexReader)fieldToReader.get(field)).terms();
289         return true;
290       }
291
292       return false; // no more fields
293

294     }
295
296     public Term term() { return termEnum.term(); }
297     public int docFreq() { return termEnum.docFreq(); }
298     public void close() throws IOException JavaDoc { termEnum.close(); }
299
300   }
301
302   // wrap a TermDocs in order to support seek(Term)
303
private class ParallelTermDocs implements TermDocs {
304     protected TermDocs termDocs;
305
306     public ParallelTermDocs() {}
307     public ParallelTermDocs(Term term) throws IOException JavaDoc { seek(term); }
308
309     public int doc() { return termDocs.doc(); }
310     public int freq() { return termDocs.freq(); }
311
312     public void seek(Term term) throws IOException JavaDoc {
313       termDocs = ((IndexReader)fieldToReader.get(term.field())).termDocs(term);
314     }
315
316     public void seek(TermEnum termEnum) throws IOException JavaDoc {
317       seek(termEnum.term());
318     }
319
320     public boolean next() throws IOException JavaDoc { return termDocs.next(); }
321
322     public int read(final int[] docs, final int[] freqs) throws IOException JavaDoc {
323       return termDocs.read(docs, freqs);
324     }
325
326     public boolean skipTo(int target) throws IOException JavaDoc {
327       return termDocs.skipTo(target);
328     }
329
330     public void close() throws IOException JavaDoc { termDocs.close(); }
331
332   }
333
334   private class ParallelTermPositions
335     extends ParallelTermDocs implements TermPositions {
336
337     public ParallelTermPositions() {}
338     public ParallelTermPositions(Term term) throws IOException JavaDoc { seek(term); }
339
340     public void seek(Term term) throws IOException JavaDoc {
341       termDocs = ((IndexReader)fieldToReader.get(term.field()))
342         .termPositions(term);
343     }
344
345     public int nextPosition() throws IOException JavaDoc {
346       return ((TermPositions)termDocs).nextPosition();
347     }
348
349   }
350
351 }
352
353
Popular Tags