KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > index > TermInfosReader


1 package org.apache.lucene.index;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import java.io.IOException JavaDoc;
20
21 import org.apache.lucene.store.Directory;
22
23 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
24  * Directory. Pairs are accessed either by Term or by ordinal position the
25  * set. */

26
27 final class TermInfosReader {
28   private Directory directory;
29   private String JavaDoc segment;
30   private FieldInfos fieldInfos;
31
32   private ThreadLocal JavaDoc enumerators = new ThreadLocal JavaDoc();
33   private SegmentTermEnum origEnum;
34   private long size;
35
36   private Term[] indexTerms = null;
37   private TermInfo[] indexInfos;
38   private long[] indexPointers;
39   
40   private SegmentTermEnum indexEnum;
41
42   TermInfosReader(Directory dir, String JavaDoc seg, FieldInfos fis)
43        throws IOException JavaDoc {
44     directory = dir;
45     segment = seg;
46     fieldInfos = fis;
47
48     origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"),
49                                    fieldInfos, false);
50     size = origEnum.size;
51
52     indexEnum =
53       new SegmentTermEnum(directory.openInput(segment + ".tii"),
54               fieldInfos, true);
55   }
56
57   protected void finalize() {
58     // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
59
enumerators.set(null);
60   }
61
62   public int getSkipInterval() {
63     return origEnum.skipInterval;
64   }
65
66   final void close() throws IOException JavaDoc {
67     if (origEnum != null)
68       origEnum.close();
69     if (indexEnum != null)
70       indexEnum.close();
71   }
72
73   /** Returns the number of term/value pairs in the set. */
74   final long size() {
75     return size;
76   }
77
78   private SegmentTermEnum getEnum() {
79     SegmentTermEnum termEnum = (SegmentTermEnum)enumerators.get();
80     if (termEnum == null) {
81       termEnum = terms();
82       enumerators.set(termEnum);
83     }
84     return termEnum;
85   }
86
87   private synchronized void ensureIndexIsRead() throws IOException JavaDoc {
88     if (indexTerms != null) // index already read
89
return; // do nothing
90
try {
91       int indexSize = (int)indexEnum.size; // otherwise read index
92

93       indexTerms = new Term[indexSize];
94       indexInfos = new TermInfo[indexSize];
95       indexPointers = new long[indexSize];
96         
97       for (int i = 0; indexEnum.next(); i++) {
98         indexTerms[i] = indexEnum.term();
99         indexInfos[i] = indexEnum.termInfo();
100         indexPointers[i] = indexEnum.indexPointer;
101       }
102     } finally {
103         indexEnum.close();
104         indexEnum = null;
105     }
106   }
107
108   /** Returns the offset of the greatest index entry which is less than or equal to term.*/
109   private final int getIndexOffset(Term term) {
110     int lo = 0; // binary search indexTerms[]
111
int hi = indexTerms.length - 1;
112
113     while (hi >= lo) {
114       int mid = (lo + hi) >> 1;
115       int delta = term.compareTo(indexTerms[mid]);
116       if (delta < 0)
117     hi = mid - 1;
118       else if (delta > 0)
119     lo = mid + 1;
120       else
121     return mid;
122     }
123     return hi;
124   }
125
126   private final void seekEnum(int indexOffset) throws IOException JavaDoc {
127     getEnum().seek(indexPointers[indexOffset],
128           (indexOffset * getEnum().indexInterval) - 1,
129           indexTerms[indexOffset], indexInfos[indexOffset]);
130   }
131
132   /** Returns the TermInfo for a Term in the set, or null. */
133   TermInfo get(Term term) throws IOException JavaDoc {
134     if (size == 0) return null;
135
136     ensureIndexIsRead();
137
138     // optimize sequential access: first try scanning cached enum w/o seeking
139
SegmentTermEnum enumerator = getEnum();
140     if (enumerator.term() != null // term is at or past current
141
&& ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0)
142         || term.compareTo(enumerator.term()) >= 0)) {
143       int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
144       if (indexTerms.length == enumOffset // but before end of block
145
|| term.compareTo(indexTerms[enumOffset]) < 0)
146     return scanEnum(term); // no need to seek
147
}
148
149     // random-access: must seek
150
seekEnum(getIndexOffset(term));
151     return scanEnum(term);
152   }
153
154   /** Scans within block for matching term. */
155   private final TermInfo scanEnum(Term term) throws IOException JavaDoc {
156     SegmentTermEnum enumerator = getEnum();
157     enumerator.scanTo(term);
158     if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0)
159       return enumerator.termInfo();
160     else
161       return null;
162   }
163
164   /** Returns the nth term in the set. */
165   final Term get(int position) throws IOException JavaDoc {
166     if (size == 0) return null;
167
168     SegmentTermEnum enumerator = getEnum();
169     if (enumerator != null && enumerator.term() != null &&
170         position >= enumerator.position &&
171     position < (enumerator.position + enumerator.indexInterval))
172       return scanEnum(position); // can avoid seek
173

174     seekEnum(position / enumerator.indexInterval); // must seek
175
return scanEnum(position);
176   }
177
178   private final Term scanEnum(int position) throws IOException JavaDoc {
179     SegmentTermEnum enumerator = getEnum();
180     while(enumerator.position < position)
181       if (!enumerator.next())
182     return null;
183
184     return enumerator.term();
185   }
186
187   /** Returns the position of a Term in the set or -1. */
188   final long getPosition(Term term) throws IOException JavaDoc {
189     if (size == 0) return -1;
190
191     ensureIndexIsRead();
192     int indexOffset = getIndexOffset(term);
193     seekEnum(indexOffset);
194
195     SegmentTermEnum enumerator = getEnum();
196     while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
197
198     if (term.compareTo(enumerator.term()) == 0)
199       return enumerator.position;
200     else
201       return -1;
202   }
203
204   /** Returns an enumeration of all the Terms and TermInfos in the set. */
205   public SegmentTermEnum terms() {
206     return (SegmentTermEnum)origEnum.clone();
207   }
208
209   /** Returns an enumeration of terms starting at or after the named term. */
210   public SegmentTermEnum terms(Term term) throws IOException JavaDoc {
211     get(term);
212     return (SegmentTermEnum)getEnum().clone();
213   }
214 }
215
Popular Tags