KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > index > TermInfosWriter


1 package org.apache.lucene.index;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19
20 import java.io.IOException JavaDoc;
21 import org.apache.lucene.store.IndexOutput;
22 import org.apache.lucene.store.Directory;
23 import org.apache.lucene.util.StringHelper;
24
25 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
26   Directory. A TermInfos can be written once, in order. */

27
28 final class TermInfosWriter {
29   /** The file format version, a negative number. */
30   public static final int FORMAT = -2;
31
32   private FieldInfos fieldInfos;
33   private IndexOutput output;
34   private Term lastTerm = new Term("", "");
35   private TermInfo lastTi = new TermInfo();
36   private long size = 0;
37
38   // TODO: the default values for these two parameters should be settable from
39
// IndexWriter. However, once that's done, folks will start setting them to
40
// ridiculous values and complaining that things don't work well, as with
41
// mergeFactor. So, let's wait until a number of folks find that alternate
42
// values work better. Note that both of these values are stored in the
43
// segment, so that it's safe to change these w/o rebuilding all indexes.
44

45   /** Expert: The fraction of terms in the "dictionary" which should be stored
46    * in RAM. Smaller values use more memory, but make searching slightly
47    * faster, while larger values use less memory and make searching slightly
48    * slower. Searching is typically not dominated by dictionary lookup, so
49    * tweaking this is rarely useful.*/

50   int indexInterval = 128;
51
52   /** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
53    * used to accellerate {@link TermDocs#skipTo(int)}. Larger values result in
54    * smaller indexes, greater acceleration, but fewer accelerable cases, while
55    * smaller values result in bigger indexes, less acceleration and more
56    * accelerable cases. More detailed experiments would be useful here. */

57   int skipInterval = 16;
58
59   private long lastIndexPointer = 0;
60   private boolean isIndex = false;
61
62   private TermInfosWriter other = null;
63
64   TermInfosWriter(Directory directory, String JavaDoc segment, FieldInfos fis,
65                   int interval)
66        throws IOException JavaDoc {
67     initialize(directory, segment, fis, interval, false);
68     other = new TermInfosWriter(directory, segment, fis, interval, true);
69     other.other = this;
70   }
71
72   private TermInfosWriter(Directory directory, String JavaDoc segment, FieldInfos fis,
73                           int interval, boolean isIndex) throws IOException JavaDoc {
74     initialize(directory, segment, fis, interval, isIndex);
75   }
76
77   private void initialize(Directory directory, String JavaDoc segment, FieldInfos fis,
78                           int interval, boolean isi) throws IOException JavaDoc {
79     indexInterval = interval;
80     fieldInfos = fis;
81     isIndex = isi;
82     output = directory.createOutput(segment + (isIndex ? ".tii" : ".tis"));
83     output.writeInt(FORMAT); // write format
84
output.writeLong(0); // leave space for size
85
output.writeInt(indexInterval); // write indexInterval
86
output.writeInt(skipInterval); // write skipInterval
87
}
88
89   /** Adds a new <Term, TermInfo> pair to the set.
90     Term must be lexicographically greater than all previous Terms added.
91     TermInfo pointers must be positive and greater than all previous.*/

92   final void add(Term term, TermInfo ti)
93        throws IOException JavaDoc {
94     if (!isIndex && term.compareTo(lastTerm) <= 0)
95       throw new IOException JavaDoc("term out of order");
96     if (ti.freqPointer < lastTi.freqPointer)
97       throw new IOException JavaDoc("freqPointer out of order");
98     if (ti.proxPointer < lastTi.proxPointer)
99       throw new IOException JavaDoc("proxPointer out of order");
100
101     if (!isIndex && size % indexInterval == 0)
102       other.add(lastTerm, lastTi); // add an index term
103

104     writeTerm(term); // write term
105
output.writeVInt(ti.docFreq); // write doc freq
106
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
107
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
108
109     if (ti.docFreq >= skipInterval) {
110       output.writeVInt(ti.skipOffset);
111     }
112
113     if (isIndex) {
114       output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
115       lastIndexPointer = other.output.getFilePointer(); // write pointer
116
}
117
118     lastTi.set(ti);
119     size++;
120   }
121
122   private final void writeTerm(Term term)
123        throws IOException JavaDoc {
124     int start = StringHelper.stringDifference(lastTerm.text, term.text);
125     int length = term.text.length() - start;
126
127     output.writeVInt(start); // write shared prefix length
128
output.writeVInt(length); // write delta length
129
output.writeChars(term.text, start, length); // write delta chars
130

131     output.writeVInt(fieldInfos.fieldNumber(term.field)); // write field num
132

133     lastTerm = term;
134   }
135
136
137
138   /** Called to complete TermInfos creation. */
139   final void close() throws IOException JavaDoc {
140     output.seek(4); // write size after format
141
output.writeLong(size);
142     output.close();
143
144     if (!isIndex)
145       other.close();
146   }
147
148 }
149
Popular Tags