1 package org.apache.lucene.index; 2 3 18 19 import org.apache.lucene.store.Directory; 20 import org.apache.lucene.store.IndexInput; 21 22 import java.io.IOException ; 23 24 27 class TermVectorsReader implements Cloneable { 28 private FieldInfos fieldInfos; 29 30 private IndexInput tvx; 31 private IndexInput tvd; 32 private IndexInput tvf; 33 private int size; 34 35 private int tvdFormat; 36 private int tvfFormat; 37 38 TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos) 39 throws IOException { 40 if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) { 41 tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION); 42 checkValidFormat(tvx); 43 tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION); 44 tvdFormat = checkValidFormat(tvd); 45 tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION); 46 tvfFormat = checkValidFormat(tvf); 47 size = (int) tvx.length() / 8; 48 } 49 50 this.fieldInfos = fieldInfos; 51 } 52 53 private int checkValidFormat(IndexInput in) throws IOException 54 { 55 int format = in.readInt(); 56 if (format > TermVectorsWriter.FORMAT_VERSION) 57 { 58 throw new IOException ("Incompatible format version: " + format + " expected " 59 + TermVectorsWriter.FORMAT_VERSION + " or less"); 60 } 61 return format; 62 } 63 64 void close() throws IOException { 65 IOException keep = null; 68 if (tvx != null) try { tvx.close(); } catch (IOException e) { if (keep == null) keep = e; } 69 if (tvd != null) try { tvd.close(); } catch (IOException e) { if (keep == null) keep = e; } 70 if (tvf != null) try { tvf.close(); } catch (IOException e) { if (keep == null) keep = e; } 71 if (keep != null) throw (IOException ) keep.fillInStackTrace(); 72 } 73 74 78 int size() { 79 return size; 80 } 81 82 89 TermFreqVector get(int docNum, String field) throws IOException { 90 int fieldNumber = fieldInfos.fieldNumber(field); 92 TermFreqVector result = null; 93 if (tvx != null) { 94 tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE); 99 long position = tvx.readLong(); 101 102 tvd.seek(position); 103 int fieldCount = tvd.readVInt(); 104 int number = 0; 109 int found = -1; 110 for (int i = 0; i < fieldCount; i++) { 111 if(tvdFormat == TermVectorsWriter.FORMAT_VERSION) 112 number = tvd.readVInt(); 113 else 114 number += tvd.readVInt(); 115 116 if (number == fieldNumber) 117 found = i; 118 } 119 120 if (found != -1) { 123 position = 0; 125 for (int i = 0; i <= found; i++) 126 position += tvd.readVLong(); 127 128 result = readTermVector(field, position); 129 } else { 130 } 132 } else { 133 } 135 return result; 136 } 137 138 145 TermFreqVector[] get(int docNum) throws IOException { 146 TermFreqVector[] result = null; 147 if (tvx != null) { 149 tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE); 151 long position = tvx.readLong(); 152 153 tvd.seek(position); 154 int fieldCount = tvd.readVInt(); 155 156 if (fieldCount != 0) { 158 int number = 0; 159 String [] fields = new String [fieldCount]; 160 161 for (int i = 0; i < fieldCount; i++) { 162 if(tvdFormat == TermVectorsWriter.FORMAT_VERSION) 163 number = tvd.readVInt(); 164 else 165 number += tvd.readVInt(); 166 167 fields[i] = fieldInfos.fieldName(number); 168 } 169 170 position = 0; 172 long[] tvfPointers = new long[fieldCount]; 173 for (int i = 0; i < fieldCount; i++) { 174 position += tvd.readVLong(); 175 tvfPointers[i] = position; 176 } 177 178 result = readTermVectors(fields, tvfPointers); 179 } 180 } else { 181 } 183 return result; 184 } 185 186 187 private SegmentTermVector[] readTermVectors(String fields[], long tvfPointers[]) 188 throws IOException { 189 SegmentTermVector res[] = new SegmentTermVector[fields.length]; 190 for (int i = 0; i < fields.length; i++) { 191 res[i] = readTermVector(fields[i], tvfPointers[i]); 192 } 193 return res; 194 } 195 196 203 private SegmentTermVector readTermVector(String field, long tvfPointer) 204 throws IOException { 205 206 tvf.seek(tvfPointer); 209 210 int numTerms = tvf.readVInt(); 211 if (numTerms == 0) 214 return new SegmentTermVector(field, null, null); 215 216 boolean storePositions; 217 boolean storeOffsets; 218 219 if(tvfFormat == TermVectorsWriter.FORMAT_VERSION){ 220 byte bits = tvf.readByte(); 221 storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0; 222 storeOffsets = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0; 223 } 224 else{ 225 tvf.readVInt(); 226 storePositions = false; 227 storeOffsets = false; 228 } 229 230 String terms[] = new String [numTerms]; 231 int termFreqs[] = new int[numTerms]; 232 233 int positions[][] = null; 235 TermVectorOffsetInfo offsets[][] = null; 236 if(storePositions) 237 positions = new int[numTerms][]; 238 if(storeOffsets) 239 offsets = new TermVectorOffsetInfo[numTerms][]; 240 241 int start = 0; 242 int deltaLength = 0; 243 int totalLength = 0; 244 char [] buffer = new char[10]; char[] previousBuffer = {}; 246 247 for (int i = 0; i < numTerms; i++) { 248 start = tvf.readVInt(); 249 deltaLength = tvf.readVInt(); 250 totalLength = start + deltaLength; 251 if (buffer.length < totalLength) { buffer = null; buffer = new char[totalLength]; 254 255 if (start > 0) System.arraycopy(previousBuffer, 0, buffer, 0, start); 257 } 258 259 tvf.readChars(buffer, start, deltaLength); 260 terms[i] = new String (buffer, 0, totalLength); 261 previousBuffer = buffer; 262 int freq = tvf.readVInt(); 263 termFreqs[i] = freq; 264 265 if (storePositions) { int [] pos = new int[freq]; 267 positions[i] = pos; 268 int prevPosition = 0; 269 for (int j = 0; j < freq; j++) 270 { 271 pos[j] = prevPosition + tvf.readVInt(); 272 prevPosition = pos[j]; 273 } 274 } 275 276 if (storeOffsets) { 277 TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq]; 278 offsets[i] = offs; 279 int prevOffset = 0; 280 for (int j = 0; j < freq; j++) { 281 int startOffset = prevOffset + tvf.readVInt(); 282 int endOffset = startOffset + tvf.readVInt(); 283 offs[j] = new TermVectorOffsetInfo(startOffset, endOffset); 284 prevOffset = endOffset; 285 } 286 } 287 } 288 289 SegmentTermVector tv; 290 if (storePositions || storeOffsets){ 291 tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets); 292 } 293 else { 294 tv = new SegmentTermVector(field, terms, termFreqs); 295 } 296 return tv; 297 } 298 299 protected Object clone() { 300 301 if (tvx == null || tvd == null || tvf == null) 302 return null; 303 304 TermVectorsReader clone = null; 305 try { 306 clone = (TermVectorsReader) super.clone(); 307 } catch (CloneNotSupportedException e) {} 308 309 clone.tvx = (IndexInput) tvx.clone(); 310 clone.tvd = (IndexInput) tvd.clone(); 311 clone.tvf = (IndexInput) tvf.clone(); 312 313 return clone; 314 } 315 } 316 | Popular Tags |