1 package org.apache.lucene.index; 2 3 18 19 import java.io.IOException ; 20 import java.util.*; 21 22 import org.apache.lucene.document.Document; 23 import org.apache.lucene.document.Field; 24 import org.apache.lucene.store.IndexInput; 25 import org.apache.lucene.store.IndexOutput; 26 import org.apache.lucene.store.Directory; 27 import org.apache.lucene.util.BitVector; 28 import org.apache.lucene.search.DefaultSimilarity; 29 30 33 class SegmentReader extends IndexReader { 34 private String segment; 35 36 FieldInfos fieldInfos; 37 private FieldsReader fieldsReader; 38 39 TermInfosReader tis; 40 TermVectorsReader termVectorsReaderOrig = null; 41 ThreadLocal termVectorsLocal = new ThreadLocal (); 42 43 BitVector deletedDocs = null; 44 private boolean deletedDocsDirty = false; 45 private boolean normsDirty = false; 46 private boolean undeleteAll = false; 47 48 IndexInput freqStream; 49 IndexInput proxStream; 50 51 CompoundFileReader cfsReader = null; 53 54 private class Norm { 55 public Norm(IndexInput in, int number) 56 { 57 this.in = in; 58 this.number = number; 59 } 60 61 private IndexInput in; 62 private byte[] bytes; 63 private boolean dirty; 64 private int number; 65 66 private void reWrite() throws IOException { 67 IndexOutput out = directory().createOutput(segment + ".tmp"); 69 try { 70 out.writeBytes(bytes, maxDoc()); 71 } finally { 72 out.close(); 73 } 74 String fileName; 75 if(cfsReader == null) 76 fileName = segment + ".f" + number; 77 else{ 78 fileName = segment + ".s" + number; 80 } 81 directory().renameFile(segment + ".tmp", fileName); 82 this.dirty = false; 83 } 84 } 85 86 private Hashtable norms = new Hashtable(); 87 88 89 private static Class IMPL; 90 static { 91 try { 92 String name = 93 System.getProperty("org.apache.lucene.SegmentReader.class", 94 SegmentReader.class.getName()); 95 IMPL = Class.forName(name); 96 } catch (ClassNotFoundException e) { 97 throw new RuntimeException ("cannot load SegmentReader class: " + e); 98 } catch (SecurityException se) { 99 try { 100 IMPL = Class.forName(SegmentReader.class.getName()); 101 } catch (ClassNotFoundException e) { 102 throw new RuntimeException ("cannot load default SegmentReader class: " + e); 103 } 104 } 105 } 106 107 protected SegmentReader() { super(null); } 108 109 public static SegmentReader get(SegmentInfo si) throws IOException { 110 return get(si.dir, si, null, false, false); 111 } 112 113 public static SegmentReader get(SegmentInfos sis, SegmentInfo si, 114 boolean closeDir) throws IOException { 115 return get(si.dir, si, sis, closeDir, true); 116 } 117 118 public static SegmentReader get(Directory dir, SegmentInfo si, 119 SegmentInfos sis, 120 boolean closeDir, boolean ownDir) 121 throws IOException { 122 SegmentReader instance; 123 try { 124 instance = (SegmentReader)IMPL.newInstance(); 125 } catch (Exception e) { 126 throw new RuntimeException ("cannot load SegmentReader class: " + e); 127 } 128 instance.init(dir, sis, closeDir, ownDir); 129 instance.initialize(si); 130 return instance; 131 } 132 133 private void initialize(SegmentInfo si) throws IOException { 134 segment = si.name; 135 136 Directory cfsDir = directory(); 138 if (directory().fileExists(segment + ".cfs")) { 139 cfsReader = new CompoundFileReader(directory(), segment + ".cfs"); 140 cfsDir = cfsReader; 141 } 142 143 fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); 145 fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos); 146 147 tis = new TermInfosReader(cfsDir, segment, fieldInfos); 148 149 if (hasDeletions(si)) 151 deletedDocs = new BitVector(directory(), segment + ".del"); 152 153 freqStream = cfsDir.openInput(segment + ".frq"); 156 proxStream = cfsDir.openInput(segment + ".prx"); 157 openNorms(cfsDir); 158 159 if (fieldInfos.hasVectors()) { termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos); 161 } 162 } 163 164 protected void finalize() { 165 termVectorsLocal.set(null); 167 super.finalize(); 168 } 169 170 protected void doCommit() throws IOException { 171 if (deletedDocsDirty) { deletedDocs.write(directory(), segment + ".tmp"); 173 directory().renameFile(segment + ".tmp", segment + ".del"); 174 } 175 if(undeleteAll && directory().fileExists(segment + ".del")){ 176 directory().deleteFile(segment + ".del"); 177 } 178 if (normsDirty) { Enumeration values = norms.elements(); 180 while (values.hasMoreElements()) { 181 Norm norm = (Norm) values.nextElement(); 182 if (norm.dirty) { 183 norm.reWrite(); 184 } 185 } 186 } 187 deletedDocsDirty = false; 188 normsDirty = false; 189 undeleteAll = false; 190 } 191 192 protected void doClose() throws IOException { 193 fieldsReader.close(); 194 tis.close(); 195 196 if (freqStream != null) 197 freqStream.close(); 198 if (proxStream != null) 199 proxStream.close(); 200 201 closeNorms(); 202 203 if (termVectorsReaderOrig != null) 204 termVectorsReaderOrig.close(); 205 206 if (cfsReader != null) 207 cfsReader.close(); 208 } 209 210 static boolean hasDeletions(SegmentInfo si) throws IOException { 211 return si.dir.fileExists(si.name + ".del"); 212 } 213 214 public boolean hasDeletions() { 215 return deletedDocs != null; 216 } 217 218 219 static boolean usesCompoundFile(SegmentInfo si) throws IOException { 220 return si.dir.fileExists(si.name + ".cfs"); 221 } 222 223 static boolean hasSeparateNorms(SegmentInfo si) throws IOException { 224 String [] result = si.dir.list(); 225 String pattern = si.name + ".s"; 226 int patternLength = pattern.length(); 227 for(int i = 0; i < result.length; i++){ 228 if(result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength))) 229 return true; 230 } 231 return false; 232 } 233 234 protected void doDelete(int docNum) { 235 if (deletedDocs == null) 236 deletedDocs = new BitVector(maxDoc()); 237 deletedDocsDirty = true; 238 undeleteAll = false; 239 deletedDocs.set(docNum); 240 } 241 242 protected void doUndeleteAll() { 243 deletedDocs = null; 244 deletedDocsDirty = false; 245 undeleteAll = true; 246 } 247 248 Vector files() throws IOException { 249 Vector files = new Vector(16); 250 251 for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { 252 String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i]; 253 if (directory().fileExists(name)) 254 files.addElement(name); 255 } 256 257 for (int i = 0; i < fieldInfos.size(); i++) { 258 FieldInfo fi = fieldInfos.fieldInfo(i); 259 if (fi.isIndexed && !fi.omitNorms){ 260 String name; 261 if(cfsReader == null) 262 name = segment + ".f" + i; 263 else 264 name = segment + ".s" + i; 265 if (directory().fileExists(name)) 266 files.addElement(name); 267 } 268 } 269 return files; 270 } 271 272 public TermEnum terms() { 273 return tis.terms(); 274 } 275 276 public TermEnum terms(Term t) throws IOException { 277 return tis.terms(t); 278 } 279 280 public synchronized Document document(int n) throws IOException { 281 if (isDeleted(n)) 282 throw new IllegalArgumentException 283 ("attempt to access a deleted document"); 284 return fieldsReader.doc(n); 285 } 286 287 public synchronized boolean isDeleted(int n) { 288 return (deletedDocs != null && deletedDocs.get(n)); 289 } 290 291 public TermDocs termDocs() throws IOException { 292 return new SegmentTermDocs(this); 293 } 294 295 public TermPositions termPositions() throws IOException { 296 return new SegmentTermPositions(this); 297 } 298 299 public int docFreq(Term t) throws IOException { 300 TermInfo ti = tis.get(t); 301 if (ti != null) 302 return ti.docFreq; 303 else 304 return 0; 305 } 306 307 public int numDocs() { 308 int n = maxDoc(); 309 if (deletedDocs != null) 310 n -= deletedDocs.count(); 311 return n; 312 } 313 314 public int maxDoc() { 315 return fieldsReader.size(); 316 } 317 318 322 public Collection getFieldNames() { 323 Set fieldSet = new HashSet(); 325 for (int i = 0; i < fieldInfos.size(); i++) { 326 FieldInfo fi = fieldInfos.fieldInfo(i); 327 fieldSet.add(fi.name); 328 } 329 return fieldSet; 330 } 331 332 336 public Collection getFieldNames(boolean indexed) { 337 Set fieldSet = new HashSet(); 339 for (int i = 0; i < fieldInfos.size(); i++) { 340 FieldInfo fi = fieldInfos.fieldInfo(i); 341 if (fi.isIndexed == indexed) 342 fieldSet.add(fi.name); 343 } 344 return fieldSet; 345 } 346 347 351 public Collection getIndexedFieldNames (Field.TermVector tvSpec){ 352 boolean storedTermVector; 353 boolean storePositionWithTermVector; 354 boolean storeOffsetWithTermVector; 355 356 if(tvSpec == Field.TermVector.NO){ 357 storedTermVector = false; 358 storePositionWithTermVector = false; 359 storeOffsetWithTermVector = false; 360 } 361 else if(tvSpec == Field.TermVector.YES){ 362 storedTermVector = true; 363 storePositionWithTermVector = false; 364 storeOffsetWithTermVector = false; 365 } 366 else if(tvSpec == Field.TermVector.WITH_POSITIONS){ 367 storedTermVector = true; 368 storePositionWithTermVector = true; 369 storeOffsetWithTermVector = false; 370 } 371 else if(tvSpec == Field.TermVector.WITH_OFFSETS){ 372 storedTermVector = true; 373 storePositionWithTermVector = false; 374 storeOffsetWithTermVector = true; 375 } 376 else if(tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS){ 377 storedTermVector = true; 378 storePositionWithTermVector = true; 379 storeOffsetWithTermVector = true; 380 } 381 else{ 382 throw new IllegalArgumentException ("unknown termVector parameter " + tvSpec); 383 } 384 385 Set fieldSet = new HashSet(); 387 for (int i = 0; i < fieldInfos.size(); i++) { 388 FieldInfo fi = fieldInfos.fieldInfo(i); 389 if (fi.isIndexed && fi.storeTermVector == storedTermVector && 390 fi.storePositionWithTermVector == storePositionWithTermVector && 391 fi.storeOffsetWithTermVector == storeOffsetWithTermVector){ 392 fieldSet.add(fi.name); 393 } 394 } 395 return fieldSet; 396 } 397 398 401 public Collection getFieldNames(IndexReader.FieldOption fieldOption) { 402 403 Set fieldSet = new HashSet(); 404 for (int i = 0; i < fieldInfos.size(); i++) { 405 FieldInfo fi = fieldInfos.fieldInfo(i); 406 if (fieldOption == IndexReader.FieldOption.ALL) { 407 fieldSet.add(fi.name); 408 } 409 else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) { 410 fieldSet.add(fi.name); 411 } 412 else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) { 413 fieldSet.add(fi.name); 414 } 415 else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) { 416 fieldSet.add(fi.name); 417 } 418 else if (fi.storeTermVector == true && 419 fi.storePositionWithTermVector == false && 420 fi.storeOffsetWithTermVector == false && 421 fieldOption == IndexReader.FieldOption.TERMVECTOR) { 422 fieldSet.add(fi.name); 423 } 424 else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) { 425 fieldSet.add(fi.name); 426 } 427 else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) { 428 fieldSet.add(fi.name); 429 } 430 else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) { 431 fieldSet.add(fi.name); 432 } 433 else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && 434 fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) { 435 fieldSet.add(fi.name); 436 } 437 } 438 return fieldSet; 439 } 440 441 442 public synchronized boolean hasNorms(String field) { 443 return norms.containsKey(field); 444 } 445 446 static byte[] createFakeNorms(int size) { 447 byte[] ones = new byte[size]; 448 Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f)); 449 return ones; 450 } 451 452 private byte[] ones; 453 private byte[] fakeNorms() { 454 if (ones==null) ones=createFakeNorms(maxDoc()); 455 return ones; 456 } 457 458 protected synchronized byte[] getNorms(String field) throws IOException { 460 Norm norm = (Norm) norms.get(field); 461 if (norm == null) return null; 463 if (norm.bytes == null) { byte[] bytes = new byte[maxDoc()]; 465 norms(field, bytes, 0); 466 norm.bytes = bytes; } 468 return norm.bytes; 469 } 470 471 public synchronized byte[] norms(String field) throws IOException { 473 byte[] bytes = getNorms(field); 474 if (bytes==null) bytes=fakeNorms(); 475 return bytes; 476 } 477 478 protected void doSetNorm(int doc, String field, byte value) 479 throws IOException { 480 Norm norm = (Norm) norms.get(field); 481 if (norm == null) return; 483 norm.dirty = true; normsDirty = true; 485 486 norms(field)[doc] = value; } 488 489 490 public synchronized void norms(String field, byte[] bytes, int offset) 491 throws IOException { 492 493 Norm norm = (Norm) norms.get(field); 494 if (norm == null) { 495 System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc()); 496 return; 497 } 498 499 if (norm.bytes != null) { System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc()); 501 return; 502 } 503 504 IndexInput normStream = (IndexInput) norm.in.clone(); 505 try { normStream.seek(0); 507 normStream.readBytes(bytes, offset, maxDoc()); 508 } finally { 509 normStream.close(); 510 } 511 } 512 513 514 private void openNorms(Directory cfsDir) throws IOException { 515 for (int i = 0; i < fieldInfos.size(); i++) { 516 FieldInfo fi = fieldInfos.fieldInfo(i); 517 if (fi.isIndexed && !fi.omitNorms) { 518 String fileName = segment + ".s" + fi.number; 520 Directory d = directory(); 521 if(!d.fileExists(fileName)){ 522 fileName = segment + ".f" + fi.number; 523 d = cfsDir; 524 } 525 norms.put(fi.name, new Norm(d.openInput(fileName), fi.number)); 526 } 527 } 528 } 529 530 private void closeNorms() throws IOException { 531 synchronized (norms) { 532 Enumeration enumerator = norms.elements(); 533 while (enumerator.hasMoreElements()) { 534 Norm norm = (Norm) enumerator.nextElement(); 535 norm.in.close(); 536 } 537 } 538 } 539 540 544 private TermVectorsReader getTermVectorsReader() { 545 TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get(); 546 if (tvReader == null) { 547 tvReader = (TermVectorsReader)termVectorsReaderOrig.clone(); 548 termVectorsLocal.set(tvReader); 549 } 550 return tvReader; 551 } 552 553 559 public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException { 560 FieldInfo fi = fieldInfos.fieldInfo(field); 562 if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) 563 return null; 564 565 TermVectorsReader termVectorsReader = getTermVectorsReader(); 566 if (termVectorsReader == null) 567 return null; 568 569 return termVectorsReader.get(docNumber, field); 570 } 571 572 573 580 public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException { 581 if (termVectorsReaderOrig == null) 582 return null; 583 584 TermVectorsReader termVectorsReader = getTermVectorsReader(); 585 if (termVectorsReader == null) 586 return null; 587 588 return termVectorsReader.get(docNumber); 589 } 590 } 591 | Popular Tags |