1 package org.apache.lucene.index; 2 3 18 19 import java.io.IOException ; 20 import java.io.File ; 21 import java.io.PrintStream ; 22 import java.util.Vector ; 23 24 import org.apache.lucene.store.Directory; 25 import org.apache.lucene.store.RAMDirectory; 26 import org.apache.lucene.store.FSDirectory; 27 import org.apache.lucene.store.Lock; 28 import org.apache.lucene.store.IndexInput; 29 import org.apache.lucene.store.IndexOutput; 30 import org.apache.lucene.search.Similarity; 31 import org.apache.lucene.document.Document; 32 import org.apache.lucene.analysis.Analyzer; 33 34 35 58 59 public class IndexWriter { 60 61 64 public final static long WRITE_LOCK_TIMEOUT = 1000; 65 66 69 public final static long COMMIT_LOCK_TIMEOUT = 10000; 70 71 public static final String WRITE_LOCK_NAME = "write.lock"; 72 public static final String COMMIT_LOCK_NAME = "commit.lock"; 73 74 77 public final static int DEFAULT_MERGE_FACTOR = 10; 78 79 82 public final static int DEFAULT_MAX_BUFFERED_DOCS = 10; 83 84 87 public final static int DEFAULT_MIN_MERGE_DOCS = DEFAULT_MAX_BUFFERED_DOCS; 88 89 92 public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; 93 94 97 public final static int DEFAULT_MAX_FIELD_LENGTH = 10000; 98 99 102 public final static int DEFAULT_TERM_INDEX_INTERVAL = 128; 103 104 private Directory directory; private Analyzer analyzer; 107 private Similarity similarity = Similarity.getDefault(); 109 private SegmentInfos segmentInfos = new SegmentInfos(); private final Directory ramDirectory = new RAMDirectory(); 112 private Lock writeLock; 113 114 private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; 115 116 120 private boolean useCompoundFile = true; 121 122 private boolean closeDir; 123 124 129 public boolean getUseCompoundFile() { 130 return useCompoundFile; 131 } 132 133 137 public void setUseCompoundFile(boolean value) { 138 useCompoundFile = value; 139 } 140 141 145 public void setSimilarity(Similarity similarity) { 146 this.similarity = similarity; 147 } 148 149 153 public Similarity getSimilarity() { 154 return this.similarity; 155 } 156 157 178 public void setTermIndexInterval(int interval) { 179 this.termIndexInterval = interval; 180 } 181 182 186 public int getTermIndexInterval() { return termIndexInterval; } 187 188 203 public IndexWriter(String path, Analyzer a, boolean create) 204 throws IOException { 205 this(FSDirectory.getDirectory(path, create), a, create, true); 206 } 207 208 223 public IndexWriter(File path, Analyzer a, boolean create) 224 throws IOException { 225 this(FSDirectory.getDirectory(path, create), a, create, true); 226 } 227 228 243 public IndexWriter(Directory d, Analyzer a, boolean create) 244 throws IOException { 245 this(d, a, create, false); 246 } 247 248 private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir) 249 throws IOException { 250 this.closeDir = closeDir; 251 directory = d; 252 analyzer = a; 253 254 Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME); 255 if (!writeLock.obtain(WRITE_LOCK_TIMEOUT)) throw new IOException ("Index locked for write: " + writeLock); 257 this.writeLock = writeLock; 259 synchronized (directory) { new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) { 261 public Object doBody() throws IOException { 262 if (create) 263 segmentInfos.write(directory); 264 else 265 segmentInfos.read(directory); 266 return null; 267 } 268 }.run(); 269 } 270 } 271 272 279 public void setMaxMergeDocs(int maxMergeDocs) { 280 this.maxMergeDocs = maxMergeDocs; 281 } 282 283 286 public int getMaxMergeDocs() { 287 return maxMergeDocs; 288 } 289 290 302 public void setMaxFieldLength(int maxFieldLength) { 303 this.maxFieldLength = maxFieldLength; 304 } 305 306 309 public int getMaxFieldLength() { 310 return maxFieldLength; 311 } 312 313 323 public void setMaxBufferedDocs(int maxBufferedDocs) { 324 if (maxBufferedDocs < 2) 325 throw new IllegalArgumentException ("maxBufferedDocs must at least be 2"); 326 this.minMergeDocs = maxBufferedDocs; 327 } 328 329 332 public int getMaxBufferedDocs() { 333 return minMergeDocs; 334 } 335 336 346 public void setMergeFactor(int mergeFactor) { 347 if (mergeFactor < 2) 348 throw new IllegalArgumentException ("mergeFactor cannot be less than 2"); 349 this.mergeFactor = mergeFactor; 350 } 351 352 355 public int getMergeFactor() { 356 return mergeFactor; 357 } 358 359 362 public void setInfoStream(PrintStream infoStream) { 363 this.infoStream = infoStream; 364 } 365 366 369 public PrintStream getInfoStream() { 370 return infoStream; 371 } 372 373 374 public synchronized void close() throws IOException { 375 flushRamSegments(); 376 ramDirectory.close(); 377 if (writeLock != null) { 378 writeLock.release(); writeLock = null; 380 } 381 if(closeDir) 382 directory.close(); 383 } 384 385 386 protected void finalize() throws IOException { 387 if (writeLock != null) { 388 writeLock.release(); writeLock = null; 390 } 391 } 392 393 394 public Directory getDirectory() { 395 return directory; 396 } 397 398 399 public Analyzer getAnalyzer() { 400 return analyzer; 401 } 402 403 404 405 public synchronized int docCount() { 406 int count = 0; 407 for (int i = 0; i < segmentInfos.size(); i++) { 408 SegmentInfo si = segmentInfos.info(i); 409 count += si.docCount; 410 } 411 return count; 412 } 413 414 428 public int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH; 429 430 435 public void addDocument(Document doc) throws IOException { 436 addDocument(doc, analyzer); 437 } 438 439 445 public void addDocument(Document doc, Analyzer analyzer) throws IOException { 446 DocumentWriter dw = 447 new DocumentWriter(ramDirectory, analyzer, this); 448 dw.setInfoStream(infoStream); 449 String segmentName = newSegmentName(); 450 dw.addDocument(segmentName, doc); 451 synchronized (this) { 452 segmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory)); 453 maybeMergeSegments(); 454 } 455 } 456 457 final int getSegmentsCounter(){ 458 return segmentInfos.counter; 459 } 460 461 private final synchronized String newSegmentName() { 462 return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); 463 } 464 465 476 public int mergeFactor = DEFAULT_MERGE_FACTOR; 477 478 487 public int minMergeDocs = DEFAULT_MIN_MERGE_DOCS; 488 489 490 498 public int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; 499 500 503 public PrintStream infoStream = null; 504 505 507 public synchronized void optimize() throws IOException { 508 flushRamSegments(); 509 while (segmentInfos.size() > 1 || 510 (segmentInfos.size() == 1 && 511 (SegmentReader.hasDeletions(segmentInfos.info(0)) || 512 segmentInfos.info(0).dir != directory || 513 (useCompoundFile && 514 (!SegmentReader.usesCompoundFile(segmentInfos.info(0)) || 515 SegmentReader.hasSeparateNorms(segmentInfos.info(0))))))) { 516 int minSegment = segmentInfos.size() - mergeFactor; 517 mergeSegments(minSegment < 0 ? 0 : minSegment); 518 } 519 } 520 521 530 public synchronized void addIndexes(Directory[] dirs) 531 throws IOException { 532 optimize(); 534 int start = segmentInfos.size(); 535 536 for (int i = 0; i < dirs.length; i++) { 537 SegmentInfos sis = new SegmentInfos(); sis.read(dirs[i]); 539 for (int j = 0; j < sis.size(); j++) { 540 segmentInfos.addElement(sis.info(j)); } 542 } 543 544 while (segmentInfos.size() > start+mergeFactor) { 546 for (int base = start+1; base < segmentInfos.size(); base++) { 547 int end = Math.min(segmentInfos.size(), base+mergeFactor); 548 if (end-base > 1) 549 mergeSegments(base, end); 550 } 551 } 552 553 optimize(); } 555 556 560 public synchronized void addIndexes(IndexReader[] readers) 561 throws IOException { 562 563 optimize(); 565 final String mergedName = newSegmentName(); 566 SegmentMerger merger = new SegmentMerger(this, mergedName); 567 568 final Vector segmentsToDelete = new Vector (); 569 IndexReader sReader = null; 570 if (segmentInfos.size() == 1){ sReader = SegmentReader.get(segmentInfos.info(0)); 572 merger.add(sReader); 573 segmentsToDelete.addElement(sReader); } 575 576 for (int i = 0; i < readers.length; i++) merger.add(readers[i]); 578 579 int docCount = merger.merge(); 581 segmentInfos.setSize(0); segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory)); 583 584 if(sReader != null) 585 sReader.close(); 586 587 synchronized (directory) { new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) { 589 public Object doBody() throws IOException { 590 segmentInfos.write(directory); deleteSegments(segmentsToDelete); return null; 593 } 594 }.run(); 595 } 596 597 if (useCompoundFile) { 598 final Vector filesToDelete = merger.createCompoundFile(mergedName + ".tmp"); 599 synchronized (directory) { new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) { 601 public Object doBody() throws IOException { 602 directory.renameFile(mergedName + ".tmp", mergedName + ".cfs"); 604 deleteFiles(filesToDelete); 606 return null; 607 } 608 }.run(); 609 } 610 } 611 } 612 613 614 private final void flushRamSegments() throws IOException { 615 int minSegment = segmentInfos.size()-1; 616 int docCount = 0; 617 while (minSegment >= 0 && 618 (segmentInfos.info(minSegment)).dir == ramDirectory) { 619 docCount += segmentInfos.info(minSegment).docCount; 620 minSegment--; 621 } 622 if (minSegment < 0 || (docCount + segmentInfos.info(minSegment).docCount) > mergeFactor || 624 !(segmentInfos.info(segmentInfos.size()-1).dir == ramDirectory)) 625 minSegment++; 626 if (minSegment >= segmentInfos.size()) 627 return; mergeSegments(minSegment); 629 } 630 631 632 private final void maybeMergeSegments() throws IOException { 633 long targetMergeDocs = minMergeDocs; 634 while (targetMergeDocs <= maxMergeDocs) { 635 int minSegment = segmentInfos.size(); 637 int mergeDocs = 0; 638 while (--minSegment >= 0) { 639 SegmentInfo si = segmentInfos.info(minSegment); 640 if (si.docCount >= targetMergeDocs) 641 break; 642 mergeDocs += si.docCount; 643 } 644 645 if (mergeDocs >= targetMergeDocs) mergeSegments(minSegment+1); 647 else 648 break; 649 650 targetMergeDocs *= mergeFactor; } 652 } 653 654 656 private final void mergeSegments(int minSegment) 657 throws IOException { 658 mergeSegments(minSegment, segmentInfos.size()); 659 } 660 661 663 private final void mergeSegments(int minSegment, int end) 664 throws IOException { 665 final String mergedName = newSegmentName(); 666 if (infoStream != null) infoStream.print("merging segments"); 667 SegmentMerger merger = new SegmentMerger(this, mergedName); 668 669 final Vector segmentsToDelete = new Vector (); 670 for (int i = minSegment; i < end; i++) { 671 SegmentInfo si = segmentInfos.info(i); 672 if (infoStream != null) 673 infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); 674 IndexReader reader = SegmentReader.get(si); 675 merger.add(reader); 676 if ((reader.directory() == this.directory) || (reader.directory() == this.ramDirectory)) 678 segmentsToDelete.addElement(reader); } 680 681 int mergedDocCount = merger.merge(); 682 683 if (infoStream != null) { 684 infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); 685 } 686 687 for (int i = end-1; i >= minSegment; i--) segmentInfos.remove(i); 689 segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount, 690 directory)); 691 692 merger.closeReaders(); 694 695 synchronized (directory) { new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) { 697 public Object doBody() throws IOException { 698 segmentInfos.write(directory); deleteSegments(segmentsToDelete); return null; 701 } 702 }.run(); 703 } 704 705 if (useCompoundFile) { 706 final Vector filesToDelete = merger.createCompoundFile(mergedName + ".tmp"); 707 synchronized (directory) { new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) { 709 public Object doBody() throws IOException { 710 directory.renameFile(mergedName + ".tmp", mergedName + ".cfs"); 712 deleteFiles(filesToDelete); 714 return null; 715 } 716 }.run(); 717 } 718 } 719 } 720 721 727 728 private final void deleteSegments(Vector segments) throws IOException { 729 Vector deletable = new Vector (); 730 731 deleteFiles(readDeleteableFiles(), deletable); 733 for (int i = 0; i < segments.size(); i++) { 734 SegmentReader reader = (SegmentReader)segments.elementAt(i); 735 if (reader.directory() == this.directory) 736 deleteFiles(reader.files(), deletable); else 738 deleteFiles(reader.files(), reader.directory()); } 740 741 writeDeleteableFiles(deletable); } 743 744 private final void deleteFiles(Vector files) throws IOException { 745 Vector deletable = new Vector (); 746 deleteFiles(readDeleteableFiles(), deletable); deleteFiles(files, deletable); writeDeleteableFiles(deletable); } 750 751 private final void deleteFiles(Vector files, Directory directory) 752 throws IOException { 753 for (int i = 0; i < files.size(); i++) 754 directory.deleteFile((String )files.elementAt(i)); 755 } 756 757 private final void deleteFiles(Vector files, Vector deletable) 758 throws IOException { 759 for (int i = 0; i < files.size(); i++) { 760 String file = (String )files.elementAt(i); 761 try { 762 directory.deleteFile(file); } catch (IOException e) { if (directory.fileExists(file)) { 765 if (infoStream != null) 766 infoStream.println(e.toString() + "; Will re-try later."); 767 deletable.addElement(file); } 769 } 770 } 771 } 772 773 private final Vector readDeleteableFiles() throws IOException { 774 Vector result = new Vector (); 775 if (!directory.fileExists(IndexFileNames.DELETABLE)) 776 return result; 777 778 IndexInput input = directory.openInput(IndexFileNames.DELETABLE); 779 try { 780 for (int i = input.readInt(); i > 0; i--) result.addElement(input.readString()); 782 } finally { 783 input.close(); 784 } 785 return result; 786 } 787 788 private final void writeDeleteableFiles(Vector files) throws IOException { 789 IndexOutput output = directory.createOutput("deleteable.new"); 790 try { 791 output.writeInt(files.size()); 792 for (int i = 0; i < files.size(); i++) 793 output.writeString((String )files.elementAt(i)); 794 } finally { 795 output.close(); 796 } 797 directory.renameFile("deleteable.new", IndexFileNames.DELETABLE); 798 } 799 } 800 | Popular Tags |