1 11 package org.eclipse.jdt.internal.core.index; 12 13 import java.io.*; 14 15 import org.eclipse.jdt.core.compiler.CharOperation; 16 import org.eclipse.jdt.core.search.*; 17 import org.eclipse.jdt.internal.core.util.*; 18 import org.eclipse.jdt.internal.compiler.util.HashtableOfIntValues; 19 import org.eclipse.jdt.internal.compiler.util.HashtableOfObject; 20 import org.eclipse.jdt.internal.compiler.util.SimpleLookupTable; 21 import org.eclipse.jdt.internal.compiler.util.SimpleSet; 22 import org.eclipse.jdt.internal.compiler.util.SimpleSetOfCharArray; 23 24 public class DiskIndex { 25 26 File indexFile; 27 28 private int headerInfoOffset; 29 private int numberOfChunks; 30 private int sizeOfLastChunk; 31 private int[] chunkOffsets; 32 private int documentReferenceSize; private int startOfCategoryTables; 34 private HashtableOfIntValues categoryOffsets, categoryEnds; 35 36 private int cacheUserCount; 37 private String [][] cachedChunks; private HashtableOfObject categoryTables; private char[] cachedCategoryName; 40 41 private static final int DEFAULT_BUFFER_SIZE = 2048; 42 private static int BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE; 43 private static final int BUFFER_WRITE_SIZE = DEFAULT_BUFFER_SIZE; 44 private byte[] streamBuffer; 45 private int bufferIndex, bufferEnd; private int streamEnd; 48 public static final String SIGNATURE= "INDEX VERSION 1.121"; private static final char[] SIGNATURE_CHARS = SIGNATURE.toCharArray(); 50 public static boolean DEBUG = false; 51 52 private static final int RE_INDEXED = -1; 53 private static final int DELETED = -2; 54 55 private static final int CHUNK_SIZE = 100; 56 57 private static final SimpleSetOfCharArray INTERNED_CATEGORY_NAMES = new SimpleSetOfCharArray(20); 58 59 static class IntList { 60 61 int size; 62 int[] elements; 63 64 IntList(int[] elements) { 65 this.elements = elements; 66 this.size = elements.length; 67 } 68 void add(int newElement) { 69 if (this.size == this.elements.length) { 70 int newSize = this.size * 3; 71 if (newSize < 7) newSize = 7; 72 System.arraycopy(this.elements, 0, this.elements = new int[newSize], 0, this.size); 73 } 74 this.elements[this.size++] = newElement; 75 } 76 int[] asArray() { 77 int[] result = new int[this.size]; 78 System.arraycopy(this.elements, 0, result, 0, this.size); 79 return result; 80 } 81 } 82 83 84 DiskIndex(String fileName) { 85 if (fileName == null) 86 throw new java.lang.IllegalArgumentException (); 87 this.indexFile = new File(fileName); 88 89 this.headerInfoOffset = -1; 91 this.numberOfChunks = -1; 92 this.sizeOfLastChunk = -1; 93 this.chunkOffsets = null; 94 this.documentReferenceSize = -1; 95 this.cacheUserCount = -1; 96 this.cachedChunks = null; 97 this.categoryTables = null; 98 this.cachedCategoryName = null; 99 this.categoryOffsets = null; 100 this.categoryEnds = null; 101 } 102 SimpleSet addDocumentNames(String substring, MemoryIndex memoryIndex) throws IOException { 103 String [] docNames = readAllDocumentNames(); 105 SimpleSet results = new SimpleSet(docNames.length); 106 if (substring == null) { 107 if (memoryIndex == null) { 108 for (int i = 0, l = docNames.length; i < l; i++) 109 results.add(docNames[i]); 110 } else { 111 SimpleLookupTable docsToRefs = memoryIndex.docsToReferences; 112 for (int i = 0, l = docNames.length; i < l; i++) { 113 String docName = docNames[i]; 114 if (!docsToRefs.containsKey(docName)) 115 results.add(docName); 116 } 117 } 118 } else { 119 if (memoryIndex == null) { 120 for (int i = 0, l = docNames.length; i < l; i++) 121 if (docNames[i].startsWith(substring, 0)) 122 results.add(docNames[i]); 123 } else { 124 SimpleLookupTable docsToRefs = memoryIndex.docsToReferences; 125 for (int i = 0, l = docNames.length; i < l; i++) { 126 String docName = docNames[i]; 127 if (docName.startsWith(substring, 0) && !docsToRefs.containsKey(docName)) 128 results.add(docName); 129 } 130 } 131 } 132 return results; 133 } 134 private HashtableOfObject addQueryResult(HashtableOfObject results, char[] word, HashtableOfObject wordsToDocNumbers, MemoryIndex memoryIndex) throws IOException { 135 if (results == null) 137 results = new HashtableOfObject(13); 138 EntryResult result = (EntryResult) results.get(word); 139 if (memoryIndex == null) { 140 if (result == null) 141 results.put(word, new EntryResult(word, wordsToDocNumbers)); 142 else 143 result.addDocumentTable(wordsToDocNumbers); 144 } else { 145 SimpleLookupTable docsToRefs = memoryIndex.docsToReferences; 146 if (result == null) result = new EntryResult(word, null); 147 int[] docNumbers = readDocumentNumbers(wordsToDocNumbers.get(word)); 148 for (int i = 0, l = docNumbers.length; i < l; i++) { 149 String docName = readDocumentName(docNumbers[i]); 150 if (!docsToRefs.containsKey(docName)) 151 result.addDocumentName(docName); 152 } 153 if (!result.isEmpty()) 154 results.put(word, result); 155 } 156 return results; 157 } 158 HashtableOfObject addQueryResults(char[][] categories, char[] key, int matchRule, MemoryIndex memoryIndex) throws IOException { 159 if (this.categoryOffsets == null) return null; 162 HashtableOfObject results = null; if (key == null) { 164 for (int i = 0, l = categories.length; i < l; i++) { 165 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], true); if (wordsToDocNumbers != null) { 167 char[][] words = wordsToDocNumbers.keyTable; 168 if (results == null) 169 results = new HashtableOfObject(wordsToDocNumbers.elementSize); 170 for (int j = 0, m = words.length; j < m; j++) 171 if (words[j] != null) 172 results = addQueryResult(results, words[j], wordsToDocNumbers, memoryIndex); 173 } 174 } 175 if (results != null && this.cachedChunks == null) 176 cacheDocumentNames(); 177 } else { 178 switch (matchRule) { 179 case SearchPattern.R_EXACT_MATCH | SearchPattern.R_CASE_SENSITIVE: 180 for (int i = 0, l = categories.length; i < l; i++) { 181 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false); 182 if (wordsToDocNumbers != null && wordsToDocNumbers.containsKey(key)) 183 results = addQueryResult(results, key, wordsToDocNumbers, memoryIndex); 184 } 185 break; 186 case SearchPattern.R_PREFIX_MATCH | SearchPattern.R_CASE_SENSITIVE: 187 for (int i = 0, l = categories.length; i < l; i++) { 188 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false); 189 if (wordsToDocNumbers != null) { 190 char[][] words = wordsToDocNumbers.keyTable; 191 for (int j = 0, m = words.length; j < m; j++) { 192 char[] word = words[j]; 193 if (word != null && key[0] == word[0] && CharOperation.prefixEquals(key, word)) 194 results = addQueryResult(results, word, wordsToDocNumbers, memoryIndex); 195 } 196 } 197 } 198 break; 199 default: 200 for (int i = 0, l = categories.length; i < l; i++) { 201 HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false); 202 if (wordsToDocNumbers != null) { 203 char[][] words = wordsToDocNumbers.keyTable; 204 for (int j = 0, m = words.length; j < m; j++) { 205 char[] word = words[j]; 206 if (word != null && Index.isMatch(key, word, matchRule)) 207 results = addQueryResult(results, word, wordsToDocNumbers, memoryIndex); 208 } 209 } 210 } 211 } 212 } 213 214 if (results == null) return null; 215 return results; 216 } 217 private void cacheDocumentNames() throws IOException { 218 this.cachedChunks = new String [this.numberOfChunks][]; 220 FileInputStream stream = new FileInputStream(this.indexFile); 221 try { 222 if (this.numberOfChunks > 5) BUFFER_READ_SIZE <<= 1; 223 int offset = this.chunkOffsets[0]; 224 stream.skip(offset); 225 this.streamBuffer = new byte[BUFFER_READ_SIZE]; 226 this.bufferIndex = 0; 227 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); 228 for (int i = 0; i < this.numberOfChunks; i++) { 229 int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk : CHUNK_SIZE; 230 readChunk(this.cachedChunks[i] = new String [size], stream, 0, size); 231 } 232 } catch (IOException e) { 233 this.cachedChunks = null; 234 throw e; 235 } finally { 236 stream.close(); 237 this.streamBuffer = null; 238 BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE; 239 } 240 } 241 private String [] computeDocumentNames(String [] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex) { 242 int onDiskLength = onDiskNames.length; 243 Object [] docNames = memoryIndex.docsToReferences.keyTable; 244 Object [] referenceTables = memoryIndex.docsToReferences.valueTable; 245 if (onDiskLength == 0) { 246 for (int i = 0, l = referenceTables.length; i < l; i++) 248 if (referenceTables[i] != null) 249 indexedDocuments.put(docNames[i], null); 251 String [] newDocNames = new String [indexedDocuments.elementSize]; 252 int count = 0; 253 Object [] added = indexedDocuments.keyTable; 254 for (int i = 0, l = added.length; i < l; i++) 255 if (added[i] != null) 256 newDocNames[count++] = (String ) added[i]; 257 Util.sort(newDocNames); 258 for (int i = 0, l = newDocNames.length; i < l; i++) 259 indexedDocuments.put(newDocNames[i], new Integer (i)); 260 return newDocNames; 261 } 262 263 for (int i = 0; i < onDiskLength; i++) 265 positions[i] = i; 266 267 int numDeletedDocNames = 0; 269 int numReindexedDocNames = 0; 270 nextPath : for (int i = 0, l = docNames.length; i < l; i++) { 271 String docName = (String ) docNames[i]; 272 if (docName != null) { 273 for (int j = 0; j < onDiskLength; j++) { 274 if (docName.equals(onDiskNames[j])) { 275 if (referenceTables[i] == null) { 276 positions[j] = DELETED; 277 numDeletedDocNames++; 278 } else { 279 positions[j] = RE_INDEXED; 280 numReindexedDocNames++; 281 } 282 continue nextPath; 283 } 284 } 285 if (referenceTables[i] != null) 286 indexedDocuments.put(docName, null); } 288 } 289 290 String [] newDocNames = onDiskNames; 291 if (numDeletedDocNames > 0 || indexedDocuments.elementSize > 0) { 292 newDocNames = new String [onDiskLength + indexedDocuments.elementSize - numDeletedDocNames]; 294 int count = 0; 295 for (int i = 0; i < onDiskLength; i++) 296 if (positions[i] >= RE_INDEXED) 297 newDocNames[count++] = onDiskNames[i]; Object [] added = indexedDocuments.keyTable; 299 for (int i = 0, l = added.length; i < l; i++) 300 if (added[i] != null) 301 newDocNames[count++] = (String ) added[i]; Util.sort(newDocNames); 303 for (int i = 0, l = newDocNames.length; i < l; i++) 304 if (indexedDocuments.containsKey(newDocNames[i])) 305 indexedDocuments.put(newDocNames[i], new Integer (i)); } 307 308 int count = -1; 312 for (int i = 0; i < onDiskLength;) { 313 switch(positions[i]) { 314 case DELETED : 315 i++; break; 317 case RE_INDEXED : 318 String newName = newDocNames[++count]; 319 if (newName.equals(onDiskNames[i])) { 320 indexedDocuments.put(newName, new Integer (count)); i++; 322 } 323 break; 324 default : 325 if (newDocNames[++count].equals(onDiskNames[i])) 326 positions[i++] = count; } 328 } 329 return newDocNames; 330 } 331 private void copyQueryResults(HashtableOfObject categoryToWords, int newPosition) { 332 char[][] categoryNames = categoryToWords.keyTable; 333 Object [] wordSets = categoryToWords.valueTable; 334 for (int i = 0, l = categoryNames.length; i < l; i++) { 335 char[] categoryName = categoryNames[i]; 336 if (categoryName != null) { 337 SimpleWordSet wordSet = (SimpleWordSet) wordSets[i]; 338 HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName); 339 if (wordsToDocs == null) 340 this.categoryTables.put(categoryName, wordsToDocs = new HashtableOfObject(wordSet.elementSize)); 341 342 char[][] words = wordSet.words; 343 for (int j = 0, m = words.length; j < m; j++) { 344 char[] word = words[j]; 345 if (word != null) { 346 Object o = wordsToDocs.get(word); 347 if (o == null) { 348 wordsToDocs.put(word, new int[] {newPosition}); 349 } else if (o instanceof IntList) { 350 ((IntList) o).add(newPosition); 351 } else { 352 IntList list = new IntList((int[]) o); 353 list.add(newPosition); 354 wordsToDocs.put(word, list); 355 } 356 } 357 } 358 } 359 } 360 } 361 void initialize(boolean reuseExistingFile) throws IOException { 362 if (this.indexFile.exists()) { 363 if (reuseExistingFile) { 364 RandomAccessFile file = new RandomAccessFile(this.indexFile, "r"); try { 366 String signature = file.readUTF(); 367 if (!signature.equals(SIGNATURE)) 368 throw new IOException(Messages.exception_wrongFormat); 369 370 this.headerInfoOffset = file.readInt(); 371 if (this.headerInfoOffset > 0) readHeaderInfo(file); 373 } finally { 374 file.close(); 375 } 376 return; 377 } 378 if (!this.indexFile.delete()) { 379 if (DEBUG) 380 System.out.println("initialize - Failed to delete index " + this.indexFile); throw new IOException("Failed to delete index " + this.indexFile); } 383 } 384 if (this.indexFile.createNewFile()) { 385 RandomAccessFile file = new RandomAccessFile(this.indexFile, "rw"); try { 387 file.writeUTF(SIGNATURE); 388 file.writeInt(-1); } finally { 390 file.close(); 391 } 392 } else { 393 if (DEBUG) 394 System.out.println("initialize - Failed to create new index " + this.indexFile); throw new IOException("Failed to create new index " + this.indexFile); } 397 } 398 private void initializeFrom(DiskIndex diskIndex, File newIndexFile) throws IOException { 399 if (newIndexFile.exists() && !newIndexFile.delete()) { if (DEBUG) 401 System.out.println("initializeFrom - Failed to delete temp index " + this.indexFile); } else if (!newIndexFile.createNewFile()) { 403 if (DEBUG) 404 System.out.println("initializeFrom - Failed to create temp index " + this.indexFile); throw new IOException("Failed to create temp index " + this.indexFile); } 407 408 int size = diskIndex.categoryOffsets == null ? 8 : diskIndex.categoryOffsets.elementSize; 409 this.categoryOffsets = new HashtableOfIntValues(size); 410 this.categoryEnds = new HashtableOfIntValues(size); 411 this.categoryTables = new HashtableOfObject(size); 412 } 413 private void mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException { 414 char[][] oldNames = onDisk.categoryOffsets.keyTable; 416 for (int i = 0, l = oldNames.length; i < l; i++) { 417 char[] oldName = oldNames[i]; 418 if (oldName != null && !this.categoryTables.containsKey(oldName)) 419 this.categoryTables.put(oldName, null); 420 } 421 422 char[][] categoryNames = this.categoryTables.keyTable; 423 for (int i = 0, l = categoryNames.length; i < l; i++) 424 if (categoryNames[i] != null) 425 mergeCategory(categoryNames[i], onDisk, positions, stream); 426 this.categoryTables = null; 427 } 428 private void mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException { 429 HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName); 430 if (wordsToDocs == null) 431 wordsToDocs = new HashtableOfObject(3); 432 433 HashtableOfObject oldWordsToDocs = onDisk.readCategoryTable(categoryName, true); 434 if (oldWordsToDocs != null) { 435 char[][] oldWords = oldWordsToDocs.keyTable; 436 Object [] oldArrayOffsets = oldWordsToDocs.valueTable; 437 nextWord: for (int i = 0, l = oldWords.length; i < l; i++) { 438 char[] oldWord = oldWords[i]; 439 if (oldWord != null) { 440 int[] oldDocNumbers = (int[]) oldArrayOffsets[i]; 441 int length = oldDocNumbers.length; 442 int[] mappedNumbers = new int[length]; 443 int count = 0; 444 for (int j = 0; j < length; j++) { 445 int pos = positions[oldDocNumbers[j]]; 446 if (pos > RE_INDEXED) mappedNumbers[count++] = pos; 448 } 449 if (count < length) { 450 if (count == 0) continue nextWord; System.arraycopy(mappedNumbers, 0, mappedNumbers = new int[count], 0, count); 452 } 453 454 Object o = wordsToDocs.get(oldWord); 455 if (o == null) { 456 wordsToDocs.put(oldWord, mappedNumbers); 457 } else { 458 IntList list = null; 459 if (o instanceof IntList) { 460 list = (IntList) o; 461 } else { 462 list = new IntList((int[]) o); 463 wordsToDocs.put(oldWord, list); 464 } 465 for (int j = 0; j < count; j++) 466 list.add(mappedNumbers[j]); 467 } 468 } 469 } 470 onDisk.categoryTables.put(categoryName, null); } 472 writeCategoryTable(categoryName, wordsToDocs, stream); 473 } 474 DiskIndex mergeWith(MemoryIndex memoryIndex) throws IOException { 475 String [] docNames = readAllDocumentNames(); 478 int previousLength = docNames.length; 479 int[] positions = new int[previousLength]; SimpleLookupTable indexedDocuments = new SimpleLookupTable(3); docNames = computeDocumentNames(docNames, positions, indexedDocuments, memoryIndex); 482 if (docNames.length == 0) { 483 if (previousLength == 0) return this; 485 DiskIndex newDiskIndex = new DiskIndex(this.indexFile.getPath()); 487 newDiskIndex.initialize(false); 488 return newDiskIndex; 489 } 490 491 DiskIndex newDiskIndex = new DiskIndex(this.indexFile.getPath() + ".tmp"); try { 493 newDiskIndex.initializeFrom(this, newDiskIndex.indexFile); 494 FileOutputStream stream = new FileOutputStream(newDiskIndex.indexFile, false); 495 int offsetToHeader = -1; 496 try { 497 newDiskIndex.writeAllDocumentNames(docNames, stream); 498 docNames = null; 500 if (indexedDocuments.elementSize > 0) { 502 Object [] names = indexedDocuments.keyTable; 503 Object [] integerPositions = indexedDocuments.valueTable; 504 for (int i = 0, l = names.length; i < l; i++) 505 if (names[i] != null) 506 newDiskIndex.copyQueryResults( 507 (HashtableOfObject) memoryIndex.docsToReferences.get(names[i]), ((Integer ) integerPositions[i]).intValue()); 508 } 509 indexedDocuments = null; 511 if (previousLength == 0) 513 newDiskIndex.writeCategories(stream); 514 else 515 newDiskIndex.mergeCategories(this, positions, stream); 516 offsetToHeader = newDiskIndex.streamEnd; 517 newDiskIndex.writeHeaderInfo(stream); 518 positions = null; } finally { 520 stream.close(); 521 this.streamBuffer = null; 522 } 523 newDiskIndex.writeOffsetToHeader(offsetToHeader); 524 525 if (this.indexFile.exists() && !this.indexFile.delete()) { 527 if (DEBUG) 528 System.out.println("mergeWith - Failed to delete " + this.indexFile); throw new IOException("Failed to delete index file " + this.indexFile); } 531 if (!newDiskIndex.indexFile.renameTo(this.indexFile)) { 532 if (DEBUG) 533 System.out.println("mergeWith - Failed to rename " + this.indexFile); throw new IOException("Failed to rename index file " + this.indexFile); } 536 } catch (IOException e) { 537 if (newDiskIndex.indexFile.exists() && !newDiskIndex.indexFile.delete()) 538 if (DEBUG) 539 System.out.println("mergeWith - Failed to delete temp index " + newDiskIndex.indexFile); throw e; 541 } 542 543 newDiskIndex.indexFile = this.indexFile; 544 return newDiskIndex; 545 } 546 private synchronized String [] readAllDocumentNames() throws IOException { 547 if (this.numberOfChunks <= 0) 548 return CharOperation.NO_STRINGS; 549 550 FileInputStream stream = new FileInputStream(this.indexFile); 551 try { 552 int offset = this.chunkOffsets[0]; 553 stream.skip(offset); 554 this.streamBuffer = new byte[BUFFER_READ_SIZE]; 555 this.bufferIndex = 0; 556 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); 557 int lastIndex = this.numberOfChunks - 1; 558 String [] docNames = new String [lastIndex * CHUNK_SIZE + sizeOfLastChunk]; 559 for (int i = 0; i < this.numberOfChunks; i++) 560 readChunk(docNames, stream, i * CHUNK_SIZE, i < lastIndex ? CHUNK_SIZE : sizeOfLastChunk); 561 return docNames; 562 } finally { 563 stream.close(); 564 this.streamBuffer = null; 565 } 566 } 567 private synchronized HashtableOfObject readCategoryTable(char[] categoryName, boolean readDocNumbers) throws IOException { 568 int offset = this.categoryOffsets.get(categoryName); 570 if (offset == HashtableOfIntValues.NO_VALUE) { 571 return null; 572 } 573 574 if (this.categoryTables == null) { 575 this.categoryTables = new HashtableOfObject(3); 576 } else { 577 HashtableOfObject cachedTable = (HashtableOfObject) this.categoryTables.get(categoryName); 578 if (cachedTable != null) { 579 if (readDocNumbers) { Object [] arrayOffsets = cachedTable.valueTable; 581 for (int i = 0, l = arrayOffsets.length; i < l; i++) 582 if (arrayOffsets[i] instanceof Integer ) 583 arrayOffsets[i] = readDocumentNumbers(arrayOffsets[i]); 584 } 585 return cachedTable; 586 } 587 } 588 589 FileInputStream stream = new FileInputStream(this.indexFile); 590 HashtableOfObject categoryTable = null; 591 char[][] matchingWords = null; 592 int count = 0; 593 int firstOffset = -1; 594 this.streamBuffer = new byte[BUFFER_READ_SIZE]; 595 try { 596 stream.skip(offset); 597 this.bufferIndex = 0; 598 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); 599 int size = readStreamInt(stream); 600 try { 601 if (size < 0) { System.err.println("-------------------- DEBUG --------------------"); System.err.println("file = "+this.indexFile); System.err.println("offset = "+offset); System.err.println("size = "+size); System.err.println("-------------------- END --------------------"); } 608 categoryTable = new HashtableOfObject(size); 609 } catch (OutOfMemoryError oom) { 610 oom.printStackTrace(); 612 System.err.println("-------------------- DEBUG --------------------"); System.err.println("file = "+this.indexFile); System.err.println("offset = "+offset); System.err.println("size = "+size); System.err.println("-------------------- END --------------------"); throw oom; 618 } 619 int largeArraySize = 256; 620 for (int i = 0; i < size; i++) { 621 char[] word = readStreamChars(stream); 622 int arrayOffset = readStreamInt(stream); 623 if (arrayOffset <= 0) { 628 categoryTable.put(word, new int[] {-arrayOffset}); } else if (arrayOffset < largeArraySize) { 630 categoryTable.put(word, readStreamDocumentArray(stream, arrayOffset)); } else { 632 arrayOffset = readStreamInt(stream); if (readDocNumbers) { 634 if (matchingWords == null) 635 matchingWords = new char[size][]; 636 if (count == 0) 637 firstOffset = arrayOffset; 638 matchingWords[count++] = word; 639 } 640 categoryTable.put(word, new Integer (arrayOffset)); } 642 } 643 this.categoryTables.put(INTERNED_CATEGORY_NAMES.get(categoryName), categoryTable); 644 this.cachedCategoryName = categoryTable.elementSize < 20000 ? categoryName : null; 647 } catch (IOException ioe) { 648 this.streamBuffer = null; 649 throw ioe; 650 } finally { 651 stream.close(); 652 } 653 654 if (matchingWords != null && count > 0) { 655 stream = new FileInputStream(this.indexFile); 656 try { 657 stream.skip(firstOffset); 658 this.bufferIndex = 0; 659 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); 660 for (int i = 0; i < count; i++) { categoryTable.put(matchingWords[i], readStreamDocumentArray(stream, readStreamInt(stream))); 662 } 663 } catch (IOException ioe) { 664 this.streamBuffer = null; 665 throw ioe; 666 } finally { 667 stream.close(); 668 } 669 } 670 this.streamBuffer = null; 671 return categoryTable; 672 } 673 private void readChunk(String [] docNames, FileInputStream stream, int index, int size) throws IOException { 674 String current = new String (readStreamChars(stream)); 675 docNames[index++] = current; 676 for (int i = 1; i < size; i++) { 677 if (stream != null && this.bufferIndex + 2 >= this.bufferEnd) 678 readStreamBuffer(stream); 679 int start = streamBuffer[this.bufferIndex++] & 0xFF; 680 int end = streamBuffer[this.bufferIndex++] & 0xFF; 681 String next = new String (readStreamChars(stream)); 682 if (start > 0) { 683 if (end > 0) { 684 int length = current.length(); 685 next = current.substring(0, start) + next + current.substring(length - end, length); 686 } else { 687 next = current.substring(0, start) + next; 688 } 689 } else if (end > 0) { 690 int length = current.length(); 691 next = next + current.substring(length - end, length); 692 } 693 docNames[index++] = next; 694 current = next; 695 } 696 } 697 synchronized String readDocumentName(int docNumber) throws IOException { 698 if (this.cachedChunks == null) 699 this.cachedChunks = new String [this.numberOfChunks][]; 700 701 int chunkNumber = docNumber / CHUNK_SIZE; 702 String [] chunk = this.cachedChunks[chunkNumber]; 703 if (chunk == null) { 704 boolean isLastChunk = chunkNumber == this.numberOfChunks - 1; 705 int start = this.chunkOffsets[chunkNumber]; 706 int numberOfBytes = (isLastChunk ? this.startOfCategoryTables : this.chunkOffsets[chunkNumber + 1]) - start; 707 if (numberOfBytes < 0) 708 throw new IllegalArgumentException (); 709 this.streamBuffer = new byte[numberOfBytes]; 710 this.bufferIndex = 0; 711 FileInputStream file = new FileInputStream(this.indexFile); 712 try { 713 file.skip(start); 714 if (file.read(this.streamBuffer, 0, numberOfBytes) != numberOfBytes) 715 throw new IOException(); 716 } catch (IOException ioe) { 717 this.streamBuffer = null; 718 throw ioe; 719 } finally { 720 file.close(); 721 } 722 int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE; 723 chunk = new String [numberOfNames]; 724 try { 725 readChunk(chunk, null, 0, numberOfNames); 726 } catch (IOException ioe) { 727 this.streamBuffer = null; 728 throw ioe; 729 } 730 this.cachedChunks[chunkNumber] = chunk; 731 } 732 this.streamBuffer = null; 733 return chunk[docNumber - (chunkNumber * CHUNK_SIZE)]; 734 } 735 synchronized int[] readDocumentNumbers(Object arrayOffset) throws IOException { 736 if (arrayOffset instanceof int[]) 738 return (int[]) arrayOffset; 739 740 FileInputStream stream = new FileInputStream(this.indexFile); 741 try { 742 int offset = ((Integer ) arrayOffset).intValue(); 743 stream.skip(offset); 744 this.streamBuffer = new byte[BUFFER_READ_SIZE]; 745 this.bufferIndex = 0; 746 this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length); 747 return readStreamDocumentArray(stream, readStreamInt(stream)); 748 } finally { 749 stream.close(); 750 this.streamBuffer = null; 751 } 752 } 753 private void readHeaderInfo(RandomAccessFile file) throws IOException { 754 file.seek(this.headerInfoOffset); 755 756 this.numberOfChunks = file.readInt(); 758 this.sizeOfLastChunk = file.readUnsignedByte(); 759 this.documentReferenceSize = file.readUnsignedByte(); 760 761 this.chunkOffsets = new int[this.numberOfChunks]; 762 for (int i = 0; i < this.numberOfChunks; i++) 763 this.chunkOffsets[i] = file.readInt(); 764 765 this.startOfCategoryTables = file.readInt(); 766 767 int size = file.readInt(); 768 this.categoryOffsets = new HashtableOfIntValues(size); 769 this.categoryEnds = new HashtableOfIntValues(size); 770 char[] previousCategory = null; 771 int offset = -1; 772 for (int i = 0; i < size; i++) { 773 char[] categoryName = INTERNED_CATEGORY_NAMES.get(file.readUTF().toCharArray()); 774 offset = file.readInt(); 775 this.categoryOffsets.put(categoryName, offset); if (previousCategory != null) { 777 this.categoryEnds.put(previousCategory, offset); } 779 previousCategory = categoryName; 780 } 781 if (previousCategory != null) { 782 this.categoryEnds.put(previousCategory, this.headerInfoOffset); } 784 this.categoryTables = new HashtableOfObject(3); 785 } 786 synchronized void startQuery() { 787 this.cacheUserCount++; 788 } 789 synchronized void stopQuery() { 790 if (--this.cacheUserCount < 0) { 791 this.cacheUserCount = -1; 793 this.cachedChunks = null; 794 if (this.categoryTables != null) { 795 if (this.cachedCategoryName == null) { 796 this.categoryTables = null; 797 } else if (this.categoryTables.elementSize > 1) { 798 HashtableOfObject newTables = new HashtableOfObject(3); 799 newTables.put(this.cachedCategoryName, this.categoryTables.get(this.cachedCategoryName)); 800 this.categoryTables = newTables; 801 } 802 } 803 } 804 } 805 private void readStreamBuffer(FileInputStream stream) throws IOException { 806 if (this.bufferEnd < this.streamBuffer.length) 809 return; 811 int bytesInBuffer = this.bufferEnd - this.bufferIndex; 812 if (bytesInBuffer > 0) 813 System.arraycopy(this.streamBuffer, this.bufferIndex, this.streamBuffer, 0, bytesInBuffer); 814 this.bufferEnd = bytesInBuffer + stream.read(this.streamBuffer, bytesInBuffer, this.bufferIndex); 815 this.bufferIndex = 0; 816 } 817 837 private char[] readStreamChars(FileInputStream stream) throws IOException { 838 if (stream != null && this.bufferIndex + 2 >= this.bufferEnd) 840 readStreamBuffer(stream); 841 int length = (streamBuffer[this.bufferIndex++] & 0xFF) << 8; 842 length += this.streamBuffer[this.bufferIndex++] & 0xFF; 843 844 char[] word = new char[length]; 846 int i = 0; 847 while (i < length) { 848 int charsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 3); 850 if (charsInBuffer > length || this.bufferEnd != this.streamBuffer.length || stream == null) 852 charsInBuffer = length; 853 while (i < charsInBuffer) { 854 byte b = this.streamBuffer[this.bufferIndex++]; 855 switch (b & 0xF0) { 856 case 0x00 : 857 case 0x10 : 858 case 0x20 : 859 case 0x30 : 860 case 0x40 : 861 case 0x50 : 862 case 0x60 : 863 case 0x70 : 864 word[i++]= (char) b; 865 break; 866 case 0xC0 : 867 case 0xD0 : 868 char next = (char) this.streamBuffer[this.bufferIndex++]; 869 if ((next & 0xC0) != 0x80) { 870 throw new UTFDataFormatException(); 871 } 872 char ch = (char) ((b & 0x1F) << 6); 873 ch |= next & 0x3F; 874 word[i++] = ch; 875 break; 876 case 0xE0 : 877 char first = (char) this.streamBuffer[this.bufferIndex++]; 878 char second = (char) this.streamBuffer[this.bufferIndex++]; 879 if ((first & second & 0xC0) != 0x80) { 880 throw new UTFDataFormatException(); 881 } 882 ch = (char) ((b & 0x0F) << 12); 883 ch |= ((first& 0x3F) << 6); 884 ch |= second & 0x3F; 885 word[i++] = ch; 886 break; 887 default: 888 throw new UTFDataFormatException(); 889 } 890 } 891 if (i < length && stream != null) 892 readStreamBuffer(stream); 893 } 894 return word; 895 } 896 private int[] readStreamDocumentArray(FileInputStream stream, int arraySize) throws IOException { 897 int[] indexes = new int[arraySize]; 898 if (arraySize == 0) return indexes; 899 900 int i = 0; 901 switch (this.documentReferenceSize) { 902 case 1 : 903 while (i < arraySize) { 904 int bytesInBuffer = i + this.bufferEnd - this.bufferIndex; 906 if (bytesInBuffer > arraySize) 907 bytesInBuffer = arraySize; 908 while (i < bytesInBuffer) { 909 indexes[i++] = this.streamBuffer[this.bufferIndex++] & 0xFF; 910 } 911 if (i < arraySize && stream != null) 912 readStreamBuffer(stream); 913 } 914 break; 915 case 2 : 916 while (i < arraySize) { 917 int shortsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 2); 919 if (shortsInBuffer > arraySize) 920 shortsInBuffer = arraySize; 921 while (i < shortsInBuffer) { 922 int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8; 923 indexes[i++] = val + (this.streamBuffer[this.bufferIndex++] & 0xFF); 924 } 925 if (i < arraySize && stream != null) 926 readStreamBuffer(stream); 927 } 928 break; 929 default : 930 while (i < arraySize) { 931 indexes[i++] = readStreamInt(stream); 932 } 933 break; 934 } 935 return indexes; 936 } 937 private int readStreamInt(FileInputStream stream) throws IOException { 938 if (this.bufferIndex + 4 >= this.bufferEnd) { 939 readStreamBuffer(stream); 940 } 941 int val = (streamBuffer[this.bufferIndex++] & 0xFF) << 24; 942 val += (streamBuffer[this.bufferIndex++] & 0xFF) << 16; 943 val += (streamBuffer[this.bufferIndex++] & 0xFF) << 8; 944 return val + (streamBuffer[this.bufferIndex++] & 0xFF); 945 } 946 private void writeAllDocumentNames(String [] sortedDocNames, FileOutputStream stream) throws IOException { 947 if (sortedDocNames.length == 0) 948 throw new IllegalArgumentException (); 949 950 this.streamBuffer = new byte[BUFFER_WRITE_SIZE]; 952 this.bufferIndex = 0; 953 this.streamEnd = 0; 954 955 writeStreamChars(stream, SIGNATURE_CHARS); 957 this.headerInfoOffset = this.streamEnd; 958 writeStreamInt(stream, -1); 960 int size = sortedDocNames.length; 961 this.numberOfChunks = (size / CHUNK_SIZE) + 1; 962 this.sizeOfLastChunk = size % CHUNK_SIZE; 963 if (this.sizeOfLastChunk == 0) { 964 this.numberOfChunks--; 965 this.sizeOfLastChunk = CHUNK_SIZE; 966 } 967 this.documentReferenceSize = size <= 0x7F ? 1 : (size <= 0x7FFF ? 2 : 4); 969 this.chunkOffsets = new int[this.numberOfChunks]; 970 int lastIndex = this.numberOfChunks - 1; 971 for (int i = 0; i < this.numberOfChunks; i++) { 972 this.chunkOffsets[i] = this.streamEnd; 973 974 int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE; 975 int chunkIndex = i * CHUNK_SIZE; 976 String current = sortedDocNames[chunkIndex]; 977 writeStreamChars(stream, current.toCharArray()); 978 for (int j = 1; j < chunkSize; j++) { 979 String next = sortedDocNames[chunkIndex + j]; 980 int len1 = current.length(); 981 int len2 = next.length(); 982 int max = len1 < len2 ? len1 : len2; 983 int start = 0; while (current.charAt(start) == next.charAt(start)) { 985 start++; 986 if (max == start) break; } 988 if (start > 255) start = 255; 989 990 int end = 0; while (current.charAt(--len1) == next.charAt(--len2)) { 992 end++; 993 if (len2 == start) break; if (len1 == 0) break; } 996 if (end > 255) end = 255; 997 if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE) { 998 stream.write(this.streamBuffer, 0, this.bufferIndex); 999 this.bufferIndex = 0; 1000 } 1001 this.streamBuffer[this.bufferIndex++] = (byte) start; 1002 this.streamBuffer[this.bufferIndex++] = (byte) end; 1003 this.streamEnd += 2; 1004 1005 int last = next.length() - end; 1006 writeStreamChars(stream, (start < last ? CharOperation.subarray(next.toCharArray(), start, last) : CharOperation.NO_CHAR)); 1007 current = next; 1008 } 1009 } 1010 this.startOfCategoryTables = this.streamEnd + 1; 1011} 1012private void writeCategories(FileOutputStream stream) throws IOException { 1013 char[][] categoryNames = this.categoryTables.keyTable; 1014 Object [] tables = this.categoryTables.valueTable; 1015 for (int i = 0, l = categoryNames.length; i < l; i++) 1016 if (categoryNames[i] != null) 1017 writeCategoryTable(categoryNames[i], (HashtableOfObject) tables[i], stream); 1018 this.categoryTables = null; 1019} 1020private void writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream) throws IOException { 1021 1029 int largeArraySize = 256; 1030 Object [] values = wordsToDocs.valueTable; 1031 for (int i = 0, l = values.length; i < l; i++) { 1032 Object o = values[i]; 1033 if (o != null) { 1034 if (o instanceof IntList) 1035 o = values[i] = ((IntList) values[i]).asArray(); 1036 int[] documentNumbers = (int[]) o; 1037 if (documentNumbers.length >= largeArraySize) { 1038 values[i] = new Integer (this.streamEnd); 1039 writeDocumentNumbers(documentNumbers, stream); 1040 } 1041 } 1042 } 1043 1044 this.categoryOffsets.put(categoryName, this.streamEnd); this.categoryTables.put(categoryName, null); writeStreamInt(stream, wordsToDocs.elementSize); 1047 char[][] words = wordsToDocs.keyTable; 1048 for (int i = 0, l = words.length; i < l; i++) { 1049 Object o = values[i]; 1050 if (o != null) { 1051 writeStreamChars(stream, words[i]); 1052 if (o instanceof int[]) { 1053 int[] documentNumbers = (int[]) o; 1054 if (documentNumbers.length == 1) 1055 writeStreamInt(stream, -documentNumbers[0]); else 1057 writeDocumentNumbers(documentNumbers, stream); 1058 } else { 1059 writeStreamInt(stream, largeArraySize); writeStreamInt(stream, ((Integer ) o).intValue()); } 1062 } 1063 } 1064} 1065private void writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream) throws IOException { 1066 int length = documentNumbers.length; 1068 writeStreamInt(stream, length); 1069 Util.sort(documentNumbers); 1070 int start = 0; 1071 switch (this.documentReferenceSize) { 1072 case 1 : 1073 while ((this.bufferIndex + length - start) >= BUFFER_WRITE_SIZE) { 1074 int bytesLeft = BUFFER_WRITE_SIZE - this.bufferIndex; 1076 for (int i=0; i < bytesLeft; i++) { 1077 this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++]; 1078 } 1079 stream.write(this.streamBuffer, 0, this.bufferIndex); 1080 this.bufferIndex = 0; 1081 } 1082 while (start < length) { 1083 this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++]; 1084 } 1085 this.streamEnd += length; 1086 break; 1087 case 2 : 1088 while ((this.bufferIndex + ((length - start) * 2)) >= BUFFER_WRITE_SIZE) { 1089 int shortsLeft = (BUFFER_WRITE_SIZE - this.bufferIndex) / 2; 1091 for (int i=0; i < shortsLeft; i++) { 1092 this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8); 1093 this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++]; 1094 } 1095 stream.write(this.streamBuffer, 0, this.bufferIndex); 1096 this.bufferIndex = 0; 1097 } 1098 while (start < length) { 1099 this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8); 1100 this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++]; 1101 } 1102 this.streamEnd += length * 2; 1103 break; 1104 default : 1105 while (start < length) { 1106 writeStreamInt(stream, documentNumbers[start++]); 1107 } 1108 break; 1109 } 1110} 1111private void writeHeaderInfo(FileOutputStream stream) throws IOException { 1112 writeStreamInt(stream, this.numberOfChunks); 1113 if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE) { 1114 stream.write(this.streamBuffer, 0, this.bufferIndex); 1115 this.bufferIndex = 0; 1116 } 1117 this.streamBuffer[this.bufferIndex++] = (byte) this.sizeOfLastChunk; 1118 this.streamBuffer[this.bufferIndex++] = (byte) this.documentReferenceSize; 1119 this.streamEnd += 2; 1120 1121 for (int i = 0; i < this.numberOfChunks; i++) { 1123 writeStreamInt(stream, this.chunkOffsets[i]); 1124 } 1125 1126 writeStreamInt(stream, this.startOfCategoryTables); 1127 1128 writeStreamInt(stream, this.categoryOffsets.elementSize); 1130 char[][] categoryNames = this.categoryOffsets.keyTable; 1131 int[] offsets = this.categoryOffsets.valueTable; 1132 for (int i = 0, l = categoryNames.length; i < l; i++) { 1133 if (categoryNames[i] != null) { 1134 writeStreamChars(stream, categoryNames[i]); 1135 writeStreamInt(stream, offsets[i]); 1136 } 1137 } 1138 if (this.bufferIndex > 0) { 1140 stream.write(this.streamBuffer, 0, this.bufferIndex); 1141 this.bufferIndex = 0; 1142 } 1143} 1144private void writeOffsetToHeader(int offsetToHeader) throws IOException { 1145 if (offsetToHeader > 0) { 1146 RandomAccessFile file = new RandomAccessFile(this.indexFile, "rw"); try { 1148 file.seek(this.headerInfoOffset); file.writeInt(offsetToHeader); 1150 this.headerInfoOffset = offsetToHeader; } finally { 1152 file.close(); 1153 } 1154 } 1155} 1156 1175private void writeStreamChars(FileOutputStream stream, char[] array) throws IOException { 1176 if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE) { 1177 stream.write(this.streamBuffer, 0, this.bufferIndex); 1178 this.bufferIndex = 0; 1179 } 1180 int length = array.length; 1181 this.streamBuffer[this.bufferIndex++] = (byte) ((length >>> 8) & 0xFF); this.streamBuffer[this.bufferIndex++] = (byte) (length & 0xFF); this.streamEnd += 2; 1184 1185 int totalBytesNeeded = length * 3; 1187 if (totalBytesNeeded <= BUFFER_WRITE_SIZE) { 1188 if (this.bufferIndex + totalBytesNeeded > BUFFER_WRITE_SIZE) { 1189 stream.write(this.streamBuffer, 0, this.bufferIndex); 1191 this.bufferIndex = 0; 1192 } 1193 writeStreamChars(stream, array, 0, length); 1194 } else { 1195 int charsPerWrite = BUFFER_WRITE_SIZE / 3; 1196 int start = 0; 1197 while (start < length) { 1198 stream.write(this.streamBuffer, 0, this.bufferIndex); 1199 this.bufferIndex = 0; 1200 int charsLeftToWrite = length - start; 1201 int end = start + (charsPerWrite < charsLeftToWrite ? charsPerWrite : charsLeftToWrite); 1202 writeStreamChars(stream, array, start, end); 1203 start = end; 1204 } 1205 } 1206} 1207private void writeStreamChars(FileOutputStream stream, char[] array, int start, int end) throws IOException { 1208 1211 int oldIndex = this.bufferIndex; 1212 while (start < end) { 1213 int ch = array[start++]; 1214 if ((ch & 0x007F) == ch) { 1215 this.streamBuffer[this.bufferIndex++] = (byte) ch; 1216 } else if ((ch & 0x07FF) == ch) { 1217 byte b = (byte) (ch >> 6); 1219 b &= 0x1F; 1220 b |= 0xC0; 1221 this.streamBuffer[this.bufferIndex++] = b; 1222 b = (byte) (ch & 0x3F); 1224 b |= 0x80; 1225 this.streamBuffer[this.bufferIndex++] = b; 1226 } else { 1227 byte b = (byte) (ch >> 12); 1229 b &= 0x0F; 1230 b |= 0xE0; 1231 this.streamBuffer[this.bufferIndex++] = b; 1232 b = (byte) (ch >> 6); 1234 b &= 0x3F; 1235 b |= 0x80; 1236 this.streamBuffer[this.bufferIndex++] = b; 1237 b = (byte) (ch & 0x3F); 1239 b |= 0x80; 1240 this.streamBuffer[this.bufferIndex++] = b; 1241 } 1242 } 1243 this.streamEnd += this.bufferIndex - oldIndex; 1244} 1245private void writeStreamInt(FileOutputStream stream, int val) throws IOException { 1246 if ((this.bufferIndex + 4) >= BUFFER_WRITE_SIZE) { 1247 stream.write(this.streamBuffer, 0, this.bufferIndex); 1248 this.bufferIndex = 0; 1249 } 1250 this.streamBuffer[this.bufferIndex++] = (byte) (val >> 24); 1251 this.streamBuffer[this.bufferIndex++] = (byte) (val >> 16); 1252 this.streamBuffer[this.bufferIndex++] = (byte) (val >> 8); 1253 this.streamBuffer[this.bufferIndex++] = (byte) val; 1254 this.streamEnd += 4; 1255} 1256} 1257 | Popular Tags |