DiskIndex


1   /*******************************************************************************
2    * Copyright (c) 2000, 2007 IBM Corporation and others.
3    * All rights reserved. This program and the accompanying materials
4    * are made available under the terms of the Eclipse Public License v1.0
5    * which accompanies this distribution, and is available at
6    * http://www.eclipse.org/legal/epl-v10.html
7    *
8    * Contributors:
9    *     IBM Corporation - initial API and implementation
10   *******************************************************************************/
11  package org.eclipse.jdt.internal.core.index;
12  
13  import java.io.*;
14  
15  import org.eclipse.jdt.core.compiler.CharOperation;
16  import org.eclipse.jdt.core.search.*;
17  import org.eclipse.jdt.internal.core.util.*;
18  import org.eclipse.jdt.internal.compiler.util.HashtableOfIntValues;
19  import org.eclipse.jdt.internal.compiler.util.HashtableOfObject;
20  import org.eclipse.jdt.internal.compiler.util.SimpleLookupTable;
21  import org.eclipse.jdt.internal.compiler.util.SimpleSet;
22  import org.eclipse.jdt.internal.compiler.util.SimpleSetOfCharArray;
23  
24  public class DiskIndex {
25  
26  File indexFile;
27  
28  private int headerInfoOffset;
29  private int numberOfChunks;
30  private int sizeOfLastChunk;
31  private int[] chunkOffsets;
32  private int documentReferenceSize; // 1, 2 or more bytes... depends on # of document names
33  private int startOfCategoryTables;
34  private HashtableOfIntValues categoryOffsets, categoryEnds;
35  
36  private int cacheUserCount;
37  private String  [][] cachedChunks; // decompressed chunks of document names
38  private HashtableOfObject categoryTables; // category name -> HashtableOfObject(words -> int[] of document #'s) or offset if not read yet
39  private char[] cachedCategoryName;
40  
41  private static final int DEFAULT_BUFFER_SIZE = 2048;
42  private static int BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
43  private static final int BUFFER_WRITE_SIZE = DEFAULT_BUFFER_SIZE;
44  private byte[] streamBuffer;
45  private int bufferIndex, bufferEnd; // used when reading from the file into the streamBuffer
46  private int streamEnd; // used when writing data from the streamBuffer to the file
47  
48  public static final String   SIGNATURE= "INDEX VERSION 1.121"; //$NON-NLS-1$
49  private static final char[] SIGNATURE_CHARS = SIGNATURE.toCharArray();
50  public static boolean DEBUG = false;
51  
52  private static final int RE_INDEXED = -1;
53  private static final int DELETED = -2;
54  
55  private static final int CHUNK_SIZE = 100;
56  
57  private static final SimpleSetOfCharArray INTERNED_CATEGORY_NAMES = new SimpleSetOfCharArray(20);
58  
59  static class IntList {
60  
61  int size;
62  int[] elements;
63  
64  IntList(int[] elements) {
65      this.elements = elements;
66      this.size = elements.length;
67  }
68  void add(int newElement) {
69      if (this.size == this.elements.length) {
70          int newSize = this.size * 3;
71          if (newSize < 7) newSize = 7;
72          System.arraycopy(this.elements, 0, this.elements = new int[newSize], 0, this.size);
73      }
74      this.elements[this.size++] = newElement;
75  }
76  int[] asArray() {
77      int[] result = new int[this.size];
78      System.arraycopy(this.elements, 0, result, 0, this.size);
79      return result;
80  }   
81  }
82  
83  
84  DiskIndex(String   fileName) {
85      if (fileName == null)
86          throw new java.lang.IllegalArgumentException  ();
87      this.indexFile = new File(fileName);
88  
89      // clear cached items
90      this.headerInfoOffset = -1;
91      this.numberOfChunks = -1;
92      this.sizeOfLastChunk = -1;
93      this.chunkOffsets = null;
94      this.documentReferenceSize = -1;
95      this.cacheUserCount = -1;
96      this.cachedChunks = null;
97      this.categoryTables = null;
98      this.cachedCategoryName = null;
99      this.categoryOffsets = null;
100     this.categoryEnds = null;
101 }
102 SimpleSet addDocumentNames(String   substring, MemoryIndex memoryIndex) throws IOException {
103     // must skip over documents which have been added/changed/deleted in the memory index
104     String  [] docNames = readAllDocumentNames();
105     SimpleSet results = new SimpleSet(docNames.length);
106     if (substring == null) {
107         if (memoryIndex == null) {
108             for (int i = 0, l = docNames.length; i < l; i++)
109                 results.add(docNames[i]);
110         } else {
111             SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
112             for (int i = 0, l = docNames.length; i < l; i++) {
113                 String   docName = docNames[i];
114                 if (!docsToRefs.containsKey(docName))
115                     results.add(docName);
116             }
117         }
118     } else {
119         if (memoryIndex == null) {
120             for (int i = 0, l = docNames.length; i < l; i++)
121                 if (docNames[i].startsWith(substring, 0))
122                     results.add(docNames[i]);
123         } else {
124             SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
125             for (int i = 0, l = docNames.length; i < l; i++) {
126                 String   docName = docNames[i];
127                 if (docName.startsWith(substring, 0) && !docsToRefs.containsKey(docName))
128                     results.add(docName);
129             }
130         }
131     }
132     return results;
133 }
134 private HashtableOfObject addQueryResult(HashtableOfObject results, char[] word, HashtableOfObject wordsToDocNumbers, MemoryIndex memoryIndex) throws IOException {
135     // must skip over documents which have been added/changed/deleted in the memory index
136     if (results == null)
137         results = new HashtableOfObject(13);
138     EntryResult result = (EntryResult) results.get(word);
139     if (memoryIndex == null) {
140         if (result == null)
141             results.put(word, new EntryResult(word, wordsToDocNumbers));
142         else
143             result.addDocumentTable(wordsToDocNumbers);
144     } else {
145         SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
146         if (result == null) result = new EntryResult(word, null);
147         int[] docNumbers = readDocumentNumbers(wordsToDocNumbers.get(word));
148         for (int i = 0, l = docNumbers.length; i < l; i++) {
149             String   docName = readDocumentName(docNumbers[i]);
150             if (!docsToRefs.containsKey(docName))
151                 result.addDocumentName(docName);
152         }
153         if (!result.isEmpty())
154             results.put(word, result);
155     }
156     return results;
157 }
158 HashtableOfObject addQueryResults(char[][] categories, char[] key, int matchRule, MemoryIndex memoryIndex) throws IOException {
159     // assumes sender has called startQuery() & will call stopQuery() when finished
160     if (this.categoryOffsets == null) return null; // file is empty
161 
162     HashtableOfObject results = null; // initialized if needed
163     if (key == null) {
164         for (int i = 0, l = categories.length; i < l; i++) {
165             HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], true); // cache if key is null since its a definite match
166             if (wordsToDocNumbers != null) {
167                 char[][] words = wordsToDocNumbers.keyTable;
168                 if (results == null)
169                     results = new HashtableOfObject(wordsToDocNumbers.elementSize);
170                 for (int j = 0, m = words.length; j < m; j++)
171                     if (words[j] != null)
172                         results = addQueryResult(results, words[j], wordsToDocNumbers, memoryIndex);
173             }
174         }
175         if (results != null && this.cachedChunks == null)
176             cacheDocumentNames();
177     } else {
178         switch (matchRule) {
179             case SearchPattern.R_EXACT_MATCH | SearchPattern.R_CASE_SENSITIVE:
180                 for (int i = 0, l = categories.length; i < l; i++) {
181                     HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
182                     if (wordsToDocNumbers != null && wordsToDocNumbers.containsKey(key))
183                         results = addQueryResult(results, key, wordsToDocNumbers, memoryIndex);
184                 }
185                 break;
186             case SearchPattern.R_PREFIX_MATCH | SearchPattern.R_CASE_SENSITIVE:
187                 for (int i = 0, l = categories.length; i < l; i++) {
188                     HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
189                     if (wordsToDocNumbers != null) {
190                         char[][] words = wordsToDocNumbers.keyTable;
191                         for (int j = 0, m = words.length; j < m; j++) {
192                             char[] word = words[j];
193                             if (word != null && key[0] == word[0] && CharOperation.prefixEquals(key, word))
194                                 results = addQueryResult(results, word, wordsToDocNumbers, memoryIndex);
195                         }
196                     }
197                 }
198                 break;
199             default:
200                 for (int i = 0, l = categories.length; i < l; i++) {
201                     HashtableOfObject wordsToDocNumbers = readCategoryTable(categories[i], false);
202                     if (wordsToDocNumbers != null) {
203                         char[][] words = wordsToDocNumbers.keyTable;
204                         for (int j = 0, m = words.length; j < m; j++) {
205                             char[] word = words[j];
206                             if (word != null && Index.isMatch(key, word, matchRule))
207                                 results = addQueryResult(results, word, wordsToDocNumbers, memoryIndex);
208                         }
209                     }
210                 }
211         }
212     }
213 
214     if (results == null) return null;
215     return results;
216 }
217 private void cacheDocumentNames() throws IOException {
218     // will need all document names so get them now
219     this.cachedChunks = new String  [this.numberOfChunks][];
220     FileInputStream stream = new FileInputStream(this.indexFile);
221     try {
222         if (this.numberOfChunks > 5) BUFFER_READ_SIZE <<= 1;
223         int offset = this.chunkOffsets[0];
224         stream.skip(offset);
225         this.streamBuffer = new byte[BUFFER_READ_SIZE];
226         this.bufferIndex = 0;
227         this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
228         for (int i = 0; i < this.numberOfChunks; i++) {
229             int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk : CHUNK_SIZE;
230             readChunk(this.cachedChunks[i] = new String  [size], stream, 0, size);
231         }
232     } catch (IOException e) {
233         this.cachedChunks = null;
234         throw e;
235     } finally {
236         stream.close();
237         this.streamBuffer = null;
238         BUFFER_READ_SIZE = DEFAULT_BUFFER_SIZE;
239     }
240 }
241 private String  [] computeDocumentNames(String  [] onDiskNames, int[] positions, SimpleLookupTable indexedDocuments, MemoryIndex memoryIndex) {
242     int onDiskLength = onDiskNames.length;
243     Object  [] docNames = memoryIndex.docsToReferences.keyTable;
244     Object  [] referenceTables = memoryIndex.docsToReferences.valueTable;
245     if (onDiskLength == 0) {
246         // disk index was empty, so add every indexed document
247         for (int i = 0, l = referenceTables.length; i < l; i++)
248             if (referenceTables[i] != null)
249                 indexedDocuments.put(docNames[i], null); // remember each new document
250 
251         String  [] newDocNames = new String  [indexedDocuments.elementSize];
252         int count = 0;
253         Object  [] added = indexedDocuments.keyTable;
254         for (int i = 0, l = added.length; i < l; i++)
255             if (added[i] != null)
256                 newDocNames[count++] = (String  ) added[i];
257         Util.sort(newDocNames);
258         for (int i = 0, l = newDocNames.length; i < l; i++)
259             indexedDocuments.put(newDocNames[i], new Integer  (i));
260         return newDocNames;
261     }
262 
263     // initialize positions as if each document will remain in the same position
264     for (int i = 0; i < onDiskLength; i++)
265         positions[i] = i;
266 
267     // find out if the memory index has any new or deleted documents, if not then the names & positions are the same
268     int numDeletedDocNames = 0;
269     int numReindexedDocNames = 0;
270     nextPath : for (int i = 0, l = docNames.length; i < l; i++) {
271         String   docName = (String  ) docNames[i];
272         if (docName != null) {
273             for (int j = 0; j < onDiskLength; j++) {
274                 if (docName.equals(onDiskNames[j])) {
275                     if (referenceTables[i] == null) {
276                         positions[j] = DELETED;
277                         numDeletedDocNames++;
278                     } else {
279                         positions[j] = RE_INDEXED;
280                         numReindexedDocNames++;
281                     }
282                     continue nextPath;
283                 }
284             }
285             if (referenceTables[i] != null)
286                 indexedDocuments.put(docName, null); // remember each new document, skip deleted documents which were never saved
287         }
288     }
289 
290     String  [] newDocNames = onDiskNames;
291     if (numDeletedDocNames > 0 || indexedDocuments.elementSize > 0) {
292         // some new documents have been added or some old ones deleted
293         newDocNames = new String  [onDiskLength + indexedDocuments.elementSize - numDeletedDocNames];
294         int count = 0;
295         for (int i = 0; i < onDiskLength; i++)
296             if (positions[i] >= RE_INDEXED)
297                 newDocNames[count++] = onDiskNames[i]; // keep each unchanged document
298         Object  [] added = indexedDocuments.keyTable;
299         for (int i = 0, l = added.length; i < l; i++)
300             if (added[i] != null)
301                 newDocNames[count++] = (String  ) added[i]; // add each new document
302         Util.sort(newDocNames);
303         for (int i = 0, l = newDocNames.length; i < l; i++)
304             if (indexedDocuments.containsKey(newDocNames[i]))
305                 indexedDocuments.put(newDocNames[i], new Integer  (i)); // remember the position for each new document
306     }
307 
308     // need to be able to look up an old position (ref# from a ref[]) and map it to its new position
309     // if its old position == DELETED then its forgotton
310     // if its old position == ReINDEXED then its also forgotten but its new position is needed to map references
311     int count = -1;
312     for (int i = 0; i < onDiskLength;) {
313         switch(positions[i]) {
314             case DELETED :
315                 i++; // skip over deleted... references are forgotten
316                 break;
317             case RE_INDEXED :
318                 String   newName = newDocNames[++count];
319                 if (newName.equals(onDiskNames[i])) {
320                     indexedDocuments.put(newName, new Integer  (count)); // the reindexed docName that was at position i is now at position count
321                     i++;
322                 }
323                 break;
324             default :
325                 if (newDocNames[++count].equals(onDiskNames[i]))
326                     positions[i++] = count; // the unchanged docName that was at position i is now at position count
327         }
328     }
329     return newDocNames;
330 }
331 private void copyQueryResults(HashtableOfObject categoryToWords, int newPosition) {
332     char[][] categoryNames = categoryToWords.keyTable;
333     Object  [] wordSets = categoryToWords.valueTable;
334     for (int i = 0, l = categoryNames.length; i < l; i++) {
335         char[] categoryName = categoryNames[i];
336         if (categoryName != null) {
337             SimpleWordSet wordSet = (SimpleWordSet) wordSets[i];
338             HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
339             if (wordsToDocs == null)
340                 this.categoryTables.put(categoryName, wordsToDocs = new HashtableOfObject(wordSet.elementSize));
341 
342             char[][] words = wordSet.words;
343             for (int j = 0, m = words.length; j < m; j++) {
344                 char[] word = words[j];
345                 if (word != null) {
346                     Object   o = wordsToDocs.get(word);
347                     if (o == null) {
348                         wordsToDocs.put(word, new int[] {newPosition});
349                     } else if (o instanceof IntList) {
350                         ((IntList) o).add(newPosition);
351                     } else {
352                         IntList list = new IntList((int[]) o);
353                         list.add(newPosition);
354                         wordsToDocs.put(word, list);
355                     }
356                 }
357             }
358         }
359     }
360 }
361 void initialize(boolean reuseExistingFile) throws IOException {
362     if (this.indexFile.exists()) {
363         if (reuseExistingFile) {
364             RandomAccessFile file = new RandomAccessFile(this.indexFile, "r"); //$NON-NLS-1$
365             try {
366                 String   signature = file.readUTF();
367                 if (!signature.equals(SIGNATURE))
368                     throw new IOException(Messages.exception_wrongFormat); 
369 
370                 this.headerInfoOffset = file.readInt();
371                 if (this.headerInfoOffset > 0) // file is empty if its not set
372                     readHeaderInfo(file);
373             } finally {
374                 file.close();
375             }
376             return;
377         }
378         if (!this.indexFile.delete()) {
379             if (DEBUG)
380                 System.out.println("initialize - Failed to delete index " + this.indexFile); //$NON-NLS-1$
381             throw new IOException("Failed to delete index " + this.indexFile); //$NON-NLS-1$
382         }
383     }
384     if (this.indexFile.createNewFile()) {
385         RandomAccessFile file = new RandomAccessFile(this.indexFile, "rw"); //$NON-NLS-1$
386         try {
387             file.writeUTF(SIGNATURE);
388             file.writeInt(-1); // file is empty
389         } finally {
390             file.close();
391         }
392     } else {
393         if (DEBUG)
394             System.out.println("initialize - Failed to create new index " + this.indexFile); //$NON-NLS-1$
395         throw new IOException("Failed to create new index " + this.indexFile); //$NON-NLS-1$
396     }
397 }
398 private void initializeFrom(DiskIndex diskIndex, File newIndexFile) throws IOException {
399     if (newIndexFile.exists() && !newIndexFile.delete()) { // delete the temporary index file
400         if (DEBUG)
401             System.out.println("initializeFrom - Failed to delete temp index " + this.indexFile); //$NON-NLS-1$
402     } else if (!newIndexFile.createNewFile()) {
403         if (DEBUG)
404             System.out.println("initializeFrom - Failed to create temp index " + this.indexFile); //$NON-NLS-1$
405         throw new IOException("Failed to create temp index " + this.indexFile); //$NON-NLS-1$
406     }
407 
408     int size = diskIndex.categoryOffsets == null ? 8 : diskIndex.categoryOffsets.elementSize;
409     this.categoryOffsets = new HashtableOfIntValues(size);
410     this.categoryEnds = new HashtableOfIntValues(size);
411     this.categoryTables = new HashtableOfObject(size);
412 }
413 private void mergeCategories(DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
414     // at this point, this.categoryTables contains the names -> wordsToDocs added in copyQueryResults()
415     char[][] oldNames = onDisk.categoryOffsets.keyTable;
416     for (int i = 0, l = oldNames.length; i < l; i++) {
417         char[] oldName = oldNames[i];
418         if (oldName != null && !this.categoryTables.containsKey(oldName))
419             this.categoryTables.put(oldName, null);
420     }
421 
422     char[][] categoryNames = this.categoryTables.keyTable;
423     for (int i = 0, l = categoryNames.length; i < l; i++)
424         if (categoryNames[i] != null)
425             mergeCategory(categoryNames[i], onDisk, positions, stream);
426     this.categoryTables = null;
427 }
428 private void mergeCategory(char[] categoryName, DiskIndex onDisk, int[] positions, FileOutputStream stream) throws IOException {
429     HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables.get(categoryName);
430     if (wordsToDocs == null)
431         wordsToDocs = new HashtableOfObject(3);
432 
433     HashtableOfObject oldWordsToDocs = onDisk.readCategoryTable(categoryName, true);
434     if (oldWordsToDocs != null) {
435         char[][] oldWords = oldWordsToDocs.keyTable;
436         Object  [] oldArrayOffsets = oldWordsToDocs.valueTable;
437         nextWord: for (int i = 0, l = oldWords.length; i < l; i++) {
438             char[] oldWord = oldWords[i];
439             if (oldWord != null) {
440                 int[] oldDocNumbers = (int[]) oldArrayOffsets[i];
441                 int length = oldDocNumbers.length;
442                 int[] mappedNumbers = new int[length];
443                 int count = 0;
444                 for (int j = 0; j < length; j++) {
445                     int pos = positions[oldDocNumbers[j]];
446                     if (pos > RE_INDEXED) // forget any reference to a document which was deleted or re_indexed
447                         mappedNumbers[count++] = pos;
448                 }
449                 if (count < length) {
450                     if (count == 0) continue nextWord; // skip words which no longer have any references
451                     System.arraycopy(mappedNumbers, 0, mappedNumbers = new int[count], 0, count);
452                 }
453 
454                 Object   o = wordsToDocs.get(oldWord);
455                 if (o == null) {
456                     wordsToDocs.put(oldWord, mappedNumbers);
457                 } else {
458                     IntList list = null;
459                     if (o instanceof IntList) {
460                         list = (IntList) o;
461                     } else {
462                         list = new IntList((int[]) o);
463                         wordsToDocs.put(oldWord, list);
464                     }
465                     for (int j = 0; j < count; j++)
466                         list.add(mappedNumbers[j]);
467                 }
468             }
469         }
470         onDisk.categoryTables.put(categoryName, null); // flush cached table
471     }
472     writeCategoryTable(categoryName, wordsToDocs, stream);
473 }
474 DiskIndex mergeWith(MemoryIndex memoryIndex) throws IOException {
475     // assume write lock is held
476     // compute & write out new docNames
477     String  [] docNames = readAllDocumentNames();
478     int previousLength = docNames.length;
479     int[] positions = new int[previousLength]; // keeps track of the position of each document in the new sorted docNames
480     SimpleLookupTable indexedDocuments = new SimpleLookupTable(3); // for each new/changed document in the memoryIndex
481     docNames = computeDocumentNames(docNames, positions, indexedDocuments, memoryIndex);
482     if (docNames.length == 0) {
483         if (previousLength == 0) return this; // nothing to do... memory index contained deleted documents that had never been saved
484 
485         // index is now empty since all the saved documents were removed
486         DiskIndex newDiskIndex = new DiskIndex(this.indexFile.getPath());
487         newDiskIndex.initialize(false);
488         return newDiskIndex;
489     }
490 
491     DiskIndex newDiskIndex = new DiskIndex(this.indexFile.getPath() + ".tmp"); //$NON-NLS-1$
492     try {
493         newDiskIndex.initializeFrom(this, newDiskIndex.indexFile);
494         FileOutputStream stream = new FileOutputStream(newDiskIndex.indexFile, false);
495         int offsetToHeader = -1;
496         try {
497             newDiskIndex.writeAllDocumentNames(docNames, stream);
498             docNames = null; // free up the space
499 
500             // add each new/changed doc to empty category tables using its new position #
501             if (indexedDocuments.elementSize > 0) {
502                 Object  [] names = indexedDocuments.keyTable;
503                 Object  [] integerPositions = indexedDocuments.valueTable;
504                 for (int i = 0, l = names.length; i < l; i++)
505                     if (names[i] != null)
506                         newDiskIndex.copyQueryResults(
507                             (HashtableOfObject) memoryIndex.docsToReferences.get(names[i]), ((Integer  ) integerPositions[i]).intValue());
508             }
509             indexedDocuments = null; // free up the space
510 
511             // merge each category table with the new ones & write them out
512             if (previousLength == 0)
513                 newDiskIndex.writeCategories(stream);
514             else
515                 newDiskIndex.mergeCategories(this, positions, stream);
516             offsetToHeader = newDiskIndex.streamEnd;
517             newDiskIndex.writeHeaderInfo(stream);
518             positions = null; // free up the space
519         } finally {
520             stream.close();
521             this.streamBuffer = null;
522         }
523         newDiskIndex.writeOffsetToHeader(offsetToHeader);
524 
525         // rename file by deleting previous index file & renaming temp one
526         if (this.indexFile.exists() && !this.indexFile.delete()) {
527             if (DEBUG)
528                 System.out.println("mergeWith - Failed to delete " + this.indexFile); //$NON-NLS-1$
529             throw new IOException("Failed to delete index file " + this.indexFile); //$NON-NLS-1$
530         }
531         if (!newDiskIndex.indexFile.renameTo(this.indexFile)) {
532             if (DEBUG)
533                 System.out.println("mergeWith - Failed to rename " + this.indexFile); //$NON-NLS-1$
534             throw new IOException("Failed to rename index file " + this.indexFile); //$NON-NLS-1$
535         }
536     } catch (IOException e) {
537         if (newDiskIndex.indexFile.exists() && !newDiskIndex.indexFile.delete())
538             if (DEBUG)
539                 System.out.println("mergeWith - Failed to delete temp index " + newDiskIndex.indexFile); //$NON-NLS-1$
540         throw e;
541     }
542 
543     newDiskIndex.indexFile = this.indexFile;
544     return newDiskIndex;
545 }
546 private synchronized String  [] readAllDocumentNames() throws IOException {
547     if (this.numberOfChunks <= 0)
548         return CharOperation.NO_STRINGS;
549 
550     FileInputStream stream = new FileInputStream(this.indexFile);
551     try {
552         int offset = this.chunkOffsets[0];
553         stream.skip(offset);
554         this.streamBuffer = new byte[BUFFER_READ_SIZE];
555         this.bufferIndex = 0;
556         this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
557         int lastIndex = this.numberOfChunks - 1;
558         String  [] docNames = new String  [lastIndex * CHUNK_SIZE + sizeOfLastChunk];
559         for (int i = 0; i < this.numberOfChunks; i++)
560             readChunk(docNames, stream, i * CHUNK_SIZE, i < lastIndex ? CHUNK_SIZE : sizeOfLastChunk);
561         return docNames;
562     } finally {
563         stream.close();
564         this.streamBuffer = null;
565     }
566 }
567 private synchronized HashtableOfObject readCategoryTable(char[] categoryName, boolean readDocNumbers) throws IOException {
568     // result will be null if categoryName is unknown
569     int offset = this.categoryOffsets.get(categoryName);
570     if (offset == HashtableOfIntValues.NO_VALUE) {
571         return null;
572     }
573 
574     if (this.categoryTables == null) {
575         this.categoryTables = new HashtableOfObject(3);
576     } else {
577         HashtableOfObject cachedTable = (HashtableOfObject) this.categoryTables.get(categoryName);
578         if (cachedTable != null) {
579             if (readDocNumbers) { // must cache remaining document number arrays
580                 Object  [] arrayOffsets = cachedTable.valueTable;
581                 for (int i = 0, l = arrayOffsets.length; i < l; i++)
582                     if (arrayOffsets[i] instanceof Integer  )
583                         arrayOffsets[i] = readDocumentNumbers(arrayOffsets[i]);
584             }
585             return cachedTable;
586         }
587     }
588 
589     FileInputStream stream = new FileInputStream(this.indexFile);
590     HashtableOfObject categoryTable = null;
591     char[][] matchingWords = null;
592     int count = 0;
593     int firstOffset = -1;
594     this.streamBuffer = new byte[BUFFER_READ_SIZE];
595     try {
596         stream.skip(offset);
597         this.bufferIndex = 0;
598         this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
599         int size = readStreamInt(stream);
600         try {
601             if (size < 0) { // DEBUG
602                 System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
603                 System.err.println("file = "+this.indexFile); //$NON-NLS-1$
604                 System.err.println("offset = "+offset); //$NON-NLS-1$
605                 System.err.println("size = "+size); //$NON-NLS-1$
606                 System.err.println("--------------------   END   --------------------"); //$NON-NLS-1$
607             }
608             categoryTable = new HashtableOfObject(size);
609         } catch (OutOfMemoryError   oom) {
610             // DEBUG
611             oom.printStackTrace();
612             System.err.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
613             System.err.println("file = "+this.indexFile); //$NON-NLS-1$
614             System.err.println("offset = "+offset); //$NON-NLS-1$
615             System.err.println("size = "+size); //$NON-NLS-1$
616             System.err.println("--------------------   END   --------------------"); //$NON-NLS-1$
617             throw oom;
618         }
619         int largeArraySize = 256;
620         for (int i = 0; i < size; i++) {
621             char[] word = readStreamChars(stream);
622             int arrayOffset = readStreamInt(stream);
623             // if arrayOffset is:
624             //      <= 0 then the array size == 1 with the value -> -arrayOffset
625             //      > 1 & < 256 then the size of the array is > 1 & < 256, the document array follows immediately
626             //      256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)
627             if (arrayOffset <= 0) {
628                 categoryTable.put(word, new int[] {-arrayOffset}); // store 1 element array by negating documentNumber
629             } else if (arrayOffset < largeArraySize) {
630                 categoryTable.put(word, readStreamDocumentArray(stream, arrayOffset)); // read in-lined array providing size
631             } else {
632                 arrayOffset = readStreamInt(stream); // read actual offset
633                 if (readDocNumbers) {
634                     if (matchingWords == null)
635                         matchingWords = new char[size][];
636                     if (count == 0)
637                         firstOffset = arrayOffset;
638                     matchingWords[count++] = word;
639                 }
640                 categoryTable.put(word, new Integer  (arrayOffset)); // offset to array in the file
641             }
642         }
643         this.categoryTables.put(INTERNED_CATEGORY_NAMES.get(categoryName), categoryTable);
644         // cache the table as long as its not too big
645         // in practice, some tables can be greater than 500K when they contain more than 10K elements
646         this.cachedCategoryName = categoryTable.elementSize < 20000 ? categoryName : null;
647     } catch (IOException ioe) {
648         this.streamBuffer = null;
649         throw ioe;
650     } finally {
651         stream.close();
652     }
653 
654     if (matchingWords != null && count > 0) {
655         stream = new FileInputStream(this.indexFile);
656         try {
657             stream.skip(firstOffset);
658             this.bufferIndex = 0;
659             this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
660             for (int i = 0; i < count; i++) { // each array follows the previous one
661                 categoryTable.put(matchingWords[i], readStreamDocumentArray(stream, readStreamInt(stream)));
662             }
663         } catch (IOException ioe) {
664             this.streamBuffer = null;
665             throw ioe;
666         } finally {
667             stream.close();
668         }
669     }
670     this.streamBuffer = null;
671     return categoryTable;
672 }
673 private void readChunk(String  [] docNames, FileInputStream stream, int index, int size) throws IOException {
674     String   current = new String  (readStreamChars(stream));
675     docNames[index++] = current;
676     for (int i = 1; i < size; i++) {
677         if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
678             readStreamBuffer(stream);
679         int start = streamBuffer[this.bufferIndex++] & 0xFF;
680         int end = streamBuffer[this.bufferIndex++] & 0xFF;
681         String   next  = new String  (readStreamChars(stream));
682         if (start > 0) {
683             if (end > 0) {
684                 int length = current.length();
685                 next = current.substring(0, start) + next + current.substring(length - end, length);
686             } else {
687                 next = current.substring(0, start) + next;
688             }
689         } else if (end > 0) {
690             int length = current.length();
691             next = next + current.substring(length - end, length);
692         }
693         docNames[index++] = next;
694         current = next;
695     }
696 }
697 synchronized String   readDocumentName(int docNumber) throws IOException {
698     if (this.cachedChunks == null)
699         this.cachedChunks = new String  [this.numberOfChunks][];
700 
701     int chunkNumber = docNumber / CHUNK_SIZE;
702     String  [] chunk = this.cachedChunks[chunkNumber];
703     if (chunk == null) {
704         boolean isLastChunk = chunkNumber == this.numberOfChunks - 1;
705         int start = this.chunkOffsets[chunkNumber];
706         int numberOfBytes = (isLastChunk ? this.startOfCategoryTables : this.chunkOffsets[chunkNumber + 1]) - start;
707         if (numberOfBytes < 0)
708             throw new IllegalArgumentException  ();
709         this.streamBuffer = new byte[numberOfBytes];
710         this.bufferIndex = 0;
711         FileInputStream file = new FileInputStream(this.indexFile);
712         try {
713             file.skip(start);
714             if (file.read(this.streamBuffer, 0, numberOfBytes) != numberOfBytes)
715                 throw new IOException();
716         } catch (IOException ioe) {
717             this.streamBuffer = null;
718             throw ioe;
719         } finally {
720             file.close();
721         }
722         int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE;
723         chunk = new String  [numberOfNames];
724         try {
725             readChunk(chunk, null, 0, numberOfNames);
726         } catch (IOException ioe) {
727             this.streamBuffer = null;
728             throw ioe;
729         }
730         this.cachedChunks[chunkNumber] = chunk;
731     }
732     this.streamBuffer = null;
733     return chunk[docNumber - (chunkNumber * CHUNK_SIZE)];
734 }
735 synchronized int[] readDocumentNumbers(Object   arrayOffset) throws IOException {
736     // arrayOffset is either a cached array of docNumbers or an Integer offset in the file
737     if (arrayOffset instanceof int[])
738         return (int[]) arrayOffset;
739 
740     FileInputStream stream = new FileInputStream(this.indexFile);
741     try {
742         int offset = ((Integer  ) arrayOffset).intValue();
743         stream.skip(offset);
744         this.streamBuffer = new byte[BUFFER_READ_SIZE];
745         this.bufferIndex = 0;
746         this.bufferEnd = stream.read(this.streamBuffer, 0, this.streamBuffer.length);
747         return readStreamDocumentArray(stream, readStreamInt(stream));
748     } finally {
749         stream.close();
750         this.streamBuffer = null;
751     }
752 }
753 private void readHeaderInfo(RandomAccessFile file) throws IOException {
754     file.seek(this.headerInfoOffset);
755 
756     // must be same order as writeHeaderInfo()
757     this.numberOfChunks = file.readInt();
758     this.sizeOfLastChunk = file.readUnsignedByte();
759     this.documentReferenceSize = file.readUnsignedByte();
760 
761     this.chunkOffsets = new int[this.numberOfChunks];
762     for (int i = 0; i < this.numberOfChunks; i++)
763         this.chunkOffsets[i] = file.readInt();
764 
765     this.startOfCategoryTables = file.readInt();
766 
767     int size = file.readInt();
768     this.categoryOffsets = new HashtableOfIntValues(size);
769     this.categoryEnds = new HashtableOfIntValues(size);
770     char[] previousCategory = null;
771     int offset = -1;
772     for (int i = 0; i < size; i++) {
773         char[] categoryName = INTERNED_CATEGORY_NAMES.get(file.readUTF().toCharArray());
774         offset = file.readInt();
775         this.categoryOffsets.put(categoryName, offset); // cache offset to category table
776         if (previousCategory != null) {
777             this.categoryEnds.put(previousCategory, offset); // cache end of the category table
778         }
779         previousCategory = categoryName;
780     }
781     if (previousCategory != null) {
782         this.categoryEnds.put(previousCategory, this.headerInfoOffset); // cache end of the category table
783     }
784     this.categoryTables = new HashtableOfObject(3);
785 }
786 synchronized void startQuery() {
787     this.cacheUserCount++;
788 }
789 synchronized void stopQuery() {
790     if (--this.cacheUserCount < 0) {
791         // clear cached items
792         this.cacheUserCount = -1;
793         this.cachedChunks = null;
794         if (this.categoryTables != null) {
795             if (this.cachedCategoryName == null) {
796                 this.categoryTables = null;
797             } else if (this.categoryTables.elementSize > 1) {
798                 HashtableOfObject newTables = new HashtableOfObject(3);
799                 newTables.put(this.cachedCategoryName, this.categoryTables.get(this.cachedCategoryName));
800                 this.categoryTables = newTables;
801             }
802         }
803     }
804 }
805 private void readStreamBuffer(FileInputStream stream) throws IOException {
806     // if we're about to read a known amount at the end of the existing buffer, but it does not completely fit
807     // so we need to shift the remaining bytes to be read, and fill the buffer from the stream
808     if (this.bufferEnd < this.streamBuffer.length)
809         return; // we're at the end of the stream - nothing left to read
810 
811     int bytesInBuffer = this.bufferEnd - this.bufferIndex;
812     if (bytesInBuffer > 0)
813         System.arraycopy(this.streamBuffer, this.bufferIndex, this.streamBuffer, 0, bytesInBuffer);
814     this.bufferEnd = bytesInBuffer + stream.read(this.streamBuffer, bytesInBuffer, this.bufferIndex);
815     this.bufferIndex = 0;
816 }
817 /**
818  * Reads in a string from the specified data input stream. The 
819  * string has been encoded using a modified UTF-8 format. 
820  * <p>
821  * The first two bytes are read as an unsigned short.
822  * This value gives the number of following bytes that are in the encoded string,
823  * not the length of the resulting string. The following bytes are then 
824  * interpreted as bytes encoding characters in the UTF-8 format 
825  * and are converted into characters. 
826  * <p>
827  * This method blocks until all the bytes are read, the end of the 
828  * stream is detected, or an exception is thrown. 
829  *
830  * @param      stream   a data input stream.
831  * @return     UTF decoded string as a char array
832  * @exception  EOFException if this end of data input is reached while reading it.
833  * @exception  IOException if an I/O error occurs while reading data input.
834  * @exception  UTFDataFormatException  if the bytes do not represent a
835  *               valid UTF-8 encoding of a Unicode string.
836  */
837 private char[] readStreamChars(FileInputStream stream) throws IOException {
838     // read chars array length
839     if (stream != null && this.bufferIndex + 2 >= this.bufferEnd)
840         readStreamBuffer(stream);
841     int length = (streamBuffer[this.bufferIndex++] & 0xFF) << 8;
842     length += this.streamBuffer[this.bufferIndex++] & 0xFF;
843 
844     // fill the chars from bytes buffer
845     char[] word = new char[length];
846     int i = 0;
847     while (i < length) {
848         // how many characters can be decoded without refilling the buffer?
849         int charsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 3);
850         // all the characters must already be in the buffer if we're at the end of the stream
851         if (charsInBuffer > length || this.bufferEnd != this.streamBuffer.length || stream == null)
852             charsInBuffer = length;
853         while (i < charsInBuffer) {
854             byte b = this.streamBuffer[this.bufferIndex++];
855             switch (b & 0xF0) {
856                 case 0x00 :
857                 case 0x10 :
858                 case 0x20 :
859                 case 0x30 :
860                 case 0x40 :
861                 case 0x50 :
862                 case 0x60 :
863                 case 0x70 :
864                     word[i++]= (char) b;
865                     break;
866                 case 0xC0 :
867                 case 0xD0 :
868                     char next = (char) this.streamBuffer[this.bufferIndex++];
869                     if ((next & 0xC0) != 0x80) {
870                         throw new UTFDataFormatException();
871                     }
872                     char ch = (char) ((b & 0x1F) << 6);
873                     ch |= next & 0x3F;
874                     word[i++] = ch;
875                     break;
876                 case 0xE0 :
877                     char first = (char) this.streamBuffer[this.bufferIndex++];
878                     char second = (char) this.streamBuffer[this.bufferIndex++];
879                     if ((first & second & 0xC0) != 0x80) {
880                         throw new UTFDataFormatException();
881                     }
882                     ch = (char) ((b & 0x0F) << 12);
883                     ch |= ((first& 0x3F) << 6);
884                     ch |= second & 0x3F;
885                     word[i++] = ch;
886                     break;
887                 default:
888                     throw new UTFDataFormatException();
889             }
890         }
891         if (i < length && stream != null)
892             readStreamBuffer(stream);
893     }
894     return word;
895 }
896 private int[] readStreamDocumentArray(FileInputStream stream, int arraySize) throws IOException {
897     int[] indexes = new int[arraySize];
898     if (arraySize == 0) return indexes;
899 
900     int i = 0;
901     switch (this.documentReferenceSize) {
902         case 1 :
903             while (i < arraySize) {
904                 // how many bytes without refilling the buffer?
905                 int bytesInBuffer = i + this.bufferEnd - this.bufferIndex;
906                 if (bytesInBuffer > arraySize)
907                     bytesInBuffer = arraySize;
908                 while (i < bytesInBuffer) {
909                     indexes[i++] = this.streamBuffer[this.bufferIndex++] & 0xFF;
910                 }
911                 if (i < arraySize && stream != null)
912                     readStreamBuffer(stream);
913             }
914             break;
915         case 2 :
916             while (i < arraySize) {
917                 // how many shorts without refilling the buffer?
918                 int shortsInBuffer = i + ((this.bufferEnd - this.bufferIndex) / 2);
919                 if (shortsInBuffer > arraySize)
920                     shortsInBuffer = arraySize;
921                 while (i < shortsInBuffer) {
922                     int val = (this.streamBuffer[this.bufferIndex++] & 0xFF) << 8;
923                     indexes[i++] = val + (this.streamBuffer[this.bufferIndex++] & 0xFF);
924                 }
925                 if (i < arraySize && stream != null)
926                     readStreamBuffer(stream);
927             }
928             break;
929         default :
930             while (i < arraySize) {
931                 indexes[i++] = readStreamInt(stream);
932             }
933             break;
934     }
935     return indexes;
936 }
937 private int readStreamInt(FileInputStream stream) throws IOException {
938     if (this.bufferIndex + 4 >= this.bufferEnd) {
939         readStreamBuffer(stream);
940     }
941     int val = (streamBuffer[this.bufferIndex++] & 0xFF) << 24;
942     val += (streamBuffer[this.bufferIndex++] & 0xFF) << 16;
943     val += (streamBuffer[this.bufferIndex++] & 0xFF) << 8;
944     return val + (streamBuffer[this.bufferIndex++] & 0xFF);
945 }
946 private void writeAllDocumentNames(String  [] sortedDocNames, FileOutputStream stream) throws IOException {
947     if (sortedDocNames.length == 0)
948         throw new IllegalArgumentException  ();
949 
950     // assume the file was just created by initializeFrom()
951     this.streamBuffer = new byte[BUFFER_WRITE_SIZE];
952     this.bufferIndex = 0;
953     this.streamEnd = 0;
954 
955     // in order, write: SIGNATURE & headerInfoOffset place holder, then each compressed chunk of document names
956     writeStreamChars(stream, SIGNATURE_CHARS);
957     this.headerInfoOffset = this.streamEnd;
958     writeStreamInt(stream, -1); // will overwrite with correct value later
959 
960     int size = sortedDocNames.length;
961     this.numberOfChunks = (size / CHUNK_SIZE) + 1;
962     this.sizeOfLastChunk = size % CHUNK_SIZE;
963     if (this.sizeOfLastChunk == 0) {
964         this.numberOfChunks--;
965         this.sizeOfLastChunk = CHUNK_SIZE;
966     }
967     this.documentReferenceSize = size <= 0x7F ? 1 : (size <= 0x7FFF ? 2 : 4); // number of bytes used to encode a reference
968 
969     this.chunkOffsets = new int[this.numberOfChunks];
970     int lastIndex = this.numberOfChunks - 1;
971     for (int i = 0; i < this.numberOfChunks; i++) {
972         this.chunkOffsets[i] = this.streamEnd;
973 
974         int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE;
975         int chunkIndex = i * CHUNK_SIZE;
976         String   current = sortedDocNames[chunkIndex];
977         writeStreamChars(stream, current.toCharArray());
978         for (int j = 1; j < chunkSize; j++) {
979             String   next = sortedDocNames[chunkIndex + j];
980             int len1 = current.length();
981             int len2 = next.length();
982             int max = len1 < len2 ? len1 : len2;
983             int start = 0; // number of identical characters at the beginning (also the index of first character that is different)
984             while (current.charAt(start) == next.charAt(start)) {
985                 start++;
986                 if (max == start) break; // current is 'abba', next is 'abbab'
987             }
988             if (start > 255) start = 255;
989 
990             int end = 0; // number of identical characters at the end
991             while (current.charAt(--len1) == next.charAt(--len2)) {
992                 end++;
993                 if (len2 == start) break; // current is 'abbba', next is 'abba'
994                 if (len1 == 0) break; // current is 'xabc', next is 'xyabc'
995             }
996             if (end > 255) end = 255;
997             if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
998                 stream.write(this.streamBuffer, 0, this.bufferIndex);
999                 this.bufferIndex = 0;
1000            }
1001            this.streamBuffer[this.bufferIndex++] = (byte) start;
1002            this.streamBuffer[this.bufferIndex++] = (byte) end;
1003            this.streamEnd += 2;
1004
1005            int last = next.length() - end;
1006            writeStreamChars(stream, (start < last ? CharOperation.subarray(next.toCharArray(), start, last) : CharOperation.NO_CHAR));
1007            current = next;
1008        }
1009    }
1010    this.startOfCategoryTables = this.streamEnd + 1;
1011}
1012private void writeCategories(FileOutputStream stream) throws IOException {
1013    char[][] categoryNames = this.categoryTables.keyTable;
1014    Object  [] tables = this.categoryTables.valueTable;
1015    for (int i = 0, l = categoryNames.length; i < l; i++)
1016        if (categoryNames[i] != null)
1017            writeCategoryTable(categoryNames[i], (HashtableOfObject) tables[i], stream);
1018    this.categoryTables = null;
1019}
1020private void writeCategoryTable(char[] categoryName, HashtableOfObject wordsToDocs, FileOutputStream stream) throws IOException {
1021    // the format of a category table is as follows:
1022    // any document number arrays with >= 256 elements are written before the table (the offset to each array is remembered)
1023    // then the number of word->int[] pairs in the table is written
1024    // for each word -> int[] pair, the word is written followed by:
1025    //      an int <= 0 if the array size == 1
1026    //      an int > 1 & < 256 for the size of the array if its > 1 & < 256, the document array follows immediately
1027    //      256 if the array size >= 256 followed by another int which is the offset to the array (written prior to the table)
1028
1029    int largeArraySize = 256;
1030    Object  [] values = wordsToDocs.valueTable;
1031    for (int i = 0, l = values.length; i < l; i++) {
1032        Object   o = values[i];
1033        if (o != null) {
1034            if (o instanceof IntList)
1035                o = values[i] = ((IntList) values[i]).asArray();
1036            int[] documentNumbers = (int[]) o;
1037            if (documentNumbers.length >= largeArraySize) {
1038                values[i] = new Integer  (this.streamEnd);
1039                writeDocumentNumbers(documentNumbers, stream);
1040            }
1041        }
1042    }
1043
1044    this.categoryOffsets.put(categoryName, this.streamEnd); // remember the offset to the start of the table
1045    this.categoryTables.put(categoryName, null); // flush cached table
1046    writeStreamInt(stream, wordsToDocs.elementSize);
1047    char[][] words = wordsToDocs.keyTable;
1048    for (int i = 0, l = words.length; i < l; i++) {
1049        Object   o = values[i];
1050        if (o != null) {
1051            writeStreamChars(stream, words[i]);
1052            if (o instanceof int[]) {
1053                int[] documentNumbers = (int[]) o;
1054                if (documentNumbers.length == 1)
1055                    writeStreamInt(stream, -documentNumbers[0]); // store an array of 1 element by negating the documentNumber (can be zero)
1056                else
1057                    writeDocumentNumbers(documentNumbers, stream);
1058            } else {
1059                writeStreamInt(stream, largeArraySize); // mark to identify that an offset follows
1060                writeStreamInt(stream, ((Integer  ) o).intValue()); // offset in the file of the array of document numbers
1061            }
1062        }
1063    }
1064}
1065private void writeDocumentNumbers(int[] documentNumbers, FileOutputStream stream) throws IOException {
1066    // must store length as a positive int to detect in-lined array of 1 element
1067    int length = documentNumbers.length;
1068    writeStreamInt(stream, length);
1069    Util.sort(documentNumbers);
1070    int start = 0;
1071    switch (this.documentReferenceSize) {
1072        case 1 :
1073            while ((this.bufferIndex + length - start) >= BUFFER_WRITE_SIZE) {
1074                // when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
1075                int bytesLeft = BUFFER_WRITE_SIZE - this.bufferIndex;
1076                for (int i=0; i < bytesLeft; i++) {
1077                    this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1078                }
1079                stream.write(this.streamBuffer, 0, this.bufferIndex);
1080                this.bufferIndex = 0;
1081            }
1082            while (start < length) {
1083                this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1084            }
1085            this.streamEnd += length;
1086            break;
1087        case 2 :
1088            while ((this.bufferIndex + ((length - start) * 2)) >= BUFFER_WRITE_SIZE) {
1089                // when documentNumbers is large, write BUFFER_WRITE_SIZE parts & fall thru to write the last part
1090                int shortsLeft = (BUFFER_WRITE_SIZE - this.bufferIndex) / 2;
1091                for (int i=0; i < shortsLeft; i++) {
1092                    this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
1093                    this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1094                }
1095                stream.write(this.streamBuffer, 0, this.bufferIndex);
1096                this.bufferIndex = 0;
1097            }
1098            while (start < length) {
1099                this.streamBuffer[this.bufferIndex++] = (byte) (documentNumbers[start] >> 8);
1100                this.streamBuffer[this.bufferIndex++] = (byte) documentNumbers[start++];
1101            }
1102            this.streamEnd += length * 2;
1103            break;
1104        default :
1105            while (start < length) {
1106                writeStreamInt(stream, documentNumbers[start++]);
1107            }
1108            break;
1109    }
1110}
1111private void writeHeaderInfo(FileOutputStream stream) throws IOException {
1112    writeStreamInt(stream, this.numberOfChunks);
1113    if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
1114        stream.write(this.streamBuffer, 0, this.bufferIndex);
1115        this.bufferIndex = 0;
1116    }
1117    this.streamBuffer[this.bufferIndex++] = (byte) this.sizeOfLastChunk;
1118    this.streamBuffer[this.bufferIndex++] = (byte) this.documentReferenceSize;
1119    this.streamEnd += 2;
1120
1121    // apend the file with chunk offsets
1122    for (int i = 0; i < this.numberOfChunks; i++) {
1123        writeStreamInt(stream, this.chunkOffsets[i]);
1124    }
1125
1126    writeStreamInt(stream, this.startOfCategoryTables);
1127
1128    // append the file with the category offsets... # of name -> offset pairs, followed by each name & an offset to its word->doc# table
1129    writeStreamInt(stream, this.categoryOffsets.elementSize);
1130    char[][] categoryNames = this.categoryOffsets.keyTable;
1131    int[] offsets = this.categoryOffsets.valueTable;
1132    for (int i = 0, l = categoryNames.length; i < l; i++) {
1133        if (categoryNames[i] != null) {
1134            writeStreamChars(stream, categoryNames[i]);
1135            writeStreamInt(stream, offsets[i]);
1136        }
1137    }
1138    // ensure buffer is written to the stream
1139    if (this.bufferIndex > 0) {
1140        stream.write(this.streamBuffer, 0, this.bufferIndex);
1141        this.bufferIndex = 0;
1142    }
1143}
1144private void writeOffsetToHeader(int offsetToHeader) throws IOException {
1145    if (offsetToHeader > 0) {
1146        RandomAccessFile file = new RandomAccessFile(this.indexFile, "rw"); //$NON-NLS-1$
1147        try {
1148            file.seek(this.headerInfoOffset); // offset to position in header
1149            file.writeInt(offsetToHeader);
1150            this.headerInfoOffset = offsetToHeader; // update to reflect the correct offset
1151        } finally {
1152            file.close();
1153        }
1154    }
1155}
1156/**
1157 * Writes a string to the given output stream using UTF-8 
1158 * encoding in a machine-independent manner. 
1159 * <p>
1160 * First, two bytes of the array are giving the number of bytes to 
1161 * follow. This value is the number of bytes actually written out, 
1162 * not the length of the string. Following the length, each character 
1163 * of the string is put in the bytes array, in sequence, using the UTF-8
1164 * encoding for the character.
1165 * </p>
1166 * <p>
1167 * Then the entire byte array is written to the output stream
1168 * using {@link OutputStream#write(byte[], int, int)} method.
1169 * </p>
1170 *
1171 * @param array char array to be written.
1172 * @exception  IOException  if an I/O error occurs while writting
1173 *  the bytes array to the stream.
1174 */
1175private void writeStreamChars(FileOutputStream stream, char[] array) throws IOException {
1176    if ((this.bufferIndex + 2) >= BUFFER_WRITE_SIZE)  {
1177        stream.write(this.streamBuffer, 0, this.bufferIndex);
1178        this.bufferIndex = 0;
1179    }
1180    int length = array.length;
1181    this.streamBuffer[this.bufferIndex++] = (byte) ((length >>> 8) & 0xFF); // store chars array length instead of bytes
1182    this.streamBuffer[this.bufferIndex++] = (byte) (length & 0xFF); // this will allow to read it faster
1183    this.streamEnd += 2;
1184
1185    // we're assuming that very few char[] are so large that we need to flush the buffer more than once, if at all
1186    int totalBytesNeeded = length * 3;
1187    if (totalBytesNeeded <= BUFFER_WRITE_SIZE) {
1188        if (this.bufferIndex + totalBytesNeeded > BUFFER_WRITE_SIZE) {
1189            // flush the buffer now to make sure there is room for the array
1190            stream.write(this.streamBuffer, 0, this.bufferIndex);
1191            this.bufferIndex = 0;
1192        }
1193        writeStreamChars(stream, array, 0, length);
1194    } else {
1195        int charsPerWrite = BUFFER_WRITE_SIZE / 3;
1196        int start = 0;
1197        while (start < length) {
1198            stream.write(this.streamBuffer, 0, this.bufferIndex);
1199            this.bufferIndex = 0;
1200            int charsLeftToWrite = length - start;
1201            int end = start + (charsPerWrite < charsLeftToWrite ? charsPerWrite : charsLeftToWrite);
1202            writeStreamChars(stream, array, start, end);
1203            start = end;
1204        }
1205    }
1206}
1207private void writeStreamChars(FileOutputStream stream, char[] array, int start, int end) throws IOException {
1208    // start can NOT be == end
1209    // must have checked that there is enough room for end - start * 3 bytes in the buffer
1210
1211    int oldIndex = this.bufferIndex;
1212    while (start < end) {
1213        int ch = array[start++];
1214        if ((ch & 0x007F) == ch) {
1215            this.streamBuffer[this.bufferIndex++] = (byte) ch;
1216        } else if ((ch & 0x07FF) == ch) {
1217            // first two bits are stored in first byte
1218            byte b = (byte) (ch >> 6);
1219            b &= 0x1F;
1220            b |= 0xC0;
1221            this.streamBuffer[this.bufferIndex++] = b;
1222            // last six bits are stored in second byte
1223            b = (byte) (ch & 0x3F);
1224            b |= 0x80;
1225            this.streamBuffer[this.bufferIndex++] = b;
1226        } else {
1227            // first four bits are stored in first byte
1228            byte b = (byte) (ch >> 12);
1229            b &= 0x0F;
1230            b |= 0xE0;
1231            this.streamBuffer[this.bufferIndex++] = b;
1232            // six following bits are stored in second byte
1233            b = (byte) (ch >> 6);
1234            b &= 0x3F;
1235            b |= 0x80;
1236            this.streamBuffer[this.bufferIndex++] = b;
1237            // last six bits are stored in third byte
1238            b = (byte) (ch & 0x3F);
1239            b |= 0x80;
1240            this.streamBuffer[this.bufferIndex++] = b;
1241        }
1242    }
1243    this.streamEnd += this.bufferIndex - oldIndex;
1244}
1245private void writeStreamInt(FileOutputStream stream, int val) throws IOException {
1246    if ((this.bufferIndex + 4) >= BUFFER_WRITE_SIZE)  {
1247        stream.write(this.streamBuffer, 0, this.bufferIndex);
1248        this.bufferIndex = 0;
1249    }
1250    this.streamBuffer[this.bufferIndex++] = (byte) (val >> 24);
1251    this.streamBuffer[this.bufferIndex++] = (byte) (val >> 16);
1252    this.streamBuffer[this.bufferIndex++] = (byte) (val >> 8);
1253    this.streamBuffer[this.bufferIndex++] = (byte) val;
1254    this.streamEnd += 4;
1255}
1256}
1257
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags