KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > mybatchfwk > history > KeysFileIndexer


1 /*
2  * MyBatchFramework - Open-source batch framework.
3  * Copyright (C) 2006 Jérôme Bertèche cyberteche@users.sourceforge.net
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * Jérôme Bertèche
16  * Email: cyberteche@users.sourceforge.net
17  */

18 package net.sf.mybatchfwk.history;
19
20 import java.io.File JavaDoc;
21 import java.io.IOException JavaDoc;
22 import java.io.RandomAccessFile JavaDoc;
23
24 /**
25  * The KeysFileIndexer classe is used in order to index keys contained into a file (one line = one key).<br>
26  * This indexer (based on the HashMap class) store into the memory only the hash key and the line number of each key
27  * (so it can be used to work with a large number of keys).
28  *
29  * @author Jérôme Bertèche (cyberteche@users.sourceforge.net)
30  */

31 public class KeysFileIndexer {
32     
33     /**
34      * The keys reader
35      */

36     private RandomAccessFile JavaDoc reader;
37
38     /**
39      * The default initial capacity - MUST be a power of two.
40      */

41     static final int DEFAULT_INITIAL_CAPACITY = 16;
42     
43     /**
44      * The maximum capacity, used if a higher value is implicitly specified
45      * by either of the constructors with arguments.
46      * MUST be a power of two <= 1<<30.
47      */

48     static final int MAXIMUM_CAPACITY = 1 << 30;
49     
50     /**
51      * The load factor used when none specified in constructor.
52      **/

53     static final float DEFAULT_LOAD_FACTOR = 0.75f;
54     
55     /**
56      * The table, resized as necessary. Length MUST Always be a power of two.
57      */

58     transient Entry[] table;
59     
60     /**
61      * The number of key-value mappings contained in this identity hash map.
62      */

63     transient int size;
64     
65     /**
66      * The next size value at which to resize (capacity * load factor).
67      * @serial
68      */

69     int threshold;
70     
71     /**
72      * The load factor for the hash table.
73      *
74      * @serial
75      */

76     final float loadFactor;
77     
78     /**
79      * The number of times this KeysFileIndexer has been structurally modified
80      * Structural modifications are those that change the number of mappings in
81      * the KeysFileIndexer or otherwise modify its internal structure (e.g.,
82      * rehash). This field is used to make iterators on Collection-views of
83      * the KeysFileIndexer fail-fast. (See ConcurrentModificationException).
84      */

85     transient volatile int modCount;
86
87     /**
88      * Constructs an empty <tt>KeysFileIndexer</tt> with the default initial capacity
89      * (16) and the default load factor (0.75).
90      * @throws IOException
91      */

92     public KeysFileIndexer(File JavaDoc keysFile, String JavaDoc mode) throws IOException JavaDoc {
93         
94         this.loadFactor = DEFAULT_LOAD_FACTOR;
95         threshold = (int)(DEFAULT_INITIAL_CAPACITY * DEFAULT_LOAD_FACTOR);
96         table = new Entry[DEFAULT_INITIAL_CAPACITY];
97         
98         reader = new RandomAccessFile JavaDoc(keysFile, mode);
99         buildIndex();
100     }
101     
102     /**
103      * Index the keys of the file
104      * @throws IOException
105      */

106     protected void buildIndex() throws IOException JavaDoc {
107         String JavaDoc key = null;
108         long pointer = 0;
109         while ((key = reader.readLine()) != null) {
110             if (key.length() == 0) {
111                 pointer = reader.getFilePointer();
112                 continue;
113             }
114             
115             putKey(pointer, key);
116             
117             pointer = reader.getFilePointer();
118         }
119     }
120     
121     /**
122      * Close the keys reader
123      * @throws IOException
124      */

125     public void closeReader() throws IOException JavaDoc {
126         reader.close();
127     }
128
129     /**
130      * Returns a hash value for the specified object. In addition to
131      * the object's own hashCode, this method applies a "supplemental
132      * hash function," which defends against poor quality hash functions.
133      * This is critical because HashMap uses power-of two length
134      * hash tables.<p>
135      *
136      * The shift distances in this function were chosen as the result
137      * of an automated search over the entire four-dimensional search space.
138      */

139     static int hash(Object JavaDoc x) {
140         int h = x.hashCode();
141
142         h += ~(h << 9);
143         h ^= (h >>> 14);
144         h += (h << 4);
145         h ^= (h >>> 10);
146         return h;
147     }
148
149     /**
150      * Returns index for hash code h.
151      */

152     static int indexFor(int h, int length) {
153         return h & (length-1);
154     }
155  
156     /**
157      * Returns the number of key-value mappings in this map.
158      *
159      * @return the number of key-value mappings in this map.
160      */

161     public int size() {
162         return size;
163     }
164   
165     /**
166      * Returns <tt>true</tt> if this map contains no key-value mappings.
167      *
168      * @return <tt>true</tt> if this map contains no key-value mappings.
169      */

170     public boolean isEmpty() {
171         return size == 0;
172     }
173     
174     /**
175      * Add a new key to the end of the file
176      * @param key
177      * @throws IOException
178      */

179     public void addKey(String JavaDoc key) throws IOException JavaDoc {
180         reader.seek(reader.length());
181         long pointer = reader.getFilePointer();
182         reader.write(key.getBytes());
183         reader.write("\n".getBytes());
184         putKey(pointer, key);
185     }
186
187     /**
188      * Add a new entry from the given key
189      * @param pointer
190      * @param key
191      */

192     protected void putKey(long pointer, String JavaDoc key) {
193         int hash = hash(key);
194         int i = indexFor(hash, table.length);
195
196         for (Entry e = table[i]; e != null; e = e.next) {
197             if (e.hash == hash && (pointer == e.pointer)) {
198                 return;
199             }
200         }
201
202         modCount++;
203         addEntry(hash, pointer, i);
204         return;
205     }
206     
207     /**
208      * Returns <tt>true</tt> if this map contains a mapping for the
209      * specified key.
210      *
211      * @param key The key whose presence in this map is to be tested
212      * @return <tt>true</tt> if this map contains a mapping for the specified
213      * key.
214      * @throws IOException
215      */

216     public boolean containsKey(String JavaDoc key) throws IOException JavaDoc {
217         int hash = hash(key);
218         int i = indexFor(hash, table.length);
219         Entry e = table[i];
220         while (e != null) {
221             if (e.hash == hash) {
222                 reader.seek(e.pointer);
223                 if (key.equals(reader.readLine())) {
224                     return true;
225                 }
226             }
227             e = e.next;
228         }
229         return false;
230     }
231
232     /**
233      * Rehashes the contents of this map into a new array with a
234      * larger capacity. This method is called automatically when the
235      * number of keys in this map reaches its threshold.
236      *
237      * If current capacity is MAXIMUM_CAPACITY, this method does not
238      * resize the map, but sets threshold to Integer.MAX_VALUE.
239      * This has the effect of preventing future calls.
240      *
241      * @param newCapacity the new capacity, MUST be a power of two;
242      * must be greater than current capacity unless current
243      * capacity is MAXIMUM_CAPACITY (in which case value
244      * is irrelevant).
245      */

246     void resize(int newCapacity) {
247         Entry[] oldTable = table;
248         int oldCapacity = oldTable.length;
249         if (oldCapacity == MAXIMUM_CAPACITY) {
250             threshold = Integer.MAX_VALUE;
251             return;
252         }
253
254         Entry[] newTable = new Entry[newCapacity];
255         transfer(newTable);
256         table = newTable;
257         threshold = (int)(newCapacity * loadFactor);
258     }
259
260     /**
261      * Transfer all entries from current table to newTable.
262      */

263     void transfer(Entry[] newTable) {
264         Entry[] src = table;
265         int newCapacity = newTable.length;
266         for (int j = 0; j < src.length; j++) {
267             Entry e = src[j];
268             if (e != null) {
269                 src[j] = null;
270                 do {
271                     Entry next = e.next;
272                     int i = indexFor(e.hash, newCapacity);
273                     e.next = newTable[i];
274                     newTable[i] = e;
275                     e = next;
276                 } while (e != null);
277             }
278         }
279     }
280
281     /**
282      * An entry contains a hash key and a pointer to the line
283      */

284     static class Entry {
285         final int hash;
286         final long pointer;
287         Entry next;
288
289         /**
290          * Create new entry.
291          */

292         Entry(int hash, long pointer, Entry next) {
293             this.next = next;
294             this.pointer = pointer;
295             this.hash = hash;
296         }
297
298         public long getPointer() {
299             return pointer;
300         }
301     }
302
303     /**
304      * Add a new entry with the specified key, value and hash code to
305      * the specified bucket. It is the responsibility of this
306      * method to resize the table if appropriate.
307      *
308      * Subclass overrides this to alter the behavior of put method.
309      */

310     void addEntry(int hash, long pointer, int bucketIndex) {
311         Entry e = table[bucketIndex];
312         table[bucketIndex] = new Entry(hash, pointer, e);
313         if (size++ >= threshold)
314             resize(2 * table.length);
315     }
316 }
317
Popular Tags