KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > util > SymbolTable


1 /*
2  * Copyright 2000-2002,2004,2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.xerces.util;
18
19 /**
20  * This class is a symbol table implementation that guarantees that
21  * strings used as identifiers are unique references. Multiple calls
22  * to <code>addSymbol</code> will always return the same string
23  * reference.
24  * <p>
25  * The symbol table performs the same task as <code>String.intern()</code>
26  * with the following differences:
27  * <ul>
28  * <li>
29  * A new string object does not need to be created in order to
30  * retrieve a unique reference. Symbols can be added by using
31  * a series of characters in a character array.
32  * </li>
33  * <li>
34  * Users of the symbol table can provide their own symbol hashing
35  * implementation. For example, a simple string hashing algorithm
36  * may fail to produce a balanced set of hashcodes for symbols
37  * that are <em>mostly</em> unique. Strings with similar leading
38  * characters are especially prone to this poor hashing behavior.
39  * </li>
40  * </ul>
41  *
42  * An instance of <code>SymbolTable</code> has two parameters that affect its
43  * performance: <i>initial capacity</i> and <i>load factor</i>. The
44  * <i>capacity</i> is the number of <i>buckets</i> in the SymbolTable, and the
45  * <i>initial capacity</i> is simply the capacity at the time the SymbolTable
46  * is created. Note that the SymbolTable is <i>open</i>: in the case of a "hash
47  * collision", a single bucket stores multiple entries, which must be searched
48  * sequentially. The <i>load factor</i> is a measure of how full the SymbolTable
49  * is allowed to get before its capacity is automatically increased.
50  * When the number of entries in the SymbolTable exceeds the product of the load
51  * factor and the current capacity, the capacity is increased by calling the
52  * <code>rehash</code> method.<p>
53  *
54  * Generally, the default load factor (.75) offers a good tradeoff between
55  * time and space costs. Higher values decrease the space overhead but
56  * increase the time cost to look up an entry (which is reflected in most
57  * <tt>SymbolTable</tt> operations, including <tt>addSymbol</tt> and <tt>containsSymbol</tt>).<p>
58  *
59  * The initial capacity controls a tradeoff between wasted space and the
60  * need for <code>rehash</code> operations, which are time-consuming.
61  * No <code>rehash</code> operations will <i>ever</i> occur if the initial
62  * capacity is greater than the maximum number of entries the
63  * <tt>Hashtable</tt> will contain divided by its load factor. However,
64  * setting the initial capacity too high can waste space.<p>
65  *
66  * If many entries are to be made into a <code>SymbolTable</code>,
67  * creating it with a sufficiently large capacity may allow the
68  * entries to be inserted more efficiently than letting it perform
69  * automatic rehashing as needed to grow the table. <p>
70
71  * @see SymbolHash
72  *
73  * @author Andy Clark
74  * @author John Kim, IBM
75  *
76  * @version $Id: SymbolTable.java,v 1.10 2005/03/14 23:09:35 mrglavas Exp $
77  */

78 public class SymbolTable {
79
80     //
81
// Constants
82
//
83

84     /** Default table size. */
85     protected static final int TABLE_SIZE = 101;
86
87     //
88
// Data
89
//
90

91     /** Buckets. */
92     protected Entry[] fBuckets = null;
93
94     /** actual table size **/
95     protected int fTableSize;
96
97     /** The total number of entries in the hash table. */
98     protected transient int fCount;
99
100     /** The table is rehashed when its size exceeds this threshold. (The
101      * value of this field is (int)(capacity * loadFactor).) */

102     protected int fThreshold;
103                              
104     /** The load factor for the SymbolTable. */
105     protected float fLoadFactor;
106
107     //
108
// Constructors
109
//
110

111     /**
112      * Constructs a new, empty SymbolTable with the specified initial
113      * capacity and the specified load factor.
114      *
115      * @param initialCapacity the initial capacity of the SymbolTable.
116      * @param loadFactor the load factor of the SymbolTable.
117      * @throws IllegalArgumentException if the initial capacity is less
118      * than zero, or if the load factor is nonpositive.
119      */

120     public SymbolTable(int initialCapacity, float loadFactor) {
121         
122         if (initialCapacity < 0) {
123             throw new IllegalArgumentException JavaDoc("Illegal Capacity: " + initialCapacity);
124         }
125         
126         if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
127             throw new IllegalArgumentException JavaDoc("Illegal Load: " + loadFactor);
128         }
129         
130         if (initialCapacity == 0) {
131             initialCapacity = 1;
132         }
133         
134         fLoadFactor = loadFactor;
135         fTableSize = initialCapacity;
136         fBuckets = new Entry[fTableSize];
137         fThreshold = (int)(fTableSize * loadFactor);
138         fCount = 0;
139     }
140
141     /**
142      * Constructs a new, empty SymbolTable with the specified initial capacity
143      * and default load factor, which is <tt>0.75</tt>.
144      *
145      * @param initialCapacity the initial capacity of the hashtable.
146      * @throws IllegalArgumentException if the initial capacity is less
147      * than zero.
148      */

149     public SymbolTable(int initialCapacity) {
150         this(initialCapacity, 0.75f);
151     }
152     
153     /**
154      * Constructs a new, empty SymbolTable with a default initial capacity (101)
155      * and load factor, which is <tt>0.75</tt>.
156      */

157     public SymbolTable() {
158         this(TABLE_SIZE, 0.75f);
159     }
160
161     //
162
// Public methods
163
//
164

165     /**
166      * Adds the specified symbol to the symbol table and returns a
167      * reference to the unique symbol. If the symbol already exists,
168      * the previous symbol reference is returned instead, in order
169      * guarantee that symbol references remain unique.
170      *
171      * @param symbol The new symbol.
172      */

173     public String JavaDoc addSymbol(String JavaDoc symbol) {
174         
175         // search for identical symbol
176
int bucket = hash(symbol) % fTableSize;
177         for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
178             if (entry.symbol.equals(symbol)) {
179                 return entry.symbol;
180             }
181         }
182         
183         if (fCount >= fThreshold) {
184             // Rehash the table if the threshold is exceeded
185
rehash();
186             bucket = hash(symbol) % fTableSize;
187         }
188         
189         // create new entry
190
Entry entry = new Entry(symbol, fBuckets[bucket]);
191         fBuckets[bucket] = entry;
192         ++fCount;
193         return entry.symbol;
194         
195     } // addSymbol(String):String
196

197     /**
198      * Adds the specified symbol to the symbol table and returns a
199      * reference to the unique symbol. If the symbol already exists,
200      * the previous symbol reference is returned instead, in order
201      * guarantee that symbol references remain unique.
202      *
203      * @param buffer The buffer containing the new symbol.
204      * @param offset The offset into the buffer of the new symbol.
205      * @param length The length of the new symbol in the buffer.
206      */

207     public String JavaDoc addSymbol(char[] buffer, int offset, int length) {
208         
209         // search for identical symbol
210
int bucket = hash(buffer, offset, length) % fTableSize;
211         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
212             if (length == entry.characters.length) {
213                 for (int i = 0; i < length; i++) {
214                     if (buffer[offset + i] != entry.characters[i]) {
215                         continue OUTER;
216                     }
217                 }
218                 return entry.symbol;
219             }
220         }
221         
222         if (fCount >= fThreshold) {
223             // Rehash the table if the threshold is exceeded
224
rehash();
225             bucket = hash(buffer, offset, length) % fTableSize;
226         }
227         
228         // add new entry
229
Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
230         fBuckets[bucket] = entry;
231         ++fCount;
232         return entry.symbol;
233         
234     } // addSymbol(char[],int,int):String
235

236     /**
237      * Returns a hashcode value for the specified symbol. The value
238      * returned by this method must be identical to the value returned
239      * by the <code>hash(char[],int,int)</code> method when called
240      * with the character array that comprises the symbol string.
241      *
242      * @param symbol The symbol to hash.
243      */

244     public int hash(String JavaDoc symbol) {
245         return symbol.hashCode() & 0x7FFFFFF;
246     } // hash(String):int
247

248     /**
249      * Returns a hashcode value for the specified symbol information.
250      * The value returned by this method must be identical to the value
251      * returned by the <code>hash(String)</code> method when called
252      * with the string object created from the symbol information.
253      *
254      * @param buffer The character buffer containing the symbol.
255      * @param offset The offset into the character buffer of the start
256      * of the symbol.
257      * @param length The length of the symbol.
258      */

259     public int hash(char[] buffer, int offset, int length) {
260
261         int code = 0;
262         for (int i = 0; i < length; ++i) {
263             code = code * 31 + buffer[offset + i];
264         }
265         return code & 0x7FFFFFF;
266
267     } // hash(char[],int,int):int
268

269     /**
270      * Increases the capacity of and internally reorganizes this
271      * SymbolTable, in order to accommodate and access its entries more
272      * efficiently. This method is called automatically when the
273      * number of keys in the SymbolTable exceeds this hashtable's capacity
274      * and load factor.
275      */

276     protected void rehash() {
277
278         int oldCapacity = fBuckets.length;
279         Entry[] oldTable = fBuckets;
280
281         int newCapacity = oldCapacity * 2 + 1;
282         Entry[] newTable = new Entry[newCapacity];
283
284         fThreshold = (int)(newCapacity * fLoadFactor);
285         fBuckets = newTable;
286         fTableSize = fBuckets.length;
287
288         for (int i = oldCapacity ; i-- > 0 ;) {
289             for (Entry old = oldTable[i] ; old != null ; ) {
290                 Entry e = old;
291                 old = old.next;
292
293                 int index = hash(e.characters, 0, e.characters.length) % newCapacity;
294                 e.next = newTable[index];
295                 newTable[index] = e;
296             }
297         }
298     }
299
300     /**
301      * Returns true if the symbol table already contains the specified
302      * symbol.
303      *
304      * @param symbol The symbol to look for.
305      */

306     public boolean containsSymbol(String JavaDoc symbol) {
307
308         // search for identical symbol
309
int bucket = hash(symbol) % fTableSize;
310         int length = symbol.length();
311         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
312             if (length == entry.characters.length) {
313                 for (int i = 0; i < length; i++) {
314                     if (symbol.charAt(i) != entry.characters[i]) {
315                         continue OUTER;
316                     }
317                 }
318                 return true;
319             }
320         }
321
322         return false;
323
324     } // containsSymbol(String):boolean
325

326     /**
327      * Returns true if the symbol table already contains the specified
328      * symbol.
329      *
330      * @param buffer The buffer containing the symbol to look for.
331      * @param offset The offset into the buffer.
332      * @param length The length of the symbol in the buffer.
333      */

334     public boolean containsSymbol(char[] buffer, int offset, int length) {
335
336         // search for identical symbol
337
int bucket = hash(buffer, offset, length) % fTableSize;
338         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
339             if (length == entry.characters.length) {
340                 for (int i = 0; i < length; i++) {
341                     if (buffer[offset + i] != entry.characters[i]) {
342                         continue OUTER;
343                     }
344                 }
345                 return true;
346             }
347         }
348
349         return false;
350
351     } // containsSymbol(char[],int,int):boolean
352

353     //
354
// Classes
355
//
356

357     /**
358      * This class is a symbol table entry. Each entry acts as a node
359      * in a linked list.
360      */

361     protected static final class Entry {
362
363         //
364
// Data
365
//
366

367         /** Symbol. */
368         public String JavaDoc symbol;
369
370         /**
371          * Symbol characters. This information is duplicated here for
372          * comparison performance.
373          */

374         public char[] characters;
375
376         /** The next entry. */
377         public Entry next;
378
379         //
380
// Constructors
381
//
382

383         /**
384          * Constructs a new entry from the specified symbol and next entry
385          * reference.
386          */

387         public Entry(String JavaDoc symbol, Entry next) {
388             this.symbol = symbol.intern();
389             characters = new char[symbol.length()];
390             symbol.getChars(0, characters.length, characters, 0);
391             this.next = next;
392         }
393
394         /**
395          * Constructs a new entry from the specified symbol information and
396          * next entry reference.
397          */

398         public Entry(char[] ch, int offset, int length, Entry next) {
399             characters = new char[length];
400             System.arraycopy(ch, offset, characters, 0, length);
401             symbol = new String JavaDoc(characters).intern();
402             this.next = next;
403         }
404
405     } // class Entry
406

407 } // class SymbolTable
408
Popular Tags