KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > utils > SymbolCache


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.utils;
59
60 /**
61  *
62  * @version
63  */

64 public final class SymbolCache {
65     //
66
// Symbol Cache
67
//
68
public static final int CHAR_OFFSET = 0;
69     public static final int INDEX_OFFSET = 1;
70     public static final int NEXT_OFFSET = 2;
71     public static final int CACHE_RECORD_SIZE = 3;
72
73     // start with 4 entries per level on cache lines other than 0
74
public static final int INITIAL_CACHE_RECORD_COUNT = 4;
75
76     // start with 85 entries on cache line 0 (85*3+1 = 256)
77
public static final int HEAD_INITIAL_CACHE_RECORD_COUNT = 85;
78     //
79
public char[] fSymbolChars = new char[8192];
80     public int fSymbolCharsOffset = 0;
81     public int[][] fCacheLines = new int[256][];
82     public int fCacheLineCount = 0;
83
84 /*
85  *
86  * A brief description of how a SymbolCache is organized:
87  *
88  * The first entry in any fCacheLines[] array is a count of the number of
89  * records stored in that array. Subsequent entries are arranged in groups
90  * of three: a character value; the symbol handle, if this is the last
91  * character in a symbol, or -1 otherwise; and a pointer to the next character
92  * in the symbol, or -1 if there is none. All symbol entries begin in
93  * fCacheLines[0].
94  *
95  * For example, if the symbols "help", "he:p", "hel", and "bob" are
96  * entered, and have symbol indices 11, 22, 33 and 44, respectively, the
97  * fCacheLines array will look something like this (ignoring zero elements):
98  *
99  * fCacheLines[0] = {2, 'h', -1, 1, 'b', -1, 5}
100  * fCacheLines[1] = {1, 'e', -1, 2}
101  * fCacheLines[2] = {2, 'l', 33, 3, ':', -1, 4}
102  * fCacheLines[3] = {1, 'p', 11, -1}
103  * fCacheLines[4] = {1, 'p', 22, -1}
104  * fCacheLines[5] = {1, 'o', -1, 6}
105  * fCacheLines[6] = {1, 'b', 44, -1}
106  *
107  * The initial character of every symbol is stored in fCacheLines[0]. Other
108  * elements of fCacheLines contain records for characters that share common
109  * prefixes. For instance, fCacheLines[2] contains records for all symbols
110  * that begin with "he". Note also that the symbol "hel" has both a symbol
111  * index and a pointer to the record containing the next character, as both
112  * "hel" and "help" are symbols in the cache.
113  *
114  */

115
116 /****
117 public void dumpCache() {
118     System.out.println("fSymbolChars.length == "+fSymbolChars.length);
119
120     for (int i = 0; i < fSymbolCharsOffset; i++)
121         System.out.println("fSymbolChars["+i+"] == "+fSymbolChars[i]);
122
123     for (int i = 0; i < fCacheLineCount; i++) {
124         System.out.print("fCacheLines["+i+"] (num records == "+
125                          fCacheLines[i][0]+") == {");
126
127         int offset = 1;
128         for (int j = 0; j < fCacheLines[i][0]; j++) {
129             System.out.print("{char="+
130                  (new Character((char)fCacheLines[i][offset+CHAR_OFFSET]).
131                                                                    toString())+
132                              "; idx="+fCacheLines[i][offset+INDEX_OFFSET]+
133                              "; next="+fCacheLines[i][offset+NEXT_OFFSET]+"}");
134             offset += CACHE_RECORD_SIZE;
135         }
136         System.out.println("} - (Actual size == "+fCacheLines[i].length+")");
137     }
138 }
139 ****/

140
141     public SymbolCache() {
142         fCacheLines[fCacheLineCount++] =
143                 new int[1+(HEAD_INITIAL_CACHE_RECORD_COUNT*CACHE_RECORD_SIZE)];
144     }
145     //
146
public void reset() {
147         fSymbolCharsOffset = 0;
148         fCacheLineCount = 0;
149         fCacheLines[fCacheLineCount++] =
150                 new int[1+(HEAD_INITIAL_CACHE_RECORD_COUNT*CACHE_RECORD_SIZE)];
151     }
152     //
153
//
154
//
155
public char[] getSymbolChars() {
156         return fSymbolChars;
157     }
158     //
159
// Symbol interfaces
160
//
161
public String JavaDoc createSymbol(int symbolHandle, int startOffset, int entry,
162                                int[] entries, int offset) {
163         int slen = fSymbolCharsOffset - startOffset;
164         String JavaDoc str = new String JavaDoc(fSymbolChars, startOffset, slen);
165         try {
166             entries[offset + SymbolCache.INDEX_OFFSET] = symbolHandle;
167         } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
168             throw new RuntimeException JavaDoc("UTL001 untested");
169         }
170         return str;
171     }
172     public int addSymbolToCache(String JavaDoc str, int slen, int symbolHandle) {
173         int charsOffset = fSymbolCharsOffset;
174         if (slen == 0) // EMPTY_STRING cannot be a legal "symbol"
175
return charsOffset;
176         int strIndex = 0;
177         char ch = str.charAt(strIndex++);
178         try {
179             fSymbolChars[fSymbolCharsOffset] = ch;
180         } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
181             char[] newChars = new char[fSymbolChars.length * 2];
182             System.arraycopy(fSymbolChars, 0, newChars, 0, fSymbolChars.length);
183             fSymbolChars = newChars;
184             fSymbolChars[fSymbolCharsOffset] = ch;
185         }
186         fSymbolCharsOffset++;
187         int entry = 0;
188         int[] entries = fCacheLines[entry];
189         int count = entries[0];
190         int i = 0;
191         int offset = 1;
192         while (true) {
193             if (i == count)
194                 break;
195             if (entries[offset + CHAR_OFFSET] != ch) {
196                 i++;
197                 offset += CACHE_RECORD_SIZE;
198                 continue;
199             }
200             if (strIndex == slen) {
201                 if (entries[offset + INDEX_OFFSET] != -1) {
202                     // How did we miss this during lookup ?
203
throw new RuntimeException JavaDoc("addSymbolToCache");
204                 }
205                 entries[offset + INDEX_OFFSET] = symbolHandle;
206                 return charsOffset;
207             }
208             ch = str.charAt(strIndex++);
209             try {
210                 fSymbolChars[fSymbolCharsOffset] = ch;
211             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
212                 char[] newChars = new char[fSymbolChars.length * 2];
213                 System.arraycopy(fSymbolChars, 0, newChars, 0, fSymbolChars.length);
214                 fSymbolChars = newChars;
215                 fSymbolChars[fSymbolCharsOffset] = ch;
216             }
217             fSymbolCharsOffset++;
218             entry = entries[offset + NEXT_OFFSET];
219             try {
220                 entries = fCacheLines[entry];
221             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
222                 if (entry == -1) {
223                     entry = fCacheLineCount++;
224                     entries[offset + NEXT_OFFSET] = entry;
225                     entries = new int[1+(INITIAL_CACHE_RECORD_COUNT*CACHE_RECORD_SIZE)];
226                     try {
227                         fCacheLines[entry] = entries;
228                     } catch (ArrayIndexOutOfBoundsException JavaDoc ex2) {
229                         int[][] newCache = new int[entry * 2][];
230                         System.arraycopy(fCacheLines, 0, newCache, 0, entry);
231                         fCacheLines = newCache;
232                         fCacheLines[entry] = entries;
233                     }
234                 } else {
235                     entries = fCacheLines[entry];
236                     throw new RuntimeException JavaDoc("UTL001 untested");
237                 }
238             }
239             count = entries[0];
240             i = 0;
241             offset = 1;
242         }
243         while (true) {
244             entries[0]++;
245             try {
246                 entries[offset + CHAR_OFFSET] = ch;
247             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
248                 int newSize = 1 + ((offset - 1) * 2);
249                 int[] newEntries = new int[newSize];
250                 System.arraycopy(entries, 0, newEntries, 0, offset);
251                 fCacheLines[entry] = entries = newEntries;
252                 entries[offset + CHAR_OFFSET] = ch;
253             }
254             if (strIndex == slen) {
255                 entries[offset + INDEX_OFFSET] = symbolHandle;
256                 entries[offset + NEXT_OFFSET] = -1;
257                 break;
258             }
259             entry = fCacheLineCount++;
260             entries[offset + INDEX_OFFSET] = -1;
261             entries[offset + NEXT_OFFSET] = entry;
262             entries = new int[1+(INITIAL_CACHE_RECORD_COUNT*CACHE_RECORD_SIZE)];
263             try {
264                 fCacheLines[entry] = entries;
265             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
266                 int[][] newCache = new int[entry * 2][];
267                 System.arraycopy(fCacheLines, 0, newCache, 0, entry);
268                 fCacheLines = newCache;
269                 fCacheLines[entry] = entries;
270             }
271             offset = 1;
272             ch = str.charAt(strIndex++);
273             try {
274                 fSymbolChars[fSymbolCharsOffset] = ch;
275             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
276                 char[] newChars = new char[fSymbolChars.length * 2];
277                 System.arraycopy(fSymbolChars, 0, newChars, 0, fSymbolChars.length);
278                 fSymbolChars = newChars;
279                 fSymbolChars[fSymbolCharsOffset] = ch;
280             }
281             fSymbolCharsOffset++;
282         }
283         return charsOffset;
284     }
285     public void updateCacheLine(int charsOffset, int totalMisses, int length) {
286 //System.err.println("found symbol " + toString(symbolIndex) + " after " + totalMisses + " total misses (" + (totalMisses/length) + " misses per character).");
287
int entry = 0;
288         int[] entries = fCacheLines[0];
289         int ch = fSymbolChars[charsOffset++];
290         int count = entries[0];
291         int offset = 1 + ((count - 1) * CACHE_RECORD_SIZE);
292         int misses = 0;
293         while (true) {
294             if (ch != entries[offset + CHAR_OFFSET]) {
295                 offset -= CACHE_RECORD_SIZE;
296                 misses++;
297                 continue;
298             }
299             if (misses > 4) {
300                 int symIndex = entries[offset + INDEX_OFFSET];
301                 int nextIndex = entries[offset + NEXT_OFFSET];
302                 System.arraycopy(entries, offset + CACHE_RECORD_SIZE, entries, offset, misses * CACHE_RECORD_SIZE);
303                 offset = 1 + ((count - 1) * CACHE_RECORD_SIZE);
304                 entries[offset + CHAR_OFFSET] = ch;
305                 entries[offset + INDEX_OFFSET] = symIndex;
306                 entries[offset + NEXT_OFFSET] = nextIndex;
307             }
308             if (--length == 0)
309                 break;
310             entry = entries[offset + NEXT_OFFSET];
311             entries = fCacheLines[entry];
312             ch = fSymbolChars[charsOffset++];
313             count = entries[0];
314             offset = 1 + ((count - 1) * CACHE_RECORD_SIZE);
315             misses = 0;
316         }
317     }
318 }
319
Popular Tags