KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > impl > IntTrie


1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
6 */

7
8 package com.ibm.icu.impl;
9
10 import java.io.InputStream JavaDoc;
11 import java.io.DataInputStream JavaDoc;
12 import java.io.IOException JavaDoc;
13 import java.util.Arrays JavaDoc;
14 import com.ibm.icu.text.UTF16;
15
16 /**
17  * Trie implementation which stores data in int, 32 bits.
18  * @author synwee
19  * @see com.ibm.icu.impl.Trie
20  * @since release 2.1, Jan 01 2002
21  */

22 public class IntTrie extends Trie
23 {
24     // public constructors ---------------------------------------------
25

26     /**
27     * <p>Creates a new Trie with the settings for the trie data.</p>
28     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
29     * trie.</p>
30     * @param inputStream file input stream to a ICU data file, containing
31     * the trie
32     * @param dataManipulate object which provides methods to parse the char
33     * data
34     * @throws IOException thrown when data reading fails
35     * @draft 2.1
36     */

37     public IntTrie(InputStream JavaDoc inputStream, DataManipulate dataManipulate)
38                                                     throws IOException JavaDoc
39     {
40         super(inputStream, dataManipulate);
41         if (!isIntTrie()) {
42             throw new IllegalArgumentException JavaDoc(
43                                "Data given does not belong to a int trie.");
44         }
45     }
46
47     /**
48      * Make a dummy IntTrie.
49      * A dummy trie is an empty runtime trie, used when a real data trie cannot
50      * be loaded.
51      *
52      * The trie always returns the initialValue,
53      * or the leadUnitValue for lead surrogate code points.
54      * The Latin-1 part is always set up to be linear.
55      *
56      * @param initialValue the initial value that is set for all code points
57      * @param leadUnitValue the value for lead surrogate code _units_ that do not
58      * have associated supplementary data
59      * @param dataManipulate object which provides methods to parse the char data
60      */

61     public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
62         super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
63
64         int dataLength, latin1Length, i, limit;
65         char block;
66
67         /* calculate the actual size of the dummy trie data */
68
69         /* max(Latin-1, block 0) */
70         dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
71         if(leadUnitValue!=initialValue) {
72             dataLength+=DATA_BLOCK_LENGTH;
73         }
74         m_data_=new int[dataLength];
75         m_dataLength_=dataLength;
76
77         m_initialValue_=initialValue;
78
79         /* fill the index and data arrays */
80
81         /* indexes are preset to 0 (block 0) */
82
83         /* Latin-1 data */
84         for(i=0; i<latin1Length; ++i) {
85             m_data_[i]=initialValue;
86         }
87
88         if(leadUnitValue!=initialValue) {
89             /* indexes for lead surrogate code units to the block after Latin-1 */
90             block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
91             i=0xd800>>INDEX_STAGE_1_SHIFT_;
92             limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
93             for(; i<limit; ++i) {
94                 m_index_[i]=block;
95             }
96
97             /* data for lead surrogate code units */
98             limit=latin1Length+DATA_BLOCK_LENGTH;
99             for(i=latin1Length; i<limit; ++i) {
100                 m_data_[i]=leadUnitValue;
101             }
102         }
103     }
104
105     // public methods --------------------------------------------------
106

107     /**
108     * Gets the value associated with the codepoint.
109     * If no value is associated with the codepoint, a default value will be
110     * returned.
111     * @param ch codepoint
112     * @return offset to data
113     * @draft 2.1
114     */

115     public final int getCodePointValue(int ch)
116     {
117         int offset;
118
119         // fastpath for U+0000..U+D7FF
120
if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
121             // copy of getRawOffset()
122
offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
123                     + (ch & INDEX_STAGE_3_MASK_);
124             return m_data_[offset];
125         }
126
127         // handle U+D800..U+10FFFF
128
offset = getCodePointOffset(ch);
129         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
130     }
131
132     /**
133     * Gets the value to the data which this lead surrogate character points
134     * to.
135     * Returned data may contain folding offset information for the next
136     * trailing surrogate character.
137     * This method does not guarantee correct results for trail surrogates.
138     * @param ch lead surrogate character
139     * @return data value
140     * @draft 2.1
141     */

142     public final int getLeadValue(char ch)
143     {
144         return m_data_[getLeadOffset(ch)];
145     }
146
147     /**
148     * Get the value associated with the BMP code point.
149     * Lead surrogate code points are treated as normal code points, with
150     * unfolded values that may differ from getLeadValue() results.
151     * @param ch the input BMP code point
152     * @return trie data value associated with the BMP codepoint
153     * @draft 2.1
154     */

155     public final int getBMPValue(char ch)
156     {
157         return m_data_[getBMPOffset(ch)];
158     }
159
160     /**
161     * Get the value associated with a pair of surrogates.
162     * @param lead a lead surrogate
163     * @param trail a trail surrogate
164     * @draft 2.1
165     */

166     public final int getSurrogateValue(char lead, char trail)
167     {
168         if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) {
169             throw new IllegalArgumentException JavaDoc(
170                 "Argument characters do not form a supplementary character");
171         }
172         // get fold position for the next trail surrogate
173
int offset = getSurrogateOffset(lead, trail);
174
175         // get the real data from the folded lead/trail units
176
if (offset > 0) {
177             return m_data_[offset];
178         }
179
180         // return m_initialValue_ if there is an error
181
return m_initialValue_;
182     }
183
184     /**
185     * Get a value from a folding offset (from the value of a lead surrogate)
186     * and a trail surrogate.
187     * @param leadvalue the value of a lead surrogate that contains the
188     * folding offset
189     * @param trail surrogate
190     * @return trie data value associated with the trail character
191     * @draft 2.1
192     */

193     public final int getTrailValue(int leadvalue, char trail)
194     {
195         if (m_dataManipulate_ == null) {
196             throw new NullPointerException JavaDoc(
197                              "The field DataManipulate in this Trie is null");
198         }
199         int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
200         if (offset > 0) {
201             return m_data_[getRawOffset(offset,
202                                          (char)(trail & SURROGATE_MASK_))];
203         }
204         return m_initialValue_;
205     }
206     
207     /**
208      * <p>Gets the latin 1 fast path value.</p>
209      * <p>Note this only works if latin 1 characters have their own linear
210      * array.</p>
211      * @param ch latin 1 characters
212      * @return value associated with latin character
213      */

214     public final int getLatin1LinearValue(char ch)
215     {
216         return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch];
217     }
218
219     /**
220      * Checks if the argument Trie has the same data as this Trie
221      * @param other Trie to check
222      * @return true if the argument Trie has the same data as this Trie, false
223      * otherwise
224      */

225     ///CLOVER:OFF
226
public boolean equals(Object JavaDoc other)
227     {
228         boolean result = super.equals(other);
229         if (result && other instanceof IntTrie) {
230             IntTrie othertrie = (IntTrie)other;
231             if (m_initialValue_ != othertrie.m_initialValue_
232                 || !Arrays.equals(m_data_, othertrie.m_data_)) {
233                 return false;
234             }
235             return true;
236         }
237         return false;
238     }
239     ///CLOVER:ON
240

241     // protected methods -----------------------------------------------
242

243     /**
244     * <p>Parses the input stream and stores its trie content into a index and
245     * data array</p>
246     * @param inputStream data input stream containing trie data
247     * @exception IOException thrown when data reading fails
248     */

249     protected final void unserialize(InputStream JavaDoc inputStream)
250                                                     throws IOException JavaDoc
251     {
252         super.unserialize(inputStream);
253         // one used for initial value
254
m_data_ = new int[m_dataLength_];
255         DataInputStream JavaDoc input = new DataInputStream JavaDoc(inputStream);
256         for (int i = 0; i < m_dataLength_; i ++) {
257             m_data_[i] = input.readInt();
258         }
259         m_initialValue_ = m_data_[0];
260     }
261     
262     /**
263     * Gets the offset to the data which the surrogate pair points to.
264     * @param lead lead surrogate
265     * @param trail trailing surrogate
266     * @return offset to data
267     * @draft 2.1
268     */

269     protected final int getSurrogateOffset(char lead, char trail)
270     {
271         if (m_dataManipulate_ == null) {
272             throw new NullPointerException JavaDoc(
273                              "The field DataManipulate in this Trie is null");
274         }
275         // get fold position for the next trail surrogate
276
int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
277
278         // get the real data from the folded lead/trail units
279
if (offset > 0) {
280             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
281         }
282
283         // return -1 if there is an error, in this case we return the default
284
// value: m_initialValue_
285
return -1;
286     }
287     
288     /**
289     * Gets the value at the argument index.
290     * For use internally in TrieIterator
291     * @param index value at index will be retrieved
292     * @return 32 bit value
293     * @see com.ibm.icu.impl.TrieIterator
294     * @draft 2.1
295     */

296     protected final int getValue(int index)
297     {
298       return m_data_[index];
299     }
300     
301     /**
302     * Gets the default initial value
303     * @return 32 bit value
304     * @draft 2.1
305     */

306     protected final int getInitialValue()
307     {
308         return m_initialValue_;
309     }
310
311     // package private methods -----------------------------------------
312

313     /**
314      * Internal constructor for builder use
315      * @param index the index array to be slotted into this trie
316      * @param data the data array to be slotted into this trie
317      * @param initialvalue the initial value for this trie
318      * @param options trie options to use
319      * @param datamanipulate folding implementation
320      */

321     IntTrie(char index[], int data[], int initialvalue, int options,
322             DataManipulate datamanipulate)
323     {
324         super(index, options, datamanipulate);
325         m_data_ = data;
326         m_dataLength_ = m_data_.length;
327         m_initialValue_ = initialvalue;
328     }
329     
330     // private data members --------------------------------------------
331

332     /**
333     * Default value
334     */

335     private int m_initialValue_;
336     /**
337     * Array of char data
338     */

339     private int m_data_[];
340 }
341
Popular Tags