KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > impl > CharTrie


/*
 ******************************************************************************
 * Copyright (C) 1996-2005, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */

package com.ibm.icu.impl;

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

import com.ibm.icu.text.UTF16;

16 /**
17  * Trie implementation which stores data in char, 16 bits.
18  * @author synwee
19  * @see com.ibm.icu.impl.Trie
20  * @since release 2.1, Jan 01 2002
21  */

22
23  // note that i need to handle the block calculations later, since chartrie
24
// in icu4c uses the same index array.
25
public class CharTrie extends Trie
26 {
27     // public constructors ---------------------------------------------
28

29     /**
30     * <p>Creates a new Trie with the settings for the trie data.</p>
31     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
32     * trie.</p>
33     * @param inputStream file input stream to a ICU data file, containing
34     * the trie
35     * @param dataManipulate object which provides methods to parse the char
36     * data
37     * @throws IOException thrown when data reading fails
38     * @draft 2.1
39     */

40     public CharTrie(InputStream JavaDoc inputStream,
41                     DataManipulate dataManipulate) throws IOException JavaDoc
42     {
43         super(inputStream, dataManipulate);
44         
45         if (!isCharTrie()) {
46             throw new IllegalArgumentException JavaDoc(
47                                "Data given does not belong to a char trie.");
48         }
49         m_friendAgent_ = new FriendAgent();
50     }
51
52     /**
53      * Make a dummy CharTrie.
54      * A dummy trie is an empty runtime trie, used when a real data trie cannot
55      * be loaded.
56      *
57      * The trie always returns the initialValue,
58      * or the leadUnitValue for lead surrogate code points.
59      * The Latin-1 part is always set up to be linear.
60      *
61      * @param initialValue the initial value that is set for all code points
62      * @param leadUnitValue the value for lead surrogate code _units_ that do not
63      * have associated supplementary data
64      * @param dataManipulate object which provides methods to parse the char data
65      */

66     public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
67         super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
68
69         int dataLength, latin1Length, i, limit;
70         char block;
71
72         /* calculate the actual size of the dummy trie data */
73
74         /* max(Latin-1, block 0) */
75         dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
76         if(leadUnitValue!=initialValue) {
77             dataLength+=DATA_BLOCK_LENGTH;
78         }
79         m_data_=new char[dataLength];
80         m_dataLength_=dataLength;
81
82         m_initialValue_=(char)initialValue;
83
84         /* fill the index and data arrays */
85
86         /* indexes are preset to 0 (block 0) */
87
88         /* Latin-1 data */
89         for(i=0; i<latin1Length; ++i) {
90             m_data_[i]=(char)initialValue;
91         }
92
93         if(leadUnitValue!=initialValue) {
94             /* indexes for lead surrogate code units to the block after Latin-1 */
95             block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
96             i=0xd800>>INDEX_STAGE_1_SHIFT_;
97             limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
98             for(; i<limit; ++i) {
99                 m_index_[i]=block;
100             }
101
102             /* data for lead surrogate code units */
103             limit=latin1Length+DATA_BLOCK_LENGTH;
104             for(i=latin1Length; i<limit; ++i) {
105                 m_data_[i]=(char)leadUnitValue;
106             }
107         }
108
109         m_friendAgent_ = new FriendAgent();
110     }
111
112     /**
113      * Java friend implementation
114      */

115     public class FriendAgent
116     {
117         /**
118          * Gives out the index array of the trie
119          * @return index array of trie
120          */

121         public char[] getPrivateIndex()
122         {
123             return m_index_;
124         }
125         /**
126          * Gives out the data array of the trie
127          * @return data array of trie
128          */

129         public char[] getPrivateData()
130         {
131             return m_data_;
132         }
133         /**
134          * Gives out the data offset in the trie
135          * @return data offset in the trie
136          */

137         public int getPrivateInitialValue()
138         {
139             return m_initialValue_;
140         }
141     }
142     
143     // public methods --------------------------------------------------
144

145     /**
146      * Java friend implementation
147      * To store the index and data array into the argument.
148      * @param friend java friend UCharacterProperty object to store the array
149      */

150     public void putIndexData(UCharacterProperty friend)
151     {
152         friend.setIndexData(m_friendAgent_);
153     }
154
155     /**
156     * Gets the value associated with the codepoint.
157     * If no value is associated with the codepoint, a default value will be
158     * returned.
159     * @param ch codepoint
160     * @return offset to data
161     * @draft 2.1
162     */

163     public final char getCodePointValue(int ch)
164     {
165         int offset;
166
167         // fastpath for U+0000..U+D7FF
168
if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
169             // copy of getRawOffset()
170
offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
171                     + (ch & INDEX_STAGE_3_MASK_);
172             return m_data_[offset];
173         }
174
175         // handle U+D800..U+10FFFF
176
offset = getCodePointOffset(ch);
177         
178         // return -1 if there is an error, in this case we return the default
179
// value: m_initialValue_
180
return (offset >= 0) ? m_data_[offset] : m_initialValue_;
181     }
182
183     /**
184     * Gets the value to the data which this lead surrogate character points
185     * to.
186     * Returned data may contain folding offset information for the next
187     * trailing surrogate character.
188     * This method does not guarantee correct results for trail surrogates.
189     * @param ch lead surrogate character
190     * @return data value
191     * @draft 2.1
192     */

193     public final char getLeadValue(char ch)
194     {
195        return m_data_[getLeadOffset(ch)];
196     }
197
198     /**
199     * Get the value associated with the BMP code point.
200     * Lead surrogate code points are treated as normal code points, with
201     * unfolded values that may differ from getLeadValue() results.
202     * @param ch the input BMP code point
203     * @return trie data value associated with the BMP codepoint
204     * @draft 2.1
205     */

206     public final char getBMPValue(char ch)
207     {
208         return m_data_[getBMPOffset(ch)];
209     }
210
211     /**
212     * Get the value associated with a pair of surrogates.
213     * @param lead a lead surrogate
214     * @param trail a trail surrogate
215     * @draft 2.1
216     */

217     public final char getSurrogateValue(char lead, char trail)
218     {
219         int offset = getSurrogateOffset(lead, trail);
220         if (offset > 0) {
221             return m_data_[offset];
222         }
223         return m_initialValue_;
224     }
225
226     /**
227     * <p>Get a value from a folding offset (from the value of a lead surrogate)
228     * and a trail surrogate.</p>
229     * <p>If the
230     * @param leadvalue value associated with the lead surrogate which contains
231     * the folding offset
232     * @param trail surrogate
233     * @return trie data value associated with the trail character
234     * @draft 2.1
235     */

236     public final char getTrailValue(int leadvalue, char trail)
237     {
238         if (m_dataManipulate_ == null) {
239             throw new NullPointerException JavaDoc(
240                              "The field DataManipulate in this Trie is null");
241         }
242         int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
243         if (offset > 0) {
244             return m_data_[getRawOffset(offset,
245                                         (char)(trail & SURROGATE_MASK_))];
246         }
247         return m_initialValue_;
248     }
249     
250     /**
251      * <p>Gets the latin 1 fast path value.</p>
252      * <p>Note this only works if latin 1 characters have their own linear
253      * array.</p>
254      * @param ch latin 1 characters
255      * @return value associated with latin character
256      */

257     public final char getLatin1LinearValue(char ch)
258     {
259         return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch];
260     }
261     
262     /**
263      * Checks if the argument Trie has the same data as this Trie
264      * @param other Trie to check
265      * @return true if the argument Trie has the same data as this Trie, false
266      * otherwise
267      */

268     ///CLOVER:OFF
269
public boolean equals(Object JavaDoc other)
270     {
271         boolean result = super.equals(other);
272         if (result && other instanceof CharTrie) {
273             CharTrie othertrie = (CharTrie)other;
274             return m_initialValue_ == othertrie.m_initialValue_;
275         }
276         return false;
277     }
278     ///CLOVER:ON
279

280     // protected methods -----------------------------------------------
281

282     /**
283     * <p>Parses the input stream and stores its trie content into a index and
284     * data array</p>
285     * @param inputStream data input stream containing trie data
286     * @exception IOException thrown when data reading fails
287     */

288     protected final void unserialize(InputStream JavaDoc inputStream)
289                                                 throws IOException JavaDoc
290     {
291         DataInputStream JavaDoc input = new DataInputStream JavaDoc(inputStream);
292         int indexDataLength = m_dataOffset_ + m_dataLength_;
293         m_index_ = new char[indexDataLength];
294         for (int i = 0; i < indexDataLength; i ++) {
295             m_index_[i] = input.readChar();
296         }
297         m_data_ = m_index_;
298         m_initialValue_ = m_data_[m_dataOffset_];
299     }
300     
301     /**
302     * Gets the offset to the data which the surrogate pair points to.
303     * @param lead lead surrogate
304     * @param trail trailing surrogate
305     * @return offset to data
306     * @draft 2.1
307     */

308     protected final int getSurrogateOffset(char lead, char trail)
309     {
310         if (m_dataManipulate_ == null) {
311             throw new NullPointerException JavaDoc(
312                              "The field DataManipulate in this Trie is null");
313         }
314         
315         // get fold position for the next trail surrogate
316
int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
317
318         // get the real data from the folded lead/trail units
319
if (offset > 0) {
320             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
321         }
322
323         // return -1 if there is an error, in this case we return the default
324
// value: m_initialValue_
325
return -1;
326     }
327     
328     /**
329     * Gets the value at the argument index.
330     * For use internally in TrieIterator.
331     * @param index value at index will be retrieved
332     * @return 32 bit value
333     * @see com.ibm.icu.impl.TrieIterator
334     * @draft 2.1
335     */

336     protected final int getValue(int index)
337     {
338         return m_data_[index];
339     }
340
341     /**
342     * Gets the default initial value
343     * @return 32 bit value
344     * @draft 2.1
345     */

346     protected final int getInitialValue()
347     {
348         return m_initialValue_;
349     }
350   
351     // private data members --------------------------------------------
352

353     /**
354     * Default value
355     */

356     private char m_initialValue_;
357     /**
358     * Array of char data
359     */

360     private char m_data_[];
361     /**
362      * Agent for friends
363      */

364     private FriendAgent m_friendAgent_;
365 }
366
Popular Tags