KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > lang > UCharacterNameIterator


/*
******************************************************************************
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/

7
8 package com.ibm.icu.lang;
9
10 import com.ibm.icu.util.ValueIterator;
11 import com.ibm.icu.impl.UCharacterName;
12 import com.ibm.icu.impl.UCharacterNameChoice;
13
/**
 * <p>Class enabling iteration of the codepoints and their names.</p>
 * <p>The result of each iteration contains a valid codepoint that has a
 * valid name.</p>
 * <p>See UCharacter.getNameIterator() for an example of use.</p>
 * @author synwee
 * @since release 2.1, March 5 2002
 */

22 class UCharacterNameIterator implements ValueIterator
23 {
24     // public methods ----------------------------------------------------
25

26     /**
27     * <p>Gets the next result for this iteration and returns
28     * true if we are not at the end of the iteration, false otherwise.</p>
29     * <p>If the return boolean is a false, the contents of elements will not
30     * be updated.</p>
31     * @param element for storing the result codepoint and name
32     * @return true if we are not at the end of the iteration, false otherwise.
33     * @see Element
34     * @draft 2.1
35     */

36     public boolean next(ValueIterator.Element element)
37     {
38         if (m_current_ >= m_limit_) {
39             return false;
40         }
41
42         if (m_choice_ != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
43             int length = m_name_.getAlgorithmLength();
44             if (m_algorithmIndex_ < length) {
45                 while (m_algorithmIndex_ < length) {
46                     // find the algorithm range that could contain m_current_
47
if (m_algorithmIndex_ < 0 ||
48                         m_name_.getAlgorithmEnd(m_algorithmIndex_) <
49                         m_current_) {
50                         m_algorithmIndex_ ++;
51                     }
52                     else {
53                         break;
54                     }
55                 }
56
57                 if (m_algorithmIndex_ < length) {
58                     // interleave the data-driven ones with the algorithmic ones
59
// iterate over all algorithmic ranges; assume that they are
60
// in ascending order
61
int start = m_name_.getAlgorithmStart(m_algorithmIndex_);
62                     if (m_current_ < start) {
63                         // this should get rid of those codepoints that are not
64
// in the algorithmic range
65
int end = start;
66                         if (m_limit_ <= start) {
67                             end = m_limit_;
68                         }
69                         if (!iterateGroup(element, end)) {
70                             m_current_ ++;
71                             return true;
72                         }
73                     }
74
75                     if (m_current_ >= m_limit_) {
76                         // after iterateGroup fails, current codepoint may be
77
// greater than limit
78
return false;
79                     }
80
81                     element.integer = m_current_;
82                     element.value = m_name_.getAlgorithmName(m_algorithmIndex_,
83                                                                    m_current_);
84                     // reset the group index if we are in the algorithmic names
85
m_groupIndex_ = -1;
86                     m_current_ ++;
87                     return true;
88                 }
89             }
90         }
91         // enumerate the character names after the last algorithmic range
92
if (!iterateGroup(element, m_limit_)) {
93             m_current_ ++;
94             return true;
95         }
96         else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
97             if (!iterateExtended(element, m_limit_)) {
98                 m_current_ ++;
99                 return true;
100             }
101         }
102
103         return false;
104     }
105
106     /**
107     * <p>Resets the iterator to start iterating from the integer index
108     * UCharacter.MIN_VALUE or X if a setRange(X, Y) has been called previously.
109     * </p>
110     * @draft 2.1
111     */

112     public void reset()
113     {
114         m_current_ = m_start_;
115         m_groupIndex_ = -1;
116         m_algorithmIndex_ = -1;
117     }
118
119     /**
120      * <p>Restricts the range of integers to iterate and resets the iteration
121      * to begin at the index argument start.</p>
122      * <p>If setRange(start, end) is not performed before next(element) is
123      * called, the iteration will start from the integer index
124      * UCharacter.MIN_VALUE and end at UCharacter.MAX_VALUE.</p>
125      * <p>
126      * If this range is set outside the range of UCharacter.MIN_VALUE and
127      * UCharacter.MAX_VALUE, next(element) will always return false.
128      * </p>
129      * @param start first integer in range to iterate
130      * @param limit 1 integer after the last integer in range
131      * @exception IllegalArgumentException thrown when attempting to set an
132      * illegal range. E.g limit <= start
133      * @draft 2.1
134      */

135     public void setRange(int start, int limit)
136     {
137         if (start >= limit) {
138             throw new IllegalArgumentException JavaDoc(
139                 "start or limit has to be valid Unicode codepoints and start < limit");
140         }
141         if (start < UCharacter.MIN_VALUE) {
142             m_start_ = UCharacter.MIN_VALUE;
143         }
144         else {
145             m_start_ = start;
146         }
147
148         if (limit > UCharacter.MAX_VALUE + 1) {
149             m_limit_ = UCharacter.MAX_VALUE + 1;
150         }
151         else {
152             m_limit_ = limit;
153         }
154         m_current_ = m_start_;
155     }
156
157     // protected constructor ---------------------------------------------
158

159     /**
160     * Constructor
161     * @param name name data
162     * @param choice name choice from the class
163     * com.ibm.icu.lang.UCharacterNameChoice
164     * @draft 2.1
165     */

166     protected UCharacterNameIterator(UCharacterName name, int choice)
167     {
168         if(name==null){
169             throw new IllegalArgumentException JavaDoc("UCharacterName name argument cannot be null. Missing unames.icu?");
170         }
171         m_name_ = name;
172         // no explicit choice in UCharacter so no checks on choice
173
m_choice_ = choice;
174         m_start_ = UCharacter.MIN_VALUE;
175         m_limit_ = UCharacter.MAX_VALUE + 1;
176         m_current_ = m_start_;
177     }
178
179     // private data members ---------------------------------------------
180

181     /**
182      * Name data
183      */

184     private UCharacterName m_name_;
185     /**
186      * Name choice
187      */

188     private int m_choice_;
189      /**
190      * Start iteration range
191      */

192     private int m_start_;
193     /**
194      * End + 1 iteration range
195      */

196     private int m_limit_;
197     /**
198      * Current codepoint
199      */

200     private int m_current_;
201     /**
202      * Group index
203      */

204     private int m_groupIndex_ = -1;
205     /**
206      * Algorithm index
207      */

208     private int m_algorithmIndex_ = -1;
209     /**
210     * Group use
211     */

212     private static char GROUP_OFFSETS_[] =
213                                 new char[UCharacterName.LINES_PER_GROUP_ + 1];
214     private static char GROUP_LENGTHS_[] =
215                                 new char[UCharacterName.LINES_PER_GROUP_ + 1];
216
217     // private methods --------------------------------------------------
218

219     /**
220      * Group name iteration, iterate all the names in the current 32-group and
221      * returns the first codepoint that has a valid name.
222      * @param result stores the result codepoint and name
223      * @param limit last codepoint + 1 in range to search
224      * @return false if a codepoint with a name is found in group and we can
225      * bail from further iteration, true to continue on with the
226      * iteration
227      */

228     private boolean iterateSingleGroup(ValueIterator.Element result, int limit)
229     {
230         synchronized(GROUP_OFFSETS_) {
231         synchronized(GROUP_LENGTHS_) {
232             int index = m_name_.getGroupLengths(m_groupIndex_, GROUP_OFFSETS_,
233                                                 GROUP_LENGTHS_);
234             while (m_current_ < limit) {
235                 int offset = UCharacterName.getGroupOffset(m_current_);
236                 String JavaDoc name = m_name_.getGroupName(
237                                           index + GROUP_OFFSETS_[offset],
238                                           GROUP_LENGTHS_[offset], m_choice_);
239                 if ((name == null || name.length() == 0) &&
240                     m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
241                     name = m_name_.getExtendedName(m_current_);
242                 }
243                 if (name != null && name.length() > 0) {
244                     result.integer = m_current_;
245                     result.value = name;
246                     return false;
247                 }
248                 ++ m_current_;
249             }
250         }
251         }
252         return true;
253     }
254
255     /**
256      * Group name iteration, iterate all the names in the current 32-group and
257      * returns the first codepoint that has a valid name.
258      * @param result stores the result codepoint and name
259      * @param limit last codepoint + 1 in range to search
260      * @return false if a codepoint with a name is found in group and we can
261      * bail from further iteration, true to continue on with the
262      * iteration
263      */

264     private boolean iterateGroup(ValueIterator.Element result, int limit)
265     {
266         if (m_groupIndex_ < 0) {
267             m_groupIndex_ = m_name_.getGroup(m_current_);
268         }
269
270         while (m_groupIndex_ < m_name_.m_groupcount_ &&
271                m_current_ < limit) {
272             // iterate till the last group or the last codepoint
273
int startMSB = UCharacterName.getCodepointMSB(m_current_);
274             int gMSB = m_name_.getGroupMSB(m_groupIndex_); // can be -1
275
if (startMSB == gMSB) {
276                 if (startMSB == UCharacterName.getCodepointMSB(limit - 1)) {
277                     // if start and limit - 1 are in the same group, then enumerate
278
// only in that one
279
return iterateSingleGroup(result, limit);
280                 }
281                 // enumerate characters in the partial start group
282
// if (m_name_.getGroupOffset(m_current_) != 0) {
283
if (!iterateSingleGroup(result,
284                                         UCharacterName.getGroupLimit(gMSB))) {
285                     return false;
286                 }
287                 ++ m_groupIndex_; // continue with the next group
288
}
289             else if (startMSB > gMSB) {
290                     // make sure that we start enumerating with the first group
291
// after start
292
m_groupIndex_ ++;
293             }
294             else {
295                 int gMIN = UCharacterName.getGroupMin(gMSB);
296                 if (gMIN > limit) {
297                     gMIN = limit;
298                 }
299                 if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
300                     if (!iterateExtended(result, gMIN)) {
301                         return false;
302                     }
303                 }
304                 m_current_ = gMIN;
305             }
306         }
307
308         return true;
309     }
310
311     /**
312      * Iterate extended names.
313      * @param result stores the result codepoint and name
314      * @param limit last codepoint + 1 in range to search
315      * @return false if a codepoint with a name is found and we can
316      * bail from further iteration, true to continue on with the
317      * iteration (this will always be false for valid codepoints)
318      */

319     private boolean iterateExtended(ValueIterator.Element result,
320                                     int limit)
321     {
322         while (m_current_ < limit) {
323             String JavaDoc name = m_name_.getExtendedOr10Name(m_current_);
324             if (name != null && name.length() > 0) {
325                 result.integer = m_current_;
326                 result.value = name;
327                 return false;
328             }
329             ++ m_current_;
330         }
331         return true;
332     }
333 }
334
Popular Tags