KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > impl > UCharacterName


1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */

7 package com.ibm.icu.impl;
8
9 import java.io.InputStream JavaDoc;
10 import java.io.BufferedInputStream JavaDoc;
11 import java.io.IOException JavaDoc;
12 import java.util.MissingResourceException JavaDoc;
13
14 import com.ibm.icu.text.UTF16;
15 import com.ibm.icu.text.UnicodeSet;
16 import com.ibm.icu.lang.UCharacter;
17 import com.ibm.icu.lang.UCharacterCategory;
18
19 /**
20 * Internal class to manage character names.
21 * Since the data for names is stored
22 * in an array of char, indexes used in this class refer by default to
23 * a 2-byte count, unless otherwise stated. In cases where the index refers
24 * to a byte count, the index is halved and, depending on whether the index is
25 * even or odd, the MSB or LSB of the char at the halved index is
26 * returned. For indexes into an array of int, the index is multiplied by 2 and
27 * the char at the multiplied index together with its following char is returned
28 * as an int.
29 * <a HREF=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class
30 * Note : 0 - 0x1F are control characters without names in Unicode 3.0
31 * @author Syn Wee Quek
32 * @since nov0700
33 */

34
35 public final class UCharacterName
36 {
37     // public data members ----------------------------------------------
38

39     /**
40     * Number of lines per group
41     * 1 << GROUP_SHIFT_
42     */

43     public static final int LINES_PER_GROUP_ = 1 << 5;
44     /**
45      * Maximum number of groups
46      */

47     public int m_groupcount_ = 0;
48
49     // public methods ---------------------------------------------------
50

51     /**
52      * Gets the only instance of UCharacterName
53      * @return only instance of UCharacterName
54      * @exception MissingResourceException thrown when reading of name data fails
55      */

56     public static UCharacterName getInstance()
57     {
58         if (INSTANCE_ == null) {
59             try {
60                 INSTANCE_ = new UCharacterName();
61             }catch(IOException JavaDoc e){
62                 throw new MissingResourceException JavaDoc("Could not construct UCharacterName. Missing unames.icu","","");
63             }
64             catch (Exception JavaDoc e) {
65                 throw new MissingResourceException JavaDoc(e.getMessage(),"","");
66             }
67         }
68         return INSTANCE_;
69     }
70
71     /**
72     * Retrieve the name of a Unicode code point.
73     * Depending on <code>choice</code>, the character name written into the
74     * buffer is the "modern" name or the name that was defined in Unicode
75     * version 1.0.
76     * The name contains only "invariant" characters
77     * like A-Z, 0-9, space, and '-'.
78     *
79     * @param ch the code point for which to get the name.
80     * @param choice Selector for which name to get.
81     * @return if code point is above 0x1fff, null is returned
82     */

83     public String JavaDoc getName(int ch, int choice)
84     {
85         if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE ||
86             choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
87             return null;
88         }
89
90         String JavaDoc result = null;
91
92         result = getAlgName(ch, choice);
93
94         // getting normal character name
95
if (result == null || result.length() == 0) {
96             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
97                 result = getExtendedName(ch);
98             } else {
99                 result = getGroupName(ch, choice);
100             }
101         }
102
103         return result;
104     }
105
106     /**
107     * Find a character by its name and return its code point value
108     * @param choice selector to indicate if argument name is a Unicode 1.0
109     * or the most current version
110     * @param name the name to search for
111     * @return code point
112     */

113     public int getCharFromName(int choice, String JavaDoc name)
114     {
115         // checks for illegal arguments
116
if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT ||
117             name == null || name.length() == 0) {
118             return -1;
119         }
120
121         // try extended names first
122
int result = getExtendedChar(name.toLowerCase(), choice);
123         if (result >= -1) {
124             return result;
125         }
126
127         String JavaDoc upperCaseName = name.toUpperCase();
128         // try algorithmic names first, if fails then try group names
129
// int result = getAlgorithmChar(choice, uppercasename);
130

131         if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
132             int count = 0;
133             if (m_algorithm_ != null) {
134                 count = m_algorithm_.length;
135             }
136             for (count --; count >= 0; count --) {
137                 result = m_algorithm_[count].getChar(upperCaseName);
138                 if (result >= 0) {
139                     return result;
140                 }
141             }
142         }
143
144         if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
145             result = getGroupChar(upperCaseName,
146                                   UCharacterNameChoice.UNICODE_CHAR_NAME);
147             if (result == -1) {
148                 result = getGroupChar(upperCaseName,
149                                   UCharacterNameChoice.UNICODE_10_CHAR_NAME);
150             }
151         }
152         else {
153             result = getGroupChar(upperCaseName, choice);
154         }
155         return result;
156     }
157
158     // these are all UCharacterNameIterator use methods -------------------
159

160     /**
161     * Reads a block of compressed lengths of 32 strings and expands them into
162     * offsets and lengths for each string. Lengths are stored with a
163     * variable-width encoding in consecutive nibbles:
164     * If a nibble<0xc, then it is the length itself (0 = empty string).
165     * If a nibble>=0xc, then it forms a length value with the following
166     * nibble.
167     * The offsets and lengths arrays must be at least 33 (one more) long
168     * because there is no check here at the end if the last nibble is still
169     * used.
170     * @param index of group string object in array
171     * @param offsets array to store the value of the string offsets
172     * @param lengths array to store the value of the string length
173     * @return next index of the data string immediately after the lengths
174     * in terms of byte address
175     */

176     public int getGroupLengths(int index, char offsets[], char lengths[])
177     {
178         char length = 0xffff;
179         byte b = 0,
180             n = 0;
181         int shift;
182         index = index * m_groupsize_; // byte count offsets of group strings
183
int stringoffset = UCharacterUtility.toInt(
184                                  m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
185                                  m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
186
187         offsets[0] = 0;
188
189         // all 32 lengths must be read to get the offset of the first group
190
// string
191
for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
192             b = m_groupstring_[stringoffset];
193             shift = 4;
194
195             while (shift >= 0) {
196                 // getting nibble
197
n = (byte)((b >> shift) & 0x0F);
198                 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
199                     length = (char)((n - 12) << 4);
200                 }
201                 else {
202                     if (length != 0xffff) {
203                        lengths[i] = (char)((length | n) + 12);
204                     }
205                     else {
206                        lengths[i] = (char)n;
207                     }
208
209                     if (i < LINES_PER_GROUP_) {
210                        offsets[i + 1] = (char)(offsets[i] + lengths[i]);
211                     }
212
213                     length = 0xffff;
214                     i ++;
215                 }
216
217                 shift -= 4;
218             }
219         }
220         return stringoffset;
221     }
222
223     /**
224     * Gets the name of the argument group index.
225     * UnicodeData.txt uses ';' as a field separator, so no field can contain
226     * ';' as part of its contents. In unames.icu, it is marked as
227     * token[';'] == -1 only if the semicolon is used in the data file - which
228     * is iff we have Unicode 1.0 names or ISO comments.
229     * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments
230     * although we know that it will never be part of a name.
231     * Equivalent to ICU4C's expandName.
232     * @param index of the group name string in byte count
233     * @param length of the group name string
234     * @param choice of Unicode 1.0 name or the most current name
235     * @return name of the group
236     */

237     public String JavaDoc getGroupName(int index, int length, int choice)
238     {
239         if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME
240             || choice == UCharacterNameChoice.ISO_COMMENT_) {
241             if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
242                 // skip the modern name
243
int oldindex = index;
244                 index += UCharacterUtility.skipByteSubString(m_groupstring_,
245                                                    index, length, (byte)';');
246                 length -= (index - oldindex);
247                 if (choice == UCharacterNameChoice.ISO_COMMENT_) {
248                     // skips the 1.0 Name to the iso comment part
249
oldindex = index;
250                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
251                                                     index, length, (byte)';');
252                     length -= (index - oldindex);
253                 }
254             }
255             else {
256                 // the semicolon byte is a token number, therefore only modern
257
// names are stored in unames.dat and there is no such
258
// requested Unicode 1.0 name here
259
length = 0;
260             }
261         }
262
263         synchronized (m_utilStringBuffer_) {
264             m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
265             byte b;
266             char token;
267             for (int i = 0; i < length;) {
268                 b = m_groupstring_[index + i];
269                 i ++;
270
271                 if (b >= m_tokentable_.length) {
272                     if (b == ';') {
273                         break;
274                     }
275                     m_utilStringBuffer_.append(b); // implicit letter
276
}
277                 else {
278                     token = m_tokentable_[b & 0x00ff];
279                     if (token == 0xFFFE) {
280                         // this is a lead byte for a double-byte token
281
token = m_tokentable_[b << 8 |
282                                           (m_groupstring_[index + i] & 0x00ff)];
283                         i ++;
284                     }
285                     if (token == 0xFFFF) {
286                         if (b == ';') {
287                             // skip the semicolon if we are seeking extended
288
// names and there was no 2.0 name but there
289
// is a 1.0 name.
290
if (m_utilStringBuffer_.length() == 0 && choice ==
291                                    UCharacterNameChoice.EXTENDED_CHAR_NAME) {
292                                 continue;
293                             }
294                             break;
295                         }
296                         // explicit letter
297
m_utilStringBuffer_.append((char)(b & 0x00ff));
298                     }
299                     else { // write token word
300
UCharacterUtility.getNullTermByteSubString(
301                                 m_utilStringBuffer_, m_tokenstring_, token);
302                     }
303                 }
304             }
305
306             if (m_utilStringBuffer_.length() > 0) {
307                 return m_utilStringBuffer_.toString();
308             }
309         }
310         return null;
311     }
312
313     /**
314     * Retrieves the extended name
315     */

316     public String JavaDoc getExtendedName(int ch)
317     {
318         String JavaDoc result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
319         if (result == null) {
320             if (getType(ch) == UCharacterCategory.CONTROL) {
321                 result = getName(ch,
322                                  UCharacterNameChoice.UNICODE_10_CHAR_NAME);
323             }
324             if (result == null) {
325                 result = getExtendedOr10Name(ch);
326             }
327         }
328         return result;
329     }
330
331     /**
332      * Gets the group index for the codepoint, or the group before it.
333      * @param codepoint
334      * @return group index containing codepoint or the group before it.
335      */

336     public int getGroup(int codepoint)
337     {
338         int endGroup = m_groupcount_;
339         int msb = getCodepointMSB(codepoint);
340         int result = 0;
341         // binary search for the group of names that contains the one for
342
// code
343
// find the group that contains codepoint, or the highest before it
344
while (result < endGroup - 1) {
345             int gindex = (result + endGroup) >> 1;
346             if (msb < getGroupMSB(gindex)) {
347                 endGroup = gindex;
348             }
349             else {
350                 result = gindex;
351             }
352         }
353         return result;
354     }
355
356     /**
357      * Gets the extended and 1.0 name when the most current unicode names
358      * fail
359      * @param ch codepoint
360      * @return name of codepoint extended or 1.0
361      */

362     public String JavaDoc getExtendedOr10Name(int ch)
363     {
364         String JavaDoc result = null;
365         if (getType(ch) == UCharacterCategory.CONTROL) {
366             result = getName(ch,
367                              UCharacterNameChoice.UNICODE_10_CHAR_NAME);
368         }
369         if (result == null) {
370             int type = getType(ch);
371             // Return unknown if the table of names above is not up to
372
// date.
373
if (type >= TYPE_NAMES_.length) {
374                 result = UNKNOWN_TYPE_NAME_;
375             }
376             else {
377                 result = TYPE_NAMES_[type];
378             }
379             synchronized (m_utilStringBuffer_) {
380                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
381                 m_utilStringBuffer_.append('<');
382                 m_utilStringBuffer_.append(result);
383                 m_utilStringBuffer_.append('-');
384                 String JavaDoc chStr = Integer.toHexString(ch).toUpperCase();
385                 int zeros = 4 - chStr.length();
386                 while (zeros > 0) {
387                     m_utilStringBuffer_.append('0');
388                     zeros --;
389                 }
390                 m_utilStringBuffer_.append(chStr);
391                 m_utilStringBuffer_.append('>');
392                 result = m_utilStringBuffer_.toString();
393             }
394         }
395         return result;
396     }
397
398     /**
399      * Gets the MSB from the group index
400      * @param gindex group index
401      * @return the MSB of the group if gindex is valid, -1 otherwise
402      */

403     public int getGroupMSB(int gindex)
404     {
405         if (gindex >= m_groupcount_) {
406             return -1;
407         }
408         return m_groupinfo_[gindex * m_groupsize_];
409     }
410
411     /**
412      * Gets the MSB of the codepoint
413      * @param codepoint
414      * @return the MSB of the codepoint
415      */

416     public static int getCodepointMSB(int codepoint)
417     {
418         return codepoint >> GROUP_SHIFT_;
419     }
420
421     /**
422      * Gets the maximum codepoint + 1 of the group
423      * @param msb most significant byte of the group
424      * @return limit codepoint of the group
425      */

426     public static int getGroupLimit(int msb)
427     {
428         return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
429     }
430
431     /**
432      * Gets the minimum codepoint of the group
433      * @param msb most significant byte of the group
434      * @return minimum codepoint of the group
435      */

436     public static int getGroupMin(int msb)
437     {
438         return msb << GROUP_SHIFT_;
439     }
440
441     /**
442      * Gets the offset to a group
443      * @param codepoint
444      * @return offset to a group
445      */

446     public static int getGroupOffset(int codepoint)
447     {
448         return codepoint & GROUP_MASK_;
449     }
450
451     /**
452      * Gets the minimum codepoint of a group
453      * @param codepoint
454      * @return minimum codepoint in the group which codepoint belongs to
455      */

456     ///CLOVER:OFF
457
public static int getGroupMinFromCodepoint(int codepoint)
458     {
459         return codepoint & ~GROUP_MASK_;
460     }
461     ///CLOVER:ON
462

463     /**
464      * Get the Algorithm range length
465      * @return Algorithm range length
466      */

467     public int getAlgorithmLength()
468     {
469         return m_algorithm_.length;
470     }
471
472     /**
473      * Gets the start of the range
474      * @param index algorithm index
475      * @return algorithm range start
476      */

477     public int getAlgorithmStart(int index)
478     {
479         return m_algorithm_[index].m_rangestart_;
480     }
481
482     /**
483      * Gets the end of the range
484      * @param index algorithm index
485      * @return algorithm range end
486      */

487     public int getAlgorithmEnd(int index)
488     {
489         return m_algorithm_[index].m_rangeend_;
490     }
491
492     /**
493      * Gets the Algorithmic name of the codepoint
494      * @param index algorithmic range index
495      * @param codepoint
496      * @return algorithmic name of codepoint
497      */

498     public String JavaDoc getAlgorithmName(int index, int codepoint)
499     {
500         String JavaDoc result = null;
501         synchronized (m_utilStringBuffer_) {
502             m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
503             m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
504             result = m_utilStringBuffer_.toString();
505         }
506         return result;
507     }
508
509     /**
510     * Gets the group name of the character
511     * @param ch character to get the group name
512     * @param choice name choice selector to choose a unicode 1.0 or newer name
513     */

514     public String JavaDoc getGroupName(int ch, int choice)
515     {
516         // gets the msb
517
int msb = getCodepointMSB(ch);
518         int group = getGroup(ch);
519
520         // return this if it is an exact match
521
if (msb == m_groupinfo_[group * m_groupsize_]) {
522             int index = getGroupLengths(group, m_groupoffsets_,
523                                         m_grouplengths_);
524             int offset = ch & GROUP_MASK_;
525             return getGroupName(index + m_groupoffsets_[offset],
526                                 m_grouplengths_[offset], choice);
527         }
528
529         return null;
530     }
531
532     // these are transliterator use methods ---------------------------------
533

534     /**
535      * Gets the maximum length of any codepoint name.
536      * Equivalent to uprv_getMaxCharNameLength.
537      * @return the maximum length of any codepoint name
538      */

539     public int getMaxCharNameLength()
540     {
541         if (initNameSetsLengths()) {
542             return m_maxNameLength_;
543         }
544         else {
545             return 0;
546         }
547     }
548
549     /**
550      * Gets the maximum length of any iso comments.
551      * Equivalent to uprv_getMaxISOCommentLength.
552      * @return the maximum length of any codepoint name
553      */

554     ///CLOVER:OFF
555
public int getMaxISOCommentLength()
556     {
557         if (initNameSetsLengths()) {
558             return m_maxISOCommentLength_;
559         }
560         else {
561             return 0;
562         }
563     }
564     ///CLOVER:ON
565

566     /**
567      * Fills set with characters that are used in Unicode character names.
568      * Equivalent to uprv_getCharNameCharacters.
569      * @param set USet to receive characters. Existing contents are deleted.
570      */

571     public void getCharNameCharacters(UnicodeSet set)
572     {
573         convert(m_nameSet_, set);
574     }
575
576     /**
577      * Fills set with characters that are used in Unicode character names.
578      * Equivalent to uprv_getISOCommentCharacters.
579      * @param set USet to receive characters. Existing contents are deleted.
580      */

581     ///CLOVER:OFF
582
public void getISOCommentCharacters(UnicodeSet set)
583     {
584         convert(m_ISOCommentSet_, set);
585     }
586     ///CLOVER:ON
587

588     // package private inner class --------------------------------------
589

590     /**
591     * Algorithmic name class
592     */

593     static final class AlgorithmName
594     {
595         // package private data members ----------------------------------
596

597         /**
598         * Constant type value of the different AlgorithmName
599         */

600         static final int TYPE_0_ = 0;
601         static final int TYPE_1_ = 1;
602
603         // package private constructors ----------------------------------
604

605         /**
606         * Constructor
607         */

608         AlgorithmName()
609         {
610         }
611
612         // package private methods ---------------------------------------
613

614         /**
615         * Sets the information for accessing the algorithmic names
616         * @param rangestart starting code point that lies within this name group
617         * @param rangeend end code point that lies within this name group
618         * @param type algorithm type. There's 2 kinds of algorithmic type. First
619         * which uses code point as part of its name and the other uses
620         * variant postfix strings
621         * @param variant algorithmic variant
622         * @return true if values are valid
623         */

624         boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
625         {
626             if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend
627                 && rangeend <= UCharacter.MAX_VALUE &&
628                 (type == TYPE_0_ || type == TYPE_1_)) {
629                 m_rangestart_ = rangestart;
630                 m_rangeend_ = rangeend;
631                 m_type_ = type;
632                 m_variant_ = variant;
633                 return true;
634             }
635             return false;
636         }
637
638         /**
639         * Sets the factor data
640         * @param factor Array of factor
641         * @return true if factors are valid
642         */

643         boolean setFactor(char factor[])
644         {
645             if (factor.length == m_variant_) {
646                 m_factor_ = factor;
647                 return true;
648             }
649             return false;
650         }
651
652         /**
653         * Sets the name prefix
654         * @param prefix
655         * @return true if prefix is set
656         */

657         boolean setPrefix(String JavaDoc prefix)
658         {
659             if (prefix != null && prefix.length() > 0) {
660                 m_prefix_ = prefix;
661                 return true;
662             }
663             return false;
664         }
665
666         /**
667         * Sets the variant factorized name data
668         * @param string variant factorized name data
669         * @return true if values are set
670         */

671         boolean setFactorString(byte string[])
672         {
673             // factor and variant string can be empty for things like
674
// hanggul code points
675
m_factorstring_ = string;
676             return true;
677         }
678
679         /**
680         * Checks if code point lies in Algorithm object at index
681         * @param ch code point
682         */

683         boolean contains(int ch)
684         {
685             return m_rangestart_ <= ch && ch <= m_rangeend_;
686         }
687
688         /**
689         * Appends algorithm name of code point into StringBuffer.
690         * Note this method does not check for validity of code point in Algorithm,
691         * result is undefined if code point does not belong in Algorithm.
692         * @param ch code point
693         * @param str StringBuffer to append to
694         */

695         void appendName(int ch, StringBuffer JavaDoc str)
696         {
697             str.append(m_prefix_);
698             switch (m_type_)
699             {
700                 case TYPE_0_:
701                     // prefix followed by hex digits indicating variants
702
Utility.hex(ch, m_variant_, str);
703                     break;
704                 case TYPE_1_:
705                     // prefix followed by factorized-elements
706
int offset = ch - m_rangestart_;
707                     int indexes[] = m_utilIntBuffer_;
708                     int factor;
709
710                     // write elements according to the factors
711
// the factorized elements are determined by modulo
712
// arithmetic
713
synchronized (m_utilIntBuffer_) {
714                         for (int i = m_variant_ - 1; i > 0; i --)
715                         {
716                             factor = m_factor_[i] & 0x00FF;
717                             indexes[i] = offset % factor;
718                             offset /= factor;
719                         }
720
721                         // we don't need to calculate the last modulus because
722
// start <= code <= end guarantees here that
723
// code <= factors[0]
724
indexes[0] = offset;
725
726                         // joining up the factorized strings
727
str.append(getFactorString(indexes, m_variant_));
728                     }
729                     break;
730             }
731         }
732
733         /**
734         * Gets the character for the argument algorithmic name
735         * @return the algorithmic char or -1 otherwise.
736         */

737         int getChar(String JavaDoc name)
738         {
739             int prefixlen = m_prefix_.length();
740             if (name.length() < prefixlen ||
741                 !m_prefix_.equals(name.substring(0, prefixlen))) {
742                 return -1;
743             }
744
745             switch (m_type_)
746             {
747                 case TYPE_0_ :
748                 try
749                 {
750                     int result = Integer.parseInt(name.substring(prefixlen),
751                                                   16);
752                     // does it fit into the range?
753
if (m_rangestart_ <= result && result <= m_rangeend_) {
754                         return result;
755                     }
756                 }
757                 catch (NumberFormatException JavaDoc e)
758                 {
759                     return -1;
760                 }
761                 break;
762                 case TYPE_1_ :
763                     // repetitative suffix name comparison done here
764
// offset is the character code - start
765
for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
766                     {
767                         int offset = ch - m_rangestart_;
768                         int indexes[] = m_utilIntBuffer_;
769                         int factor;
770
771                         // write elements according to the factors
772
// the factorized elements are determined by modulo
773
// arithmetic
774
synchronized (m_utilIntBuffer_) {
775                             for (int i = m_variant_ - 1; i > 0; i --)
776                             {
777                                 factor = m_factor_[i] & 0x00FF;
778                                 indexes[i] = offset % factor;
779                                 offset /= factor;
780                             }
781
782                             // we don't need to calculate the last modulus
783
// because start <= code <= end guarantees here that
784
// code <= factors[0]
785
indexes[0] = offset;
786
787                             // joining up the factorized strings
788
if (compareFactorString(indexes, m_variant_, name,
789                                                     prefixlen)) {
790                                 return ch;
791                             }
792                         }
793                     }
794             }
795
796             return -1;
797         }
798
799         /**
800          * Adds all chars in the set of algorithmic names into the set.
801          * Equivalent to part of calcAlgNameSetsLengths.
802          * @param set int set to add the chars of the algorithm names into
803          * @param maxlength maximum length to compare to
804          * @return the length that is either maxlength of the length of this
805          * algorithm name if it is longer than maxlength
806          */

807         int add(int set[], int maxlength)
808         {
809             // prefix length
810
int length = UCharacterName.add(set, m_prefix_);
811             switch (m_type_) {
812                 case TYPE_0_ : {
813                     // name = prefix + (range->variant times) hex-digits
814
// prefix
815
length += m_variant_;
816                     /* synwee to check
817                      * addString(set, (const char *)(range + 1))
818                                        + range->variant;*/

819                     break;
820                 }
821                 case TYPE_1_ : {
822                     // name = prefix factorized-elements
823
// get the set and maximum factor suffix length for each
824
// factor
825
for (int i = m_variant_ - 1; i > 0; i --)
826                     {
827                         int maxfactorlength = 0;
828                         int count = 0;
829                         for (int factor = m_factor_[i]; factor > 0; -- factor) {
830                             synchronized (m_utilStringBuffer_) {
831                                 m_utilStringBuffer_.delete(0,
832                                                 m_utilStringBuffer_.length());
833                                 count
834                                   = UCharacterUtility.getNullTermByteSubString(
835                                                 m_utilStringBuffer_,
836                                                 m_factorstring_, count);
837                                 UCharacterName.add(set, m_utilStringBuffer_);
838                                 if (m_utilStringBuffer_.length()
839                                                             > maxfactorlength)
840                                 {
841                                     maxfactorlength
842                                                 = m_utilStringBuffer_.length();
843                                 }
844                             }
845                         }
846                         length += maxfactorlength;
847                     }
848                 }
849             }
850             if (length > maxlength) {
851                 return length;
852             }
853             return maxlength;
854         }
855
856         // private data members ------------------------------------------
857

858         /**
859         * Algorithmic data information
860         */

861         private int m_rangestart_;
862         private int m_rangeend_;
863         private byte m_type_;
864         private byte m_variant_;
865         private char m_factor_[];
866         private String JavaDoc m_prefix_;
867         private byte m_factorstring_[];
868         /**
869          * Utility StringBuffer
870          */

871         private StringBuffer JavaDoc m_utilStringBuffer_ = new StringBuffer JavaDoc();
872         /**
873          * Utility int buffer
874          */

875         private int m_utilIntBuffer_[] = new int[256];
876
877         // private methods -----------------------------------------------
878

879         /**
880         * Gets the indexth string in each of the argument factor block
881         * @param index array with each index corresponding to each factor block
882         * @param length length of the array index
883         * @return the combined string of the array of indexth factor string in
884         * factor block
885         */

886         private String JavaDoc getFactorString(int index[], int length)
887         {
888             int size = m_factor_.length;
889             if (index == null || length != size) {
890                 return null;
891             }
892
893             synchronized (m_utilStringBuffer_) {
894                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
895                 int count = 0;
896                 int factor;
897                 size --;
898                 for (int i = 0; i <= size; i ++) {
899                     factor = m_factor_[i];
900                     count = UCharacterUtility.skipNullTermByteSubString(
901                                              m_factorstring_, count, index[i]);
902                     count = UCharacterUtility.getNullTermByteSubString(
903                                           m_utilStringBuffer_, m_factorstring_,
904                                           count);
905                     if (i != size) {
906                         count = UCharacterUtility.skipNullTermByteSubString(
907                                                        m_factorstring_, count,
908                                                        factor - index[i] - 1);
909                     }
910                 }
911                 return m_utilStringBuffer_.toString();
912             }
913         }
914
915         /**
916         * Compares the indexth string in each of the argument factor block with
917         * the argument string
918         * @param index array with each index corresponding to each factor block
919         * @param length index array length
920         * @param str string to compare with
921         * @param offset of str to start comparison
922         * @return true if string matches
923         */

924         private boolean compareFactorString(int index[], int length, String JavaDoc str,
925                                             int offset)
926         {
927             int size = m_factor_.length;
928             if (index == null || length != size)
929                 return false;
930
931             int count = 0;
932             int strcount = offset;
933             int factor;
934             size --;
935             for (int i = 0; i <= size; i ++)
936             {
937                 factor = m_factor_[i];
938                 count = UCharacterUtility.skipNullTermByteSubString(
939                                           m_factorstring_, count, index[i]);
940                 strcount = UCharacterUtility.compareNullTermByteSubString(str,
941                                           m_factorstring_, strcount, count);
942                 if (strcount < 0) {
943                     return false;
944                 }
945
946                 if (i != size) {
947                     count = UCharacterUtility.skipNullTermByteSubString(
948                                   m_factorstring_, count, factor - index[i]);
949                 }
950             }
951             if (strcount != str.length()) {
952                 return false;
953             }
954             return true;
955         }
956     }
957
958     // package private data members --------------------------------------
959

960     /**
961      * Size of each groups
962      */

963     int m_groupsize_ = 0;
964
965     // package private methods --------------------------------------------
966

967     /**
968     * Sets the token data
969     * @param token array of tokens
970     * @param tokenstring array of string values of the tokens
971     * @return false if there is a data error
972     */

973     boolean setToken(char token[], byte tokenstring[])
974     {
975         if (token != null && tokenstring != null && token.length > 0 &&
976             tokenstring.length > 0) {
977             m_tokentable_ = token;
978             m_tokenstring_ = tokenstring;
979             return true;
980         }
981         return false;
982     }
983
984     /**
985     * Set the algorithm name information array
986     * @param alg Algorithm information array
987     * @return true if the group string offset has been set correctly
988     */

989     boolean setAlgorithm(AlgorithmName alg[])
990     {
991         if (alg != null && alg.length != 0) {
992             m_algorithm_ = alg;
993             return true;
994         }
995         return false;
996     }
997
998     /**
999     * Sets the number of group and size of each group in number of char
1000    * @param count number of groups
1001    * @param size size of group in char
1002    * @return true if group size is set correctly
1003    */

1004    boolean setGroupCountSize(int count, int size)
1005    {
1006        if (count <= 0 || size <= 0) {
1007            return false;
1008        }
1009        m_groupcount_ = count;
1010        m_groupsize_ = size;
1011        return true;
1012    }
1013
1014    /**
1015    * Sets the group name data
1016    * @param group index information array
1017    * @param groupstring name information array
1018    * @return false if there is a data error
1019    */

1020    boolean setGroup(char group[], byte groupstring[])
1021    {
1022        if (group != null && groupstring != null && group.length > 0 &&
1023            groupstring.length > 0) {
1024            m_groupinfo_ = group;
1025            m_groupstring_ = groupstring;
1026            return true;
1027        }
1028        return false;
1029    }
1030
1031    // private data members ----------------------------------------------
1032

1033    /**
1034    * Data used in unames.icu
1035    */

1036    private char m_tokentable_[];
1037    private byte m_tokenstring_[];
1038    private char m_groupinfo_[];
1039    private byte m_groupstring_[];
1040    private AlgorithmName m_algorithm_[];
1041
1042    /**
1043    * Group use
1044    */

1045    private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
1046    private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
1047
1048    /**
1049    * Default name of the name datafile
1050    */

1051    private static final String JavaDoc NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu";
1052    /**
1053    * Shift count to retrieve group information
1054    */

1055    private static final int GROUP_SHIFT_ = 5;
1056    /**
1057    * Mask to retrieve the offset for a particular character within a group
1058    */

1059    private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
1060    /**
1061    * Default buffer size of datafile
1062    */

1063    private static final int NAME_BUFFER_SIZE_ = 100000;
1064
1065    /**
1066    * Position of offsethigh in group information array
1067    */

1068    private static final int OFFSET_HIGH_OFFSET_ = 1;
1069
1070    /**
1071    * Position of offsetlow in group information array
1072    */

1073    private static final int OFFSET_LOW_OFFSET_ = 2;
1074    /**
1075    * Double nibble indicator, any nibble > this number has to be combined
1076    * with its following nibble
1077    */

1078    private static final int SINGLE_NIBBLE_MAX_ = 11;
1079
1080    /*
1081     * Maximum length of character names (regular & 1.0).
1082     */

1083    //private static int MAX_NAME_LENGTH_ = 0;
1084
/*
1085     * Maximum length of ISO comments.
1086     */

1087    //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
1088

1089    /**
1090     * Set of chars used in character names (regular & 1.0).
1091     * Chars are platform-dependent (can be EBCDIC).
1092     */

1093    private int m_nameSet_[] = new int[8];
1094    /**
1095     * Set of chars used in ISO comments. (regular & 1.0).
1096     * Chars are platform-dependent (can be EBCDIC).
1097     */

1098    private int m_ISOCommentSet_[] = new int[8];
1099    /**
1100     * Utility StringBuffer
1101     */

1102    private StringBuffer JavaDoc m_utilStringBuffer_ = new StringBuffer JavaDoc();
1103    /**
1104     * Utility int buffer
1105     */

1106    private int m_utilIntBuffer_[] = new int[2];
1107    /**
1108     * Maximum ISO comment length
1109     */

1110    private int m_maxISOCommentLength_;
1111    /**
1112     * Maximum name length
1113     */

1114    private int m_maxNameLength_;
1115    /**
1116     * Singleton instance
1117     */

1118    private static UCharacterName INSTANCE_ = null;
1119    /**
1120     * Type names used for extended names
1121     */

1122    private static final String JavaDoc TYPE_NAMES_[] = {"unassigned",
1123                                                 "uppercase letter",
1124                                                 "lowercase letter",
1125                                                 "titlecase letter",
1126                                                 "modifier letter",
1127                                                 "other letter",
1128                                                 "non spacing mark",
1129                                                 "enclosing mark",
1130                                                 "combining spacing mark",
1131                                                 "decimal digit number",
1132                                                 "letter number",
1133                                                 "other number",
1134                                                 "space separator",
1135                                                 "line separator",
1136                                                 "paragraph separator",
1137                                                 "control",
1138                                                 "format",
1139                                                 "private use area",
1140                                                 "surrogate",
1141                                                 "dash punctuation",
1142                                                 "start punctuation",
1143                                                 "end punctuation",
1144                                                 "connector punctuation",
1145                                                 "other punctuation",
1146                                                 "math symbol",
1147                                                 "currency symbol",
1148                                                 "modifier symbol",
1149                                                 "other symbol",
1150                                                 "initial punctuation",
1151                                                 "final punctuation",
1152                                                 "noncharacter",
1153                                                 "lead surrogate",
1154                                                 "trail surrogate"};
1155    /**
1156     * Unknown type name
1157     */

1158    private static final String JavaDoc UNKNOWN_TYPE_NAME_ = "unknown";
1159    /**
1160     * Not a character type
1161     */

1162    private static final int NON_CHARACTER_
1163                                    = UCharacterCategory.CHAR_CATEGORY_COUNT;
1164    /**
1165    * Lead surrogate type
1166    */

1167    private static final int LEAD_SURROGATE_
1168                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
1169    /**
1170    * Trail surrogate type
1171    */

1172    private static final int TRAIL_SURROGATE_
1173                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
1174    /**
1175    * Extended category count
1176    */

1177    static final int EXTENDED_CATEGORY_
1178                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
1179
1180    // private constructor ------------------------------------------------
1181

1182    /**
1183    * <p>Protected constructor for use in UCharacter.</p>
1184    * @exception IOException thrown when data reading fails
1185    */

1186    private UCharacterName() throws IOException JavaDoc
1187    {
1188        InputStream JavaDoc is = ICUData.getRequiredStream(NAME_FILE_NAME_);
1189        BufferedInputStream JavaDoc b = new BufferedInputStream JavaDoc(is, NAME_BUFFER_SIZE_);
1190        UCharacterNameReader reader = new UCharacterNameReader(b);
1191        reader.read(this);
1192        b.close();
1193    }
1194
1195    // private methods ---------------------------------------------------
1196

1197    /**
1198    * Gets the algorithmic name for the argument character
1199    * @param ch character to determine name for
1200    * @param choice name choice
1201    * @return the algorithmic name or null if not found
1202    */

1203    private String JavaDoc getAlgName(int ch, int choice)
1204    {
1205        // Do not write algorithmic Unicode 1.0 names because Unihan names are
1206
// the same as the modern ones, extension A was only introduced with
1207
// Unicode 3.0, and the Hangul syllable block was moved and changed
1208
// around Unicode 1.1.5.
1209
if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
1210            // index in terms integer index
1211
synchronized (m_utilStringBuffer_) {
1212                m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
1213
1214                for (int index = m_algorithm_.length - 1; index >= 0; index --)
1215                {
1216                   if (m_algorithm_[index].contains(ch)) {
1217                      m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
1218                      return m_utilStringBuffer_.toString();
1219                   }
1220                }
1221            }
1222        }
1223        return null;
1224    }
1225
1226    /**
1227    * Getting the character with the tokenized argument name
1228    * @param name of the character
1229    * @return character with the tokenized argument name or -1 if character
1230    * is not found
1231    */

1232    private synchronized int getGroupChar(String JavaDoc name, int choice)
1233    {
1234        for (int i = 0; i < m_groupcount_; i ++) {
1235            // populating the data set of grouptable
1236

1237            int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
1238                                                  m_grouplengths_);
1239
1240            // shift out to function
1241
int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
1242                                      choice);
1243            if (result != -1) {
1244                return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
1245                         | result;
1246            }
1247        }
1248        return -1;
1249    }
1250
1251    /**
1252    * Compares and retrieve character if name is found within the argument
1253    * group
1254    * @param index index where the set of names reside in the group block
1255    * @param length list of lengths of the strings
1256    * @param name character name to search for
1257    * @param choice of either 1.0 or the most current unicode name
1258    * @return relative character in the group which matches name, otherwise if
1259    * not found, -1 will be returned
1260    */

1261    private int getGroupChar(int index, char length[], String JavaDoc name,
1262                             int choice)
1263    {
1264        byte b = 0;
1265        char token;
1266        int len;
1267        int namelen = name.length();
1268        int nindex;
1269        int count;
1270
1271        for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
1272            nindex = 0;
1273            len = length[result];
1274
1275            if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
1276                int oldindex = index;
1277                index += UCharacterUtility.skipByteSubString(m_groupstring_,
1278                                                     index, len, (byte)';');
1279                len -= (index - oldindex);
1280            }
1281
1282            // number of tokens is > the length of the name
1283
// write each letter directly, and write a token word per token
1284
for (count = 0; count < len && nindex != -1 && nindex < namelen;
1285                ) {
1286                b = m_groupstring_[index + count];
1287                count ++;
1288
1289                if (b >= m_tokentable_.length) {
1290                    if (name.charAt(nindex ++) != (b & 0xFF)) {
1291                        nindex = -1;
1292                    }
1293                }
1294                else {
1295                    token = m_tokentable_[b & 0xFF];
1296                    if (token == 0xFFFE) {
1297                        // this is a lead byte for a double-byte token
1298
token = m_tokentable_[b << 8 |
1299                                   (m_groupstring_[index + count] & 0x00ff)];
1300                        count ++;
1301                    }
1302                    if (token == 0xFFFF) {
1303                        if (name.charAt(nindex ++) != (b & 0xFF)) {
1304                            nindex = -1;
1305                        }
1306                    }
1307                    else {
1308                        // compare token with name
1309
nindex = UCharacterUtility.compareNullTermByteSubString(
1310                                        name, m_tokenstring_, nindex, token);
1311                    }
1312                }
1313            }
1314
1315            if (namelen == nindex &&
1316                (count == len || m_groupstring_[index + count] == ';')) {
1317                return result;
1318            }
1319
1320            index += len;
1321        }
1322        return -1;
1323    }
1324
1325    /**
1326    * Gets the character extended type
1327    * @param ch character to be tested
1328    * @return extended type it is associated with
1329    */

1330    private static int getType(int ch)
1331    {
1332        if (UCharacterUtility.isNonCharacter(ch)) {
1333            // not a character we return a invalid category count
1334
return NON_CHARACTER_;
1335        }
1336        int result = UCharacter.getType(ch);
1337        if (result == UCharacterCategory.SURROGATE) {
1338            if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
1339                result = LEAD_SURROGATE_;
1340            }
1341            else {
1342                result = TRAIL_SURROGATE_;
1343            }
1344        }
1345        return result;
1346    }
1347
1348    /**
1349    * Getting the character with extended name of the form <....>.
1350    * @param name of the character to be found
1351    * @param choice name choice
1352    * @return character associated with the name, -1 if such character is not
1353    * found and -2 if we should continue with the search.
1354    */

1355    private static int getExtendedChar(String JavaDoc name, int choice)
1356    {
1357        if (name.charAt(0) == '<') {
1358            if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
1359                int endIndex = name.length() - 1;
1360                if (name.charAt(endIndex) == '>') {
1361                    int startIndex = name.lastIndexOf('-');
1362                    if (startIndex >= 0) { // We've got a category.
1363
startIndex ++;
1364                        int result = -1;
1365                        try {
1366                            result = Integer.parseInt(
1367                                        name.substring(startIndex, endIndex),
1368                                        16);
1369                        }
1370                        catch (NumberFormatException JavaDoc e) {
1371                            return -1;
1372                        }
1373                        // Now validate the category name. We could use a
1374
// binary search, or a trie, if we really wanted to.
1375
String JavaDoc type = name.substring(1, startIndex - 1);
1376                        int length = TYPE_NAMES_.length;
1377                        for (int i = 0; i < length; ++ i) {
1378                            if (type.compareTo(TYPE_NAMES_[i]) == 0) {
1379                                if (getType(result) == i) {
1380                                    return result;
1381                                }
1382                                break;
1383                            }
1384                        }
1385                    }
1386                }
1387            }
1388            return -1;
1389        }
1390        return -2;
1391    }
1392
1393    // sets of name characters, maximum name lengths -----------------------
1394

1395    /**
1396     * Adds a codepoint into a set of ints.
1397     * Equivalent to SET_ADD.
1398     * @param set set to add to
1399     * @param ch 16 bit char to add
1400     */

1401    private static void add(int set[], char ch)
1402    {
1403        set[ch >>> 5] |= 1 << (ch & 0x1f);
1404    }
1405
1406    /**
1407     * Checks if a codepoint is a part of a set of ints.
1408     * Equivalent to SET_CONTAINS.
1409     * @param set set to check in
1410     * @param ch 16 bit char to check
1411     * @return true if codepoint is part of the set, false otherwise
1412     */

1413    private static boolean contains(int set[], char ch)
1414    {
1415        return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
1416    }
1417
1418    /**
1419     * Adds all characters of the argument str and gets the length
1420     * Equivalent to calcStringSetLength.
1421     * @param set set to add all chars of str to
1422     * @param str string to add
1423     */

1424    private static int add(int set[], String JavaDoc str)
1425    {
1426        int result = str.length();
1427
1428        for (int i = result - 1; i >= 0; i --) {
1429            add(set, str.charAt(i));
1430        }
1431        return result;
1432    }
1433
1434    /**
1435     * Adds all characters of the argument str and gets the length
1436     * Equivalent to calcStringSetLength.
1437     * @param set set to add all chars of str to
1438     * @param str string to add
1439     */

1440    private static int add(int set[], StringBuffer JavaDoc str)
1441    {
1442        int result = str.length();
1443
1444        for (int i = result - 1; i >= 0; i --) {
1445            add(set, str.charAt(i));
1446        }
1447        return result;
1448    }
1449
1450    /**
1451     * Adds all algorithmic names into the name set.
1452     * Equivalent to part of calcAlgNameSetsLengths.
1453     * @param maxlength length to compare to
1454     * @return the maximum length of any possible algorithmic name if it is >
1455     * maxlength, otherwise maxlength is returned.
1456     */

1457    private int addAlgorithmName(int maxlength)
1458    {
1459        int result = 0;
1460        for (int i = m_algorithm_.length - 1; i >= 0; i --) {
1461            result = m_algorithm_[i].add(m_nameSet_, maxlength);
1462            if (result > maxlength) {
1463                maxlength = result;
1464            }
1465        }
1466        return maxlength;
1467    }
1468
1469    /**
1470     * Adds all extended names into the name set.
1471     * Equivalent to part of calcExtNameSetsLengths.
1472     * @param maxlength length to compare to
1473     * @return the maxlength of any possible extended name.
1474     */

1475    private int addExtendedName(int maxlength)
1476    {
1477        for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) {
1478            // for each category, count the length of the category name
1479
// plus 9 =
1480
// 2 for <>
1481
// 1 for -
1482
// 6 for most hex digits per code point
1483
int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
1484            if (length > maxlength) {
1485                maxlength = length;
1486            }
1487        }
1488        return maxlength;
1489    }
1490
1491    /**
1492     * Adds names of a group to the argument set.
1493     * Equivalent to calcNameSetLength.
1494     * @param offset of the group name string in byte count
1495     * @param length of the group name string
1496     * @param tokenlength array to store the length of each token
1497     * @param set to add to
1498     * @return the length of the name string and the length of the group
1499     * string parsed
1500     */

1501    private int[] addGroupName(int offset, int length, byte tokenlength[],
1502                               int set[])
1503    {
1504        int resultnlength = 0;
1505        int resultplength = 0;
1506        while (resultplength < length) {
1507            char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
1508            resultplength ++;
1509            if (b == ';') {
1510                break;
1511            }
1512
1513            if (b >= m_tokentable_.length) {
1514                add(set, b); // implicit letter
1515
resultnlength ++;
1516            }
1517            else {
1518                char token = m_tokentable_[b & 0x00ff];
1519                if (token == 0xFFFE) {
1520                    // this is a lead byte for a double-byte token
1521
b = (char)(b << 8 | (m_groupstring_[offset + resultplength]
1522                                         & 0x00ff));
1523                    token = m_tokentable_[b];
1524                    resultplength ++;
1525                }
1526                if (token == 0xFFFF) {
1527                    add(set, b);
1528                    resultnlength ++;
1529                }
1530                else {
1531                    // count token word
1532
// use cached token length
1533
byte tlength = tokenlength[b];
1534                    if (tlength == 0) {
1535                        synchronized (m_utilStringBuffer_) {
1536                            m_utilStringBuffer_.delete(0,
1537                                                 m_utilStringBuffer_.length());
1538                            UCharacterUtility.getNullTermByteSubString(
1539                                           m_utilStringBuffer_, m_tokenstring_,
1540                                           token);
1541                            tlength = (byte)add(set, m_utilStringBuffer_);
1542                        }
1543                        tokenlength[b] = tlength;
1544                    }
1545                    resultnlength += tlength;
1546                }
1547            }
1548        }
1549        m_utilIntBuffer_[0] = resultnlength;
1550        m_utilIntBuffer_[1] = resultplength;
1551        return m_utilIntBuffer_;
1552    }
1553
1554    /**
1555     * Adds names of all group to the argument set.
1556     * Sets the data member m_max*Length_.
1557     * Method called only once.
1558     * Equivalent to calcGroupNameSetsLength.
1559     * @param maxlength length to compare to
1560     */

1561    private void addGroupName(int maxlength)
1562    {
1563        int maxisolength = 0;
1564        char offsets[] = new char[LINES_PER_GROUP_ + 2];
1565        char lengths[] = new char[LINES_PER_GROUP_ + 2];
1566        byte tokenlengths[] = new byte[m_tokentable_.length];
1567
1568        // enumerate all groups
1569
// for (int i = m_groupcount_ - 1; i >= 0; i --) {
1570
for (int i = 0; i < m_groupcount_ ; i ++) {
1571            int offset = getGroupLengths(i, offsets, lengths);
1572            // enumerate all lines in each group
1573
// for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
1574
// linenumber --) {
1575
for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
1576                linenumber ++) {
1577                int lineoffset = offset + offsets[linenumber];
1578                int length = lengths[linenumber];
1579                if (length == 0) {
1580                    continue;
1581                }
1582
1583                // read regular name
1584
int parsed[] = addGroupName(lineoffset, length, tokenlengths,
1585                                            m_nameSet_);
1586                if (parsed[0] > maxlength) {
1587                    // 0 for name length
1588
maxlength = parsed[0];
1589                }
1590                lineoffset += parsed[1];
1591                if (parsed[1] >= length) {
1592                    // 1 for parsed group string length
1593
continue;
1594                }
1595                length -= parsed[1];
1596                // read Unicode 1.0 name
1597
parsed = addGroupName(lineoffset, length, tokenlengths,
1598                                      m_nameSet_);
1599                if (parsed[0] > maxlength) {
1600                    // 0 for name length
1601
maxlength = parsed[0];
1602                }
1603                lineoffset += parsed[1];
1604                if (parsed[1] >= length) {
1605                    // 1 for parsed group string length
1606
continue;
1607                }
1608                length -= parsed[1];
1609                // read ISO comment
1610
parsed = addGroupName(lineoffset, length, tokenlengths,
1611                                      m_ISOCommentSet_);
1612                if (parsed[1] > maxisolength) {
1613                    maxisolength = length;
1614                }
1615            }
1616        }
1617
1618        // set gMax... - name length last for threading
1619
m_maxISOCommentLength_ = maxisolength;
1620        m_maxNameLength_ = maxlength;
1621    }
1622
1623    /**
1624     * Sets up the name sets and the calculation of the maximum lengths.
1625     * Equivalent to calcNameSetsLengths.
1626     */

1627    private boolean initNameSetsLengths()
1628    {
1629        if (m_maxNameLength_ > 0) {
1630            return true;
1631        }
1632
1633        String JavaDoc extra = "0123456789ABCDEF<>-";
1634        // set hex digits, used in various names, and <>-, used in extended
1635
// names
1636
for (int i = extra.length() - 1; i >= 0; i --) {
1637            add(m_nameSet_, extra.charAt(i));
1638        }
1639
1640        // set sets and lengths from algorithmic names
1641
m_maxNameLength_ = addAlgorithmName(0);
1642        // set sets and lengths from extended names
1643
m_maxNameLength_ = addExtendedName(m_maxNameLength_);
1644        // set sets and lengths from group names, set global maximum values
1645
addGroupName(m_maxNameLength_);
1646        return true;
1647    }
1648
1649    /**
1650     * Converts the char set cset into a Unicode set uset.
1651     * Equivalent to charSetToUSet.
1652     * @param set Set of 256 bit flags corresponding to a set of chars.
1653     * @param uset USet to receive characters. Existing contents are deleted.
1654     */

1655    private void convert(int set[], UnicodeSet uset)
1656    {
1657        uset.clear();
1658        if (!initNameSetsLengths()) {
1659            return;
1660        }
1661
1662        // build a char string with all chars that are used in character names
1663
for (char c = 255; c > 0; c --) {
1664            if (contains(set, c)) {
1665                uset.add(c);
1666            }
1667        }
1668    }
1669}
1670
Popular Tags