UCharacterName


1   /**
2   *******************************************************************************
3   * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4   * others. All Rights Reserved.                                                *
5   *******************************************************************************
6   */
7   package com.ibm.icu.impl;
8   
9   import java.io.InputStream  ;
10  import java.io.BufferedInputStream  ;
11  import java.io.IOException  ;
12  import java.util.MissingResourceException  ;
13  
14  import com.ibm.icu.text.UTF16;
15  import com.ibm.icu.text.UnicodeSet;
16  import com.ibm.icu.lang.UCharacter;
17  import com.ibm.icu.lang.UCharacterCategory;
18  
/**
* Internal class to manage character names.
* Since the name data is stored in an array of char, indexes used in this
* class refer by default to a 2-byte (char) count, unless otherwise stated.
* Where an index refers to a byte count, the index is halved and, depending
* on whether the index is even or odd, the MSB or LSB of the char at the
* halved index is returned. For indexes into an array of int, the index is
* multiplied by 2, and the char at the multiplied index together with the
* char following it is returned as an int.
* <a HREF=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class.
* Note: 0 - 0x1F are control characters without names in Unicode 3.0.
* @author Syn Wee Quek
* @since nov0700
*/
34  
35  public final class UCharacterName
36  {
37      // public data members ----------------------------------------------
38  
39      /**
40      * Number of lines per group
41      * 1 << GROUP_SHIFT_
42      */
43      public static final int LINES_PER_GROUP_ = 1 << 5;
44      /**
45       * Maximum number of groups
46       */
47      public int m_groupcount_ = 0;
48  
49      // public methods ---------------------------------------------------
50  
51      /**
52       * Gets the only instance of UCharacterName
53       * @return only instance of UCharacterName
54       * @exception MissingResourceException thrown when reading of name data fails
55       */
56      public static UCharacterName getInstance()
57      {
58          if (INSTANCE_ == null) {
59              try {
60                  INSTANCE_ = new UCharacterName();
61              }catch(IOException   e){
62                  throw new MissingResourceException  ("Could not construct UCharacterName. Missing unames.icu","","");
63              }
64              catch (Exception   e) {
65                  throw new MissingResourceException  (e.getMessage(),"","");
66              }
67          }
68          return INSTANCE_;
69      }
70  
71      /**
72      * Retrieve the name of a Unicode code point.
73      * Depending on <code>choice</code>, the character name written into the
74      * buffer is the "modern" name or the name that was defined in Unicode
75      * version 1.0.
76      * The name contains only "invariant" characters
77      * like A-Z, 0-9, space, and '-'.
78      *
79      * @param ch the code point for which to get the name.
80      * @param choice Selector for which name to get.
81      * @return if code point is above 0x1fff, null is returned
82      */
83      public String   getName(int ch, int choice)
84      {
85          if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE ||
86              choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
87              return null;
88          }
89  
90          String   result = null;
91  
92          result = getAlgName(ch, choice);
93  
94          // getting normal character name
95          if (result == null || result.length() == 0) {
96              if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
97                  result = getExtendedName(ch);
98              } else {
99                  result = getGroupName(ch, choice);
100             }
101         }
102 
103         return result;
104     }
105 
106     /**
107     * Find a character by its name and return its code point value
108     * @param choice selector to indicate if argument name is a Unicode 1.0
109     *        or the most current version
110     * @param name the name to search for
111     * @return code point
112     */
113     public int getCharFromName(int choice, String   name)
114     {
115         // checks for illegal arguments
116         if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT ||
117             name == null || name.length() == 0) {
118             return -1;
119         }
120 
121         // try extended names first
122         int result = getExtendedChar(name.toLowerCase(), choice);
123         if (result >= -1) {
124             return result;
125         }
126 
127         String   upperCaseName = name.toUpperCase();
128         // try algorithmic names first, if fails then try group names
129         // int result = getAlgorithmChar(choice, uppercasename);
130 
131         if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
132             int count = 0;
133             if (m_algorithm_ != null) {
134                 count = m_algorithm_.length;
135             }
136             for (count --; count >= 0; count --) {
137                 result = m_algorithm_[count].getChar(upperCaseName);
138                 if (result >= 0) {
139                     return result;
140                 }
141             }
142         }
143 
144         if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
145             result = getGroupChar(upperCaseName,
146                                   UCharacterNameChoice.UNICODE_CHAR_NAME);
147             if (result == -1) {
148                 result = getGroupChar(upperCaseName,
149                                   UCharacterNameChoice.UNICODE_10_CHAR_NAME);
150             }
151         }
152         else {
153             result = getGroupChar(upperCaseName, choice);
154         }
155         return result;
156     }
157 
158     // these are all UCharacterNameIterator use methods -------------------
159 
160     /**
161     * Reads a block of compressed lengths of 32 strings and expands them into
162     * offsets and lengths for each string. Lengths are stored with a
163     * variable-width encoding in consecutive nibbles:
164     * If a nibble<0xc, then it is the length itself (0 = empty string).
165     * If a nibble>=0xc, then it forms a length value with the following
166     * nibble.
167     * The offsets and lengths arrays must be at least 33 (one more) long
168     * because there is no check here at the end if the last nibble is still
169     * used.
170     * @param index of group string object in array
171     * @param offsets array to store the value of the string offsets
172     * @param lengths array to store the value of the string length
173     * @return next index of the data string immediately after the lengths
174     *         in terms of byte address
175     */
176     public int getGroupLengths(int index, char offsets[], char lengths[])
177     {
178         char length = 0xffff;
179         byte b = 0,
180             n = 0;
181         int shift;
182         index = index * m_groupsize_; // byte count offsets of group strings
183         int stringoffset = UCharacterUtility.toInt(
184                                  m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
185                                  m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
186 
187         offsets[0] = 0;
188 
189         // all 32 lengths must be read to get the offset of the first group
190         // string
191         for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
192             b = m_groupstring_[stringoffset];
193             shift = 4;
194 
195             while (shift >= 0) {
196                 // getting nibble
197                 n = (byte)((b >> shift) & 0x0F);
198                 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
199                     length = (char)((n - 12) << 4);
200                 }
201                 else {
202                     if (length != 0xffff) {
203                        lengths[i] = (char)((length | n) + 12);
204                     }
205                     else {
206                        lengths[i] = (char)n;
207                     }
208 
209                     if (i < LINES_PER_GROUP_) {
210                        offsets[i + 1] = (char)(offsets[i] + lengths[i]);
211                     }
212 
213                     length = 0xffff;
214                     i ++;
215                 }
216 
217                 shift -= 4;
218             }
219         }
220         return stringoffset;
221     }
222 
223     /**
224     * Gets the name of the argument group index.
225     * UnicodeData.txt uses ';' as a field separator, so no field can contain
226     * ';' as part of its contents. In unames.icu, it is marked as
227     * token[';'] == -1 only if the semicolon is used in the data file - which
228     * is iff we have Unicode 1.0 names or ISO comments.
229     * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments
230     * although we know that it will never be part of a name.
231     * Equivalent to ICU4C's expandName.
232     * @param index of the group name string in byte count
233     * @param length of the group name string
234     * @param choice of Unicode 1.0 name or the most current name
235     * @return name of the group
236     */
237     public String   getGroupName(int index, int length, int choice)
238     {
239         if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME
240             || choice == UCharacterNameChoice.ISO_COMMENT_) {
241             if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
242                 // skip the modern name
243                 int oldindex = index;
244                 index += UCharacterUtility.skipByteSubString(m_groupstring_,
245                                                    index, length, (byte)';');
246                 length -= (index - oldindex);
247                 if (choice == UCharacterNameChoice.ISO_COMMENT_) {
248                     // skips the 1.0 Name to the iso comment part
249                     oldindex = index;
250                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
251                                                     index, length, (byte)';');
252                     length -= (index - oldindex);
253                 }
254             }
255             else {
256                 // the semicolon byte is a token number, therefore only modern
257                 // names are stored in unames.dat and there is no such
258                 // requested Unicode 1.0 name here
259                 length = 0;
260             }
261         }
262 
263         synchronized (m_utilStringBuffer_) {
264             m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
265             byte b;
266             char token;
267             for (int i = 0; i < length;) {
268                 b = m_groupstring_[index + i];
269                 i ++;
270 
271                 if (b >= m_tokentable_.length) {
272                     if (b == ';') {
273                         break;
274                     }
275                     m_utilStringBuffer_.append(b); // implicit letter
276                 }
277                 else {
278                     token = m_tokentable_[b & 0x00ff];
279                     if (token == 0xFFFE) {
280                         // this is a lead byte for a double-byte token
281                         token = m_tokentable_[b << 8 |
282                                           (m_groupstring_[index + i] & 0x00ff)];
283                         i ++;
284                     }
285                     if (token == 0xFFFF) {
286                         if (b == ';') {
287                             // skip the semicolon if we are seeking extended
288                             // names and there was no 2.0 name but there
289                             // is a 1.0 name.
290                             if (m_utilStringBuffer_.length() == 0 && choice ==
291                                    UCharacterNameChoice.EXTENDED_CHAR_NAME) {
292                                 continue;
293                             }
294                             break;
295                         }
296                         // explicit letter
297                         m_utilStringBuffer_.append((char)(b & 0x00ff));
298                     }
299                     else { // write token word
300                         UCharacterUtility.getNullTermByteSubString(
301                                 m_utilStringBuffer_, m_tokenstring_, token);
302                     }
303                 }
304             }
305 
306             if (m_utilStringBuffer_.length() > 0) {
307                 return m_utilStringBuffer_.toString();
308             }
309         }
310         return null;
311     }
312 
313     /**
314     * Retrieves the extended name
315     */
316     public String   getExtendedName(int ch)
317     {
318         String   result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
319         if (result == null) {
320             if (getType(ch) == UCharacterCategory.CONTROL) {
321                 result = getName(ch,
322                                  UCharacterNameChoice.UNICODE_10_CHAR_NAME);
323             }
324             if (result == null) {
325                 result = getExtendedOr10Name(ch);
326             }
327         }
328         return result;
329     }
330 
331     /**
332      * Gets the group index for the codepoint, or the group before it.
333      * @param codepoint
334      * @return group index containing codepoint or the group before it.
335      */
336     public int getGroup(int codepoint)
337     {
338         int endGroup = m_groupcount_;
339         int msb      = getCodepointMSB(codepoint);
340         int result   = 0;
341         // binary search for the group of names that contains the one for
342         // code
343         // find the group that contains codepoint, or the highest before it
344         while (result < endGroup - 1) {
345             int gindex = (result + endGroup) >> 1;
346             if (msb < getGroupMSB(gindex)) {
347                 endGroup = gindex;
348             }
349             else {
350                 result = gindex;
351             }
352         }
353         return result;
354     }
355 
356     /**
357      * Gets the extended and 1.0 name when the most current unicode names
358      * fail
359      * @param ch codepoint
360      * @return name of codepoint extended or 1.0
361      */
362     public String   getExtendedOr10Name(int ch)
363     {
364         String   result = null;
365         if (getType(ch) == UCharacterCategory.CONTROL) {
366             result = getName(ch,
367                              UCharacterNameChoice.UNICODE_10_CHAR_NAME);
368         }
369         if (result == null) {
370             int type = getType(ch);
371             // Return unknown if the table of names above is not up to
372             // date.
373             if (type >= TYPE_NAMES_.length) {
374                 result = UNKNOWN_TYPE_NAME_;
375             }
376             else {
377                 result = TYPE_NAMES_[type];
378             }
379             synchronized (m_utilStringBuffer_) {
380                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
381                 m_utilStringBuffer_.append('<');
382                 m_utilStringBuffer_.append(result);
383                 m_utilStringBuffer_.append('-');
384                 String   chStr = Integer.toHexString(ch).toUpperCase();
385                 int zeros = 4 - chStr.length();
386                 while (zeros > 0) {
387                     m_utilStringBuffer_.append('0');
388                     zeros --;
389                 }
390                 m_utilStringBuffer_.append(chStr);
391                 m_utilStringBuffer_.append('>');
392                 result = m_utilStringBuffer_.toString();
393             }
394         }
395         return result;
396     }
397 
398     /**
399      * Gets the MSB from the group index
400      * @param gindex group index
401      * @return the MSB of the group if gindex is valid, -1 otherwise
402      */
403     public int getGroupMSB(int gindex)
404     {
405         if (gindex >= m_groupcount_) {
406             return -1;
407         }
408         return m_groupinfo_[gindex * m_groupsize_];
409     }
410 
411     /**
412      * Gets the MSB of the codepoint
413      * @param codepoint
414      * @return the MSB of the codepoint
415      */
416     public static int getCodepointMSB(int codepoint)
417     {
418         return codepoint >> GROUP_SHIFT_;
419     }
420 
421     /**
422      * Gets the maximum codepoint + 1 of the group
423      * @param msb most significant byte of the group
424      * @return limit codepoint of the group
425      */
426     public static int getGroupLimit(int msb)
427     {
428         return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
429     }
430 
431     /**
432      * Gets the minimum codepoint of the group
433      * @param msb most significant byte of the group
434      * @return minimum codepoint of the group
435      */
436     public static int getGroupMin(int msb)
437     {
438         return msb << GROUP_SHIFT_;
439     }
440 
441     /**
442      * Gets the offset to a group
443      * @param codepoint
444      * @return offset to a group
445      */
446     public static int getGroupOffset(int codepoint)
447     {
448         return codepoint & GROUP_MASK_;
449     }
450 
451     /**
452      * Gets the minimum codepoint of a group
453      * @param codepoint
454      * @return minimum codepoint in the group which codepoint belongs to
455      */
456     ///CLOVER:OFF
457     public static int getGroupMinFromCodepoint(int codepoint)
458     {
459         return codepoint & ~GROUP_MASK_;
460     }
461     ///CLOVER:ON
462 
463     /**
464      * Get the Algorithm range length
465      * @return Algorithm range length
466      */
467     public int getAlgorithmLength()
468     {
469         return m_algorithm_.length;
470     }
471 
472     /**
473      * Gets the start of the range
474      * @param index algorithm index
475      * @return algorithm range start
476      */
477     public int getAlgorithmStart(int index)
478     {
479         return m_algorithm_[index].m_rangestart_;
480     }
481 
482     /**
483      * Gets the end of the range
484      * @param index algorithm index
485      * @return algorithm range end
486      */
487     public int getAlgorithmEnd(int index)
488     {
489         return m_algorithm_[index].m_rangeend_;
490     }
491 
492     /**
493      * Gets the Algorithmic name of the codepoint
494      * @param index algorithmic range index
495      * @param codepoint
496      * @return algorithmic name of codepoint
497      */
498     public String   getAlgorithmName(int index, int codepoint)
499     {
500         String   result = null;
501         synchronized (m_utilStringBuffer_) {
502             m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
503             m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
504             result = m_utilStringBuffer_.toString();
505         }
506         return result;
507     }
508 
509     /**
510     * Gets the group name of the character
511     * @param ch character to get the group name
512     * @param choice name choice selector to choose a unicode 1.0 or newer name
513     */
514     public String   getGroupName(int ch, int choice)
515     {
516         // gets the msb
517         int msb   = getCodepointMSB(ch);
518         int group = getGroup(ch);
519 
520         // return this if it is an exact match
521         if (msb == m_groupinfo_[group * m_groupsize_]) {
522             int index = getGroupLengths(group, m_groupoffsets_,
523                                         m_grouplengths_);
524             int offset = ch & GROUP_MASK_;
525             return getGroupName(index + m_groupoffsets_[offset],
526                                 m_grouplengths_[offset], choice);
527         }
528 
529         return null;
530     }
531 
532     // these are transliterator use methods ---------------------------------
533 
534     /**
535      * Gets the maximum length of any codepoint name.
536      * Equivalent to uprv_getMaxCharNameLength.
537      * @return the maximum length of any codepoint name
538      */
539     public int getMaxCharNameLength()
540     {
541         if (initNameSetsLengths()) {
542             return m_maxNameLength_;
543         }
544         else {
545             return 0;
546         }
547     }
548 
549     /**
550      * Gets the maximum length of any iso comments.
551      * Equivalent to uprv_getMaxISOCommentLength.
552      * @return the maximum length of any codepoint name
553      */
554     ///CLOVER:OFF
555     public int getMaxISOCommentLength()
556     {
557         if (initNameSetsLengths()) {
558             return m_maxISOCommentLength_;
559         }
560         else {
561             return 0;
562         }
563     }
564     ///CLOVER:ON
565 
566     /**
567      * Fills set with characters that are used in Unicode character names.
568      * Equivalent to uprv_getCharNameCharacters.
569      * @param set USet to receive characters. Existing contents are deleted.
570      */
571     public void getCharNameCharacters(UnicodeSet set)
572     {
573         convert(m_nameSet_, set);
574     }
575 
576     /**
577      * Fills set with characters that are used in Unicode character names.
578      * Equivalent to uprv_getISOCommentCharacters.
579      * @param set USet to receive characters. Existing contents are deleted.
580      */
581     ///CLOVER:OFF
582     public void getISOCommentCharacters(UnicodeSet set)
583     {
584         convert(m_ISOCommentSet_, set);
585     }
586     ///CLOVER:ON
587 
588     // package private inner class --------------------------------------
589 
590     /**
591     * Algorithmic name class
592     */
593     static final class AlgorithmName
594     {
595         // package private data members ----------------------------------
596 
597         /**
598         * Constant type value of the different AlgorithmName
599         */
600         static final int TYPE_0_ = 0;
601         static final int TYPE_1_ = 1;
602 
603         // package private constructors ----------------------------------
604 
605         /**
606         * Constructor
607         */
608         AlgorithmName()
609         {
610         }
611 
612         // package private methods ---------------------------------------
613 
614         /**
615         * Sets the information for accessing the algorithmic names
616         * @param rangestart starting code point that lies within this name group
617         * @param rangeend end code point that lies within this name group
618         * @param type algorithm type. There's 2 kinds of algorithmic type. First
619         *        which uses code point as part of its name and the other uses
620         *        variant postfix strings
621         * @param variant algorithmic variant
622         * @return true if values are valid
623         */
624         boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
625         {
626             if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend
627                 && rangeend <= UCharacter.MAX_VALUE &&
628                 (type == TYPE_0_ || type == TYPE_1_)) {
629                 m_rangestart_ = rangestart;
630                 m_rangeend_ = rangeend;
631                 m_type_ = type;
632                 m_variant_ = variant;
633                 return true;
634             }
635             return false;
636         }
637 
638         /**
639         * Sets the factor data
640         * @param factor Array of factor
641         * @return true if factors are valid
642         */
643         boolean setFactor(char factor[])
644         {
645             if (factor.length == m_variant_) {
646                 m_factor_ = factor;
647                 return true;
648             }
649             return false;
650         }
651 
652         /**
653         * Sets the name prefix
654         * @param prefix
655         * @return true if prefix is set
656         */
657         boolean setPrefix(String   prefix)
658         {
659             if (prefix != null && prefix.length() > 0) {
660                 m_prefix_ = prefix;
661                 return true;
662             }
663             return false;
664         }
665 
666         /**
667         * Sets the variant factorized name data
668         * @param string variant factorized name data
669         * @return true if values are set
670         */
671         boolean setFactorString(byte string[])
672         {
673             // factor and variant string can be empty for things like
674             // hanggul code points
675             m_factorstring_ = string;
676             return true;
677         }
678 
679         /**
680         * Checks if code point lies in Algorithm object at index
681         * @param ch code point
682         */
683         boolean contains(int ch)
684         {
685             return m_rangestart_ <= ch && ch <= m_rangeend_;
686         }
687 
688         /**
689         * Appends algorithm name of code point into StringBuffer.
690         * Note this method does not check for validity of code point in Algorithm,
691         * result is undefined if code point does not belong in Algorithm.
692         * @param ch code point
693         * @param str StringBuffer to append to
694         */
695         void appendName(int ch, StringBuffer   str)
696         {
697             str.append(m_prefix_);
698             switch (m_type_)
699             {
700                 case TYPE_0_:
701                     // prefix followed by hex digits indicating variants
702                     Utility.hex(ch, m_variant_, str);
703                     break;
704                 case TYPE_1_:
705                     // prefix followed by factorized-elements
706                     int offset = ch - m_rangestart_;
707                     int indexes[] = m_utilIntBuffer_;
708                     int factor;
709 
710                     // write elements according to the factors
711                     // the factorized elements are determined by modulo
712                     // arithmetic
713                     synchronized (m_utilIntBuffer_) {
714                         for (int i = m_variant_ - 1; i > 0; i --)
715                         {
716                             factor = m_factor_[i] & 0x00FF;
717                             indexes[i] = offset % factor;
718                             offset /= factor;
719                         }
720 
721                         // we don't need to calculate the last modulus because
722                         // start <= code <= end guarantees here that
723                         // code <= factors[0]
724                         indexes[0] = offset;
725 
726                         // joining up the factorized strings
727                         str.append(getFactorString(indexes, m_variant_));
728                     }
729                     break;
730             }
731         }
732 
733         /**
734         * Gets the character for the argument algorithmic name
735         * @return the algorithmic char or -1 otherwise.
736         */
737         int getChar(String   name)
738         {
739             int prefixlen = m_prefix_.length();
740             if (name.length() < prefixlen ||
741                 !m_prefix_.equals(name.substring(0, prefixlen))) {
742                 return -1;
743             }
744 
745             switch (m_type_)
746             {
747                 case TYPE_0_ :
748                 try
749                 {
750                     int result = Integer.parseInt(name.substring(prefixlen),
751                                                   16);
752                     // does it fit into the range?
753                     if (m_rangestart_ <= result && result <= m_rangeend_) {
754                         return result;
755                     }
756                 }
757                 catch (NumberFormatException   e)
758                 {
759                     return -1;
760                 }
761                 break;
762                 case TYPE_1_ :
763                     // repetitative suffix name comparison done here
764                     // offset is the character code - start
765                     for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
766                     {
767                         int offset = ch - m_rangestart_;
768                         int indexes[] = m_utilIntBuffer_;
769                         int factor;
770 
771                         // write elements according to the factors
772                         // the factorized elements are determined by modulo
773                         // arithmetic
774                         synchronized (m_utilIntBuffer_) {
775                             for (int i = m_variant_ - 1; i > 0; i --)
776                             {
777                                 factor = m_factor_[i] & 0x00FF;
778                                 indexes[i] = offset % factor;
779                                 offset /= factor;
780                             }
781 
782                             // we don't need to calculate the last modulus
783                             // because start <= code <= end guarantees here that
784                             // code <= factors[0]
785                             indexes[0] = offset;
786 
787                             // joining up the factorized strings
788                             if (compareFactorString(indexes, m_variant_, name,
789                                                     prefixlen)) {
790                                 return ch;
791                             }
792                         }
793                     }
794             }
795 
796             return -1;
797         }
798 
799         /**
800          * Adds all chars in the set of algorithmic names into the set.
801          * Equivalent to part of calcAlgNameSetsLengths.
802          * @param set int set to add the chars of the algorithm names into
803          * @param maxlength maximum length to compare to
804          * @return the length that is either maxlength of the length of this
805          *         algorithm name if it is longer than maxlength
806          */
807         int add(int set[], int maxlength)
808         {
809             // prefix length
810             int length = UCharacterName.add(set, m_prefix_);
811             switch (m_type_) {
812                 case TYPE_0_ : {
813                     // name = prefix + (range->variant times) hex-digits
814                     // prefix
815                     length += m_variant_;
816                     /* synwee to check
817                      * addString(set, (const char *)(range + 1))
818                                        + range->variant;*/
819                     break;
820                 }
821                 case TYPE_1_ : {
822                     // name = prefix factorized-elements
823                     // get the set and maximum factor suffix length for each
824                     // factor
825                     for (int i = m_variant_ - 1; i > 0; i --)
826                     {
827                         int maxfactorlength = 0;
828                         int count = 0;
829                         for (int factor = m_factor_[i]; factor > 0; -- factor) {
830                             synchronized (m_utilStringBuffer_) {
831                                 m_utilStringBuffer_.delete(0,
832                                                 m_utilStringBuffer_.length());
833                                 count
834                                   = UCharacterUtility.getNullTermByteSubString(
835                                                 m_utilStringBuffer_,
836                                                 m_factorstring_, count);
837                                 UCharacterName.add(set, m_utilStringBuffer_);
838                                 if (m_utilStringBuffer_.length()
839                                                             > maxfactorlength)
840                                 {
841                                     maxfactorlength
842                                                 = m_utilStringBuffer_.length();
843                                 }
844                             }
845                         }
846                         length += maxfactorlength;
847                     }
848                 }
849             }
850             if (length > maxlength) {
851                 return length;
852             }
853             return maxlength;
854         }
855 
856         // private data members ------------------------------------------
857 
        /**
         * Algorithmic data information.
         * m_rangestart_ and m_rangeend_ delimit the code points served by this
         * algorithm; presumably both ends are inclusive, the range test itself
         * is outside this part of the file - TODO confirm.
         * m_type_ selects the naming scheme and is compared against TYPE_0_
         * and TYPE_1_.
         * m_variant_ is the number of hex digits for TYPE_0_ and the number of
         * factor blocks for TYPE_1_, where it is expected to equal
         * m_factor_.length.
         * m_factor_ holds the number of alternative suffix strings of each
         * factor block.
         * m_prefix_ is the text every name of the range starts with.
         * m_factorstring_ holds the null terminated suffix strings of all
         * factor blocks, one block after the other.
         */
        private int m_rangestart_;
        private int m_rangeend_;
        private byte m_type_;
        private byte m_variant_;
        private char m_factor_[];
        private String   m_prefix_;
        private byte m_factorstring_[];
        /**
         * Utility StringBuffer. It is emptied before each use and the users
         * in this class synchronize on it while they fill and read it.
         */
        private StringBuffer   m_utilStringBuffer_ = new StringBuffer  ();
        /**
         * Utility int buffer
         */
        private int m_utilIntBuffer_[] = new int[256];
876 
877         // private methods -----------------------------------------------
878 
879         /**
880         * Gets the indexth string in each of the argument factor block
881         * @param index array with each index corresponding to each factor block
882         * @param length length of the array index
883         * @return the combined string of the array of indexth factor string in
884         *         factor block
885         */
886         private String   getFactorString(int index[], int length)
887         {
888             int size = m_factor_.length;
889             if (index == null || length != size) {
890                 return null;
891             }
892 
893             synchronized (m_utilStringBuffer_) {
894                 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
895                 int count = 0;
896                 int factor;
897                 size --;
898                 for (int i = 0; i <= size; i ++) {
899                     factor = m_factor_[i];
900                     count = UCharacterUtility.skipNullTermByteSubString(
901                                              m_factorstring_, count, index[i]);
902                     count = UCharacterUtility.getNullTermByteSubString(
903                                           m_utilStringBuffer_, m_factorstring_,
904                                           count);
905                     if (i != size) {
906                         count = UCharacterUtility.skipNullTermByteSubString(
907                                                        m_factorstring_, count,
908                                                        factor - index[i] - 1);
909                     }
910                 }
911                 return m_utilStringBuffer_.toString();
912             }
913         }
914 
915         /**
916         * Compares the indexth string in each of the argument factor block with
917         * the argument string
918         * @param index array with each index corresponding to each factor block
919         * @param length index array length
920         * @param str string to compare with
921         * @param offset of str to start comparison
922         * @return true if string matches
923         */
924         private boolean compareFactorString(int index[], int length, String   str,
925                                             int offset)
926         {
927             int size = m_factor_.length;
928             if (index == null || length != size)
929                 return false;
930 
931             int count = 0;
932             int strcount = offset;
933             int factor;
934             size --;
935             for (int i = 0; i <= size; i ++)
936             {
937                 factor = m_factor_[i];
938                 count = UCharacterUtility.skipNullTermByteSubString(
939                                           m_factorstring_, count, index[i]);
940                 strcount = UCharacterUtility.compareNullTermByteSubString(str,
941                                           m_factorstring_, strcount, count);
942                 if (strcount < 0) {
943                     return false;
944                 }
945 
946                 if (i != size) {
947                     count = UCharacterUtility.skipNullTermByteSubString(
948                                   m_factorstring_, count, factor - index[i]);
949                 }
950             }
951             if (strcount != str.length()) {
952                 return false;
953             }
954             return true;
955         }
956     }
957 
958     // package private data members --------------------------------------
959 
    /**
     * Size of the entry of one group in the group information array, in
     * number of chars; entry i starts at index i * m_groupsize_.
     * Set by setGroupCountSize.
     */
    int m_groupsize_ = 0;
964 
965     // package private methods --------------------------------------------
966 
967     /**
968     * Sets the token data
969     * @param token array of tokens
970     * @param tokenstring array of string values of the tokens
971     * @return false if there is a data error
972     */
973     boolean setToken(char token[], byte tokenstring[])
974     {
975         if (token != null && tokenstring != null && token.length > 0 &&
976             tokenstring.length > 0) {
977             m_tokentable_ = token;
978             m_tokenstring_ = tokenstring;
979             return true;
980         }
981         return false;
982     }
983 
984     /**
985     * Set the algorithm name information array
986     * @param alg Algorithm information array
987     * @return true if the group string offset has been set correctly
988     */
989     boolean setAlgorithm(AlgorithmName alg[])
990     {
991         if (alg != null && alg.length != 0) {
992             m_algorithm_ = alg;
993             return true;
994         }
995         return false;
996     }
997 
998     /**
999     * Sets the number of group and size of each group in number of char
1000    * @param count number of groups
1001    * @param size size of group in char
1002    * @return true if group size is set correctly
1003    */
1004    boolean setGroupCountSize(int count, int size)
1005    {
1006        if (count <= 0 || size <= 0) {
1007            return false;
1008        }
1009        m_groupcount_ = count;
1010        m_groupsize_ = size;
1011        return true;
1012    }
1013
1014    /**
1015    * Sets the group name data
1016    * @param group index information array
1017    * @param groupstring name information array
1018    * @return false if there is a data error
1019    */
1020    boolean setGroup(char group[], byte groupstring[])
1021    {
1022        if (group != null && groupstring != null && group.length > 0 &&
1023            groupstring.length > 0) {
1024            m_groupinfo_ = group;
1025            m_groupstring_ = groupstring;
1026            return true;
1027        }
1028        return false;
1029    }
1030
1031    // private data members ----------------------------------------------
1032
    /**
     * Data used in unames.icu.
     * m_tokentable_ is indexed by a byte of a group string, or by a lead byte
     * combined with its following byte. An entry of 0xFFFE marks a lead byte
     * of a double-byte token, 0xFFFF marks a byte that stands for itself, any
     * other entry is the offset of the token word in m_tokenstring_.
     * m_tokenstring_ holds the null terminated token words.
     * m_groupinfo_ holds m_groupsize_ chars per group; the first char of an
     * entry, shifted left by GROUP_SHIFT_, is the first code point of the
     * group.
     * m_groupstring_ holds the tokenized names of all groups; the fields of
     * one line are separated by ';'.
     * m_algorithm_ holds the algorithmic name ranges.
     */
    private char m_tokentable_[];
    private byte m_tokenstring_[];
    private char m_groupinfo_[];
    private byte m_groupstring_[];
    private AlgorithmName m_algorithm_[];

    /**
     * Group use: scratch arrays handed to getGroupLengths by the synchronized
     * getGroupChar, one entry per line of the group being searched.
     */
    private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
    private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
1047
    /**
     * Default name of the name datafile, opened by the constructor
     */
    private static final String   NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu";
    /**
     * Shift count to retrieve group information: the first char of a group
     * entry shifted left by this count gives the first code point of the
     * group. LINES_PER_GROUP_ is 1 << 5, matching this count.
     */
    private static final int GROUP_SHIFT_ = 5;
    /**
     * Mask to retrieve the offset for a particular character within a group
     */
    private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
    /**
     * Buffer size handed to the BufferedInputStream that reads the datafile
     */
    private static final int NAME_BUFFER_SIZE_ = 100000;

    /**
     * Position of offsethigh in group information array
     */
    private static final int OFFSET_HIGH_OFFSET_ = 1;

    /**
     * Position of offsetlow in group information array
     */
    private static final int OFFSET_LOW_OFFSET_ = 2;
    /**
     * Double nibble indicator, any nibble > this number has to be combined
     * with its following nibble
     */
    private static final int SINGLE_NIBBLE_MAX_ = 11;
1079
1080    /*
1081     * Maximum length of character names (regular & 1.0).
1082     */
1083    //private static int MAX_NAME_LENGTH_ = 0;
1084    /*
1085     * Maximum length of ISO comments.
1086     */
1087    //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
1088
    /**
     * Set of chars used in character names (regular & 1.0).
     * Chars are platform-dependent (can be EBCDIC).
     * Kept as 8 ints, that is one bit for each of the chars 0 to 255.
     */
    private int m_nameSet_[] = new int[8];
    /**
     * Set of chars used in ISO comments. (regular & 1.0).
     * Chars are platform-dependent (can be EBCDIC).
     * Kept as 8 ints, that is one bit for each of the chars 0 to 255.
     */
    private int m_ISOCommentSet_[] = new int[8];
    /**
     * Utility StringBuffer. It is emptied before each use and its users
     * synchronize on it while they fill and read it.
     */
    private StringBuffer   m_utilStringBuffer_ = new StringBuffer  ();
    /**
     * Utility int buffer. addGroupName returns it with the name length at
     * index 0 and the parsed group string length at index 1, so the values
     * have to be read before the next call.
     */
    private int m_utilIntBuffer_[] = new int[2];
    /**
     * Maximum ISO comment length, set by addGroupName
     */
    private int m_maxISOCommentLength_;
    /**
     * Maximum name length. A value greater than 0 also tells
     * initNameSetsLengths that the sets and lengths have been calculated.
     */
    private int m_maxNameLength_;
    /**
     * Singleton instance
     */
    private static UCharacterName INSTANCE_ = null;
    /**
     * Type names used for extended names of the form <type-hex>.
     * The position of a name is the extended type that getType returns for
     * the character. The last three names belong to NON_CHARACTER_,
     * LEAD_SURROGATE_ and TRAIL_SURROGATE_, which assumes that
     * UCharacterCategory.CHAR_CATEGORY_COUNT is 30 - TODO confirm.
     */
    private static final String   TYPE_NAMES_[] = {"unassigned",
                                                 "uppercase letter",
                                                 "lowercase letter",
                                                 "titlecase letter",
                                                 "modifier letter",
                                                 "other letter",
                                                 "non spacing mark",
                                                 "enclosing mark",
                                                 "combining spacing mark",
                                                 "decimal digit number",
                                                 "letter number",
                                                 "other number",
                                                 "space separator",
                                                 "line separator",
                                                 "paragraph separator",
                                                 "control",
                                                 "format",
                                                 "private use area",
                                                 "surrogate",
                                                 "dash punctuation",
                                                 "start punctuation",
                                                 "end punctuation",
                                                 "connector punctuation",
                                                 "other punctuation",
                                                 "math symbol",
                                                 "currency symbol",
                                                 "modifier symbol",
                                                 "other symbol",
                                                 "initial punctuation",
                                                 "final punctuation",
                                                 "noncharacter",
                                                 "lead surrogate",
                                                 "trail surrogate"};
    /**
     * Unknown type name
     */
    private static final String   UNKNOWN_TYPE_NAME_ = "unknown";
    /**
     * Not a character type, the first extended type after the regular
     * character categories
     */
    private static final int NON_CHARACTER_
                                    = UCharacterCategory.CHAR_CATEGORY_COUNT;
    /**
     * Lead surrogate type, returned by getType in place of the surrogate
     * category for code points up to UTF16.LEAD_SURROGATE_MAX_VALUE
     */
    private static final int LEAD_SURROGATE_
                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
    /**
     * Trail surrogate type, returned by getType in place of the surrogate
     * category for the remaining surrogate code points
     */
    private static final int TRAIL_SURROGATE_
                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
    /**
     * Extended category count: the regular character categories plus the
     * three extended types above
     */
    static final int EXTENDED_CATEGORY_
                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
1179
1180    // private constructor ------------------------------------------------
1181
1182    /**
1183    * <p>Protected constructor for use in UCharacter.</p>
1184    * @exception IOException thrown when data reading fails
1185    */
1186    private UCharacterName() throws IOException  
1187    {
1188        InputStream   is = ICUData.getRequiredStream(NAME_FILE_NAME_);
1189        BufferedInputStream   b = new BufferedInputStream  (is, NAME_BUFFER_SIZE_);
1190        UCharacterNameReader reader = new UCharacterNameReader(b);
1191        reader.read(this);
1192        b.close();
1193    }
1194
1195    // private methods ---------------------------------------------------
1196
1197    /**
1198    * Gets the algorithmic name for the argument character
1199    * @param ch character to determine name for
1200    * @param choice name choice
1201    * @return the algorithmic name or null if not found
1202    */
1203    private String   getAlgName(int ch, int choice)
1204    {
1205        // Do not write algorithmic Unicode 1.0 names because Unihan names are
1206        // the same as the modern ones, extension A was only introduced with
1207        // Unicode 3.0, and the Hangul syllable block was moved and changed
1208        // around Unicode 1.1.5.
1209        if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
1210            // index in terms integer index
1211            synchronized (m_utilStringBuffer_) {
1212                m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
1213
1214                for (int index = m_algorithm_.length - 1; index >= 0; index --)
1215                {
1216                   if (m_algorithm_[index].contains(ch)) {
1217                      m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
1218                      return m_utilStringBuffer_.toString();
1219                   }
1220                }
1221            }
1222        }
1223        return null;
1224    }
1225
1226    /**
1227    * Getting the character with the tokenized argument name
1228    * @param name of the character
1229    * @return character with the tokenized argument name or -1 if character
1230    *         is not found
1231    */
1232    private synchronized int getGroupChar(String   name, int choice)
1233    {
1234        for (int i = 0; i < m_groupcount_; i ++) {
1235            // populating the data set of grouptable
1236
1237            int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
1238                                                  m_grouplengths_);
1239
1240            // shift out to function
1241            int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
1242                                      choice);
1243            if (result != -1) {
1244                return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
1245                         | result;
1246            }
1247        }
1248        return -1;
1249    }
1250
1251    /**
1252    * Compares and retrieve character if name is found within the argument
1253    * group
1254    * @param index index where the set of names reside in the group block
1255    * @param length list of lengths of the strings
1256    * @param name character name to search for
1257    * @param choice of either 1.0 or the most current unicode name
1258    * @return relative character in the group which matches name, otherwise if
1259    *         not found, -1 will be returned
1260    */
1261    private int getGroupChar(int index, char length[], String   name,
1262                             int choice)
1263    {
1264        byte b = 0;
1265        char token;
1266        int len;
1267        int namelen = name.length();
1268        int nindex;
1269        int count;
1270
1271        for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
1272            nindex = 0;
1273            len = length[result];
1274
1275            if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
1276                int oldindex = index;
1277                index += UCharacterUtility.skipByteSubString(m_groupstring_,
1278                                                     index, len, (byte)';');
1279                len -= (index - oldindex);
1280            }
1281
1282            // number of tokens is > the length of the name
1283            // write each letter directly, and write a token word per token
1284            for (count = 0; count < len && nindex != -1 && nindex < namelen;
1285                ) {
1286                b = m_groupstring_[index + count];
1287                count ++;
1288
1289                if (b >= m_tokentable_.length) {
1290                    if (name.charAt(nindex ++) != (b & 0xFF)) {
1291                        nindex = -1;
1292                    }
1293                }
1294                else {
1295                    token = m_tokentable_[b & 0xFF];
1296                    if (token == 0xFFFE) {
1297                        // this is a lead byte for a double-byte token
1298                        token = m_tokentable_[b << 8 |
1299                                   (m_groupstring_[index + count] & 0x00ff)];
1300                        count ++;
1301                    }
1302                    if (token == 0xFFFF) {
1303                        if (name.charAt(nindex ++) != (b & 0xFF)) {
1304                            nindex = -1;
1305                        }
1306                    }
1307                    else {
1308                        // compare token with name
1309                        nindex = UCharacterUtility.compareNullTermByteSubString(
1310                                        name, m_tokenstring_, nindex, token);
1311                    }
1312                }
1313            }
1314
1315            if (namelen == nindex &&
1316                (count == len || m_groupstring_[index + count] == ';')) {
1317                return result;
1318            }
1319
1320            index += len;
1321        }
1322        return -1;
1323    }
1324
1325    /**
1326    * Gets the character extended type
1327    * @param ch character to be tested
1328    * @return extended type it is associated with
1329    */
1330    private static int getType(int ch)
1331    {
1332        if (UCharacterUtility.isNonCharacter(ch)) {
1333            // not a character we return a invalid category count
1334            return NON_CHARACTER_;
1335        }
1336        int result = UCharacter.getType(ch);
1337        if (result == UCharacterCategory.SURROGATE) {
1338            if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
1339                result = LEAD_SURROGATE_;
1340            }
1341            else {
1342                result = TRAIL_SURROGATE_;
1343            }
1344        }
1345        return result;
1346    }
1347
1348    /**
1349    * Getting the character with extended name of the form <....>.
1350    * @param name of the character to be found
1351    * @param choice name choice
1352    * @return character associated with the name, -1 if such character is not
1353    *                   found and -2 if we should continue with the search.
1354    */
1355    private static int getExtendedChar(String   name, int choice)
1356    {
1357        if (name.charAt(0) == '<') {
1358            if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
1359                int endIndex = name.length() - 1;
1360                if (name.charAt(endIndex) == '>') {
1361                    int startIndex = name.lastIndexOf('-');
1362                    if (startIndex >= 0) { // We've got a category.
1363                        startIndex ++;
1364                        int result = -1;
1365                        try {
1366                            result = Integer.parseInt(
1367                                        name.substring(startIndex, endIndex),
1368                                        16);
1369                        }
1370                        catch (NumberFormatException   e) {
1371                            return -1;
1372                        }
1373                        // Now validate the category name. We could use a
1374                        // binary search, or a trie, if we really wanted to.
1375                        String   type = name.substring(1, startIndex - 1);
1376                        int length = TYPE_NAMES_.length;
1377                        for (int i = 0; i < length; ++ i) {
1378                            if (type.compareTo(TYPE_NAMES_[i]) == 0) {
1379                                if (getType(result) == i) {
1380                                    return result;
1381                                }
1382                                break;
1383                            }
1384                        }
1385                    }
1386                }
1387            }
1388            return -1;
1389        }
1390        return -2;
1391    }
1392
1393    // sets of name characters, maximum name lengths -----------------------
1394
1395    /**
1396     * Adds a codepoint into a set of ints.
1397     * Equivalent to SET_ADD.
1398     * @param set set to add to
1399     * @param ch 16 bit char to add
1400     */
1401    private static void add(int set[], char ch)
1402    {
1403        set[ch >>> 5] |= 1 << (ch & 0x1f);
1404    }
1405
1406    /**
1407     * Checks if a codepoint is a part of a set of ints.
1408     * Equivalent to SET_CONTAINS.
1409     * @param set set to check in
1410     * @param ch 16 bit char to check
1411     * @return true if codepoint is part of the set, false otherwise
1412     */
1413    private static boolean contains(int set[], char ch)
1414    {
1415        return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
1416    }
1417
1418    /**
1419     * Adds all characters of the argument str and gets the length
1420     * Equivalent to calcStringSetLength.
1421     * @param set set to add all chars of str to
1422     * @param str string to add
1423     */
1424    private static int add(int set[], String   str)
1425    {
1426        int result = str.length();
1427
1428        for (int i = result - 1; i >= 0; i --) {
1429            add(set, str.charAt(i));
1430        }
1431        return result;
1432    }
1433
1434    /**
1435     * Adds all characters of the argument str and gets the length
1436     * Equivalent to calcStringSetLength.
1437     * @param set set to add all chars of str to
1438     * @param str string to add
1439     */
1440    private static int add(int set[], StringBuffer   str)
1441    {
1442        int result = str.length();
1443
1444        for (int i = result - 1; i >= 0; i --) {
1445            add(set, str.charAt(i));
1446        }
1447        return result;
1448    }
1449
1450    /**
1451     * Adds all algorithmic names into the name set.
1452     * Equivalent to part of calcAlgNameSetsLengths.
1453     * @param maxlength length to compare to
1454     * @return the maximum length of any possible algorithmic name if it is >
1455     *         maxlength, otherwise maxlength is returned.
1456     */
1457    private int addAlgorithmName(int maxlength)
1458    {
1459        int result = 0;
1460        for (int i = m_algorithm_.length - 1; i >= 0; i --) {
1461            result = m_algorithm_[i].add(m_nameSet_, maxlength);
1462            if (result > maxlength) {
1463                maxlength = result;
1464            }
1465        }
1466        return maxlength;
1467    }
1468
1469    /**
1470     * Adds all extended names into the name set.
1471     * Equivalent to part of calcExtNameSetsLengths.
1472     * @param maxlength length to compare to
1473     * @return the maxlength of any possible extended name.
1474     */
1475    private int addExtendedName(int maxlength)
1476    {
1477        for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) {
1478            // for each category, count the length of the category name
1479            // plus 9 =
1480            // 2 for <>
1481            // 1 for -
1482            // 6 for most hex digits per code point
1483            int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
1484            if (length > maxlength) {
1485                maxlength = length;
1486            }
1487        }
1488        return maxlength;
1489    }
1490
1491    /**
1492     * Adds names of a group to the argument set.
1493     * Equivalent to calcNameSetLength.
1494     * @param offset of the group name string in byte count
1495     * @param length of the group name string
1496     * @param tokenlength array to store the length of each token
1497     * @param set to add to
1498     * @return the length of the name string and the length of the group
1499     *         string parsed
1500     */
1501    private int[] addGroupName(int offset, int length, byte tokenlength[],
1502                               int set[])
1503    {
1504        int resultnlength = 0;
1505        int resultplength = 0;
1506        while (resultplength < length) {
1507            char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
1508            resultplength ++;
1509            if (b == ';') {
1510                break;
1511            }
1512
1513            if (b >= m_tokentable_.length) {
1514                add(set, b); // implicit letter
1515                resultnlength ++;
1516            }
1517            else {
1518                char token = m_tokentable_[b & 0x00ff];
1519                if (token == 0xFFFE) {
1520                    // this is a lead byte for a double-byte token
1521                    b = (char)(b << 8 | (m_groupstring_[offset + resultplength]
1522                                         & 0x00ff));
1523                    token = m_tokentable_[b];
1524                    resultplength ++;
1525                }
1526                if (token == 0xFFFF) {
1527                    add(set, b);
1528                    resultnlength ++;
1529                }
1530                else {
1531                    // count token word
1532                    // use cached token length
1533                    byte tlength = tokenlength[b];
1534                    if (tlength == 0) {
1535                        synchronized (m_utilStringBuffer_) {
1536                            m_utilStringBuffer_.delete(0,
1537                                                 m_utilStringBuffer_.length());
1538                            UCharacterUtility.getNullTermByteSubString(
1539                                           m_utilStringBuffer_, m_tokenstring_,
1540                                           token);
1541                            tlength = (byte)add(set, m_utilStringBuffer_);
1542                        }
1543                        tokenlength[b] = tlength;
1544                    }
1545                    resultnlength += tlength;
1546                }
1547            }
1548        }
1549        m_utilIntBuffer_[0] = resultnlength;
1550        m_utilIntBuffer_[1] = resultplength;
1551        return m_utilIntBuffer_;
1552    }
1553
1554    /**
1555     * Adds names of all group to the argument set.
1556     * Sets the data member m_max*Length_.
1557     * Method called only once.
1558     * Equivalent to calcGroupNameSetsLength.
1559     * @param maxlength length to compare to
1560     */
1561    private void addGroupName(int maxlength)
1562    {
1563        int maxisolength = 0;
1564        char offsets[] = new char[LINES_PER_GROUP_ + 2];
1565        char lengths[] = new char[LINES_PER_GROUP_ + 2];
1566        byte tokenlengths[] = new byte[m_tokentable_.length];
1567
1568        // enumerate all groups
1569        // for (int i = m_groupcount_ - 1; i >= 0; i --) {
1570        for (int i = 0; i < m_groupcount_ ; i ++) {
1571            int offset = getGroupLengths(i, offsets, lengths);
1572            // enumerate all lines in each group
1573            // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
1574            //    linenumber --) {
1575            for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
1576                linenumber ++) {
1577                int lineoffset = offset + offsets[linenumber];
1578                int length = lengths[linenumber];
1579                if (length == 0) {
1580                    continue;
1581                }
1582
1583                // read regular name
1584                int parsed[] = addGroupName(lineoffset, length, tokenlengths,
1585                                            m_nameSet_);
1586                if (parsed[0] > maxlength) {
1587                    // 0 for name length
1588                    maxlength = parsed[0];
1589                }
1590                lineoffset += parsed[1];
1591                if (parsed[1] >= length) {
1592                    // 1 for parsed group string length
1593                    continue;
1594                }
1595                length -= parsed[1];
1596                // read Unicode 1.0 name
1597                parsed = addGroupName(lineoffset, length, tokenlengths,
1598                                      m_nameSet_);
1599                if (parsed[0] > maxlength) {
1600                    // 0 for name length
1601                    maxlength = parsed[0];
1602                }
1603                lineoffset += parsed[1];
1604                if (parsed[1] >= length) {
1605                    // 1 for parsed group string length
1606                    continue;
1607                }
1608                length -= parsed[1];
1609                // read ISO comment
1610                parsed = addGroupName(lineoffset, length, tokenlengths,
1611                                      m_ISOCommentSet_);
1612                if (parsed[1] > maxisolength) {
1613                    maxisolength = length;
1614                }
1615            }
1616        }
1617
1618        // set gMax... - name length last for threading
1619        m_maxISOCommentLength_ = maxisolength;
1620        m_maxNameLength_ = maxlength;
1621    }
1622
1623    /**
1624     * Sets up the name sets and the calculation of the maximum lengths.
1625     * Equivalent to calcNameSetsLengths.
1626     */
1627    private boolean initNameSetsLengths()
1628    {
1629        if (m_maxNameLength_ > 0) {
1630            return true;
1631        }
1632
1633        String   extra = "0123456789ABCDEF<>-";
1634        // set hex digits, used in various names, and <>-, used in extended
1635        // names
1636        for (int i = extra.length() - 1; i >= 0; i --) {
1637            add(m_nameSet_, extra.charAt(i));
1638        }
1639
1640        // set sets and lengths from algorithmic names
1641        m_maxNameLength_ = addAlgorithmName(0);
1642        // set sets and lengths from extended names
1643        m_maxNameLength_ = addExtendedName(m_maxNameLength_);
1644        // set sets and lengths from group names, set global maximum values
1645        addGroupName(m_maxNameLength_);
1646        return true;
1647    }
1648
1649    /**
1650     * Converts the char set cset into a Unicode set uset.
1651     * Equivalent to charSetToUSet.
1652     * @param set Set of 256 bit flags corresponding to a set of chars.
1653     * @param uset USet to receive characters. Existing contents are deleted.
1654     */
1655    private void convert(int set[], UnicodeSet uset)
1656    {
1657        uset.clear();
1658        if (!initNameSetsLengths()) {
1659            return;
1660        }
1661
1662        // build a char string with all chars that are used in character names
1663        for (char c = 255; c > 0; c --) {
1664            if (contains(set, c)) {
1665                uset.add(c);
1666            }
1667        }
1668    }
1669}
1670
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags