UCharacter


1   //##header 1189099963000 FOUNDATION
2   /**
3   *******************************************************************************
4   * Copyright (C) 1996-2007, International Business Machines Corporation and    *
5   * others. All Rights Reserved.                                                *
6   *******************************************************************************
7   */
8   
9   package com.ibm.icu.lang;
10  
11  import java.io.IOException  ;
12  import java.lang.ref.SoftReference  ;
13  import java.util.HashMap  ;
14  import java.util.Locale  ;
15  import java.util.Map  ;
16  import java.util.MissingResourceException  ;
17  
18  import com.ibm.icu.impl.UBiDiProps;
19  import com.ibm.icu.impl.UCaseProps;
20  import com.ibm.icu.impl.NormalizerImpl;
21  import com.ibm.icu.impl.UCharacterUtility;
22  import com.ibm.icu.impl.UCharacterName;
23  import com.ibm.icu.impl.UCharacterNameChoice;
24  import com.ibm.icu.impl.UPropertyAliases;
25  import com.ibm.icu.lang.UCharacterEnums.*;
26  import com.ibm.icu.text.BreakIterator;
27  import com.ibm.icu.text.UTF16;
28  import com.ibm.icu.impl.UCharacterProperty;
29  import com.ibm.icu.util.RangeValueIterator;
30  import com.ibm.icu.util.ULocale;
31  import com.ibm.icu.util.ValueIterator;
32  import com.ibm.icu.util.VersionInfo;
33  
34  /**
35   * <p>
36   * The UCharacter class provides extensions to the 
37   * <a HREF="http://java.sun.com/j2se/1.5/docs/api/java/lang/Character.html">
38   * java.lang.Character</a> class. These extensions provide support for 
39   * more Unicode properties and together with the <a HREF=../text/UTF16.html>UTF16</a> 
40   * class, provide support for supplementary characters (those with code 
41   * points above U+FFFF).
42   * Each ICU release supports the latest version of Unicode available at that time.
43   * </p>
44   * <p>
45   * Code points are represented in these APIs using ints. While it would be 
46   * more convenient in Java to have a separate primitive datatype for them, 
47   * ints suffice in the meantime.
48   * </p>
49   * <p>
50   * To use this class please add the jar file name icu4j.jar to the 
51   * class path, since it contains data files which supply the information used 
52   * by this file.<br>
53   * E.g. In Windows <br>
54   * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
55   * Otherwise, another method would be to copy the files uprops.dat and 
56   * unames.icu from the icu4j source subdirectory
57   * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 
58   * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
59   * </p>
60   * <p>
61   * Aside from the additions for UTF-16 support, and the updated Unicode
62   * properties, the main differences between UCharacter and Character are:
63   * <ul>
64   * <li> UCharacter is not designed to be a char wrapper and does not have 
65   *      APIs which involve management of that single char.<br>
66   *      These include: 
67   *      <ul>
68   *        <li> char charValue(), 
69   *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
70   *      </ul>
71   * <li> UCharacter does not include Character APIs that are deprecated, nor
72   *      does it include the Java-specific character information, such as 
73   *      boolean isJavaIdentifierPart(char ch).
74   * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 
75   *      values '10' - '35'. UCharacter also does this in digit and
76   *      getNumericValue, to adhere to the java semantics of these
77   *      methods.  New methods unicodeDigit, and
78   *      getUnicodeNumericValue do not treat the above code points 
79   *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
80   * </ul>
81   * <p>
82   * Further detailed differences can be determined from the program 
83   *        <a HREF="http://dev.icu-project.org/cgi-bin/viewcvs.cgi/~checkout~/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
84   *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
85   * </p>
86   * <p>
87   * In addition to Java compatibility functions, which calculate derived properties,
88   * this API provides low-level access to the Unicode Character Database.
89   * </p>
90   * <p>
91   * Unicode assigns each code point (not just assigned characters) values for
92   * many properties.
93   * Most of them are simple boolean flags, or constants from a small enumerated list.
94   * For some properties, values are strings or other relatively more complex types.
95   * </p>
96   * <p>
97   * For more information see
98   * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
99   * and the ICU User Guide chapter on Properties (http://icu.sourceforge.net/userguide/properties.html).
100  * </p>
101  * <p>
102  * There are also functions that provide easy migration from C/POSIX functions
103  * like isblank(). Their use is generally discouraged because the C/POSIX
104  * standards do not define their semantics beyond the ASCII range, which means
105  * that different implementations exhibit very different behavior.
106  * Instead, Unicode properties should be used directly.
107  * </p>
108  * <p>
109  * There are also only a few, broad C/POSIX character classes, and they tend
110  * to be used for conflicting purposes. For example, the "isalpha()" class
111  * is sometimes used to determine word boundaries, while a more sophisticated
112  * approach would at least distinguish initial letters from continuation
113  * characters (the latter including combining marks).
114  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
115  * Another example: There is no "istitle()" class for titlecase characters.
116  * </p>
117  * <p>
118  * ICU 3.4 and later provide API access for all twelve C/POSIX character classes.
119  * ICU implements them according to the Standard Recommendations in
120  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
121  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
122  * </p>
123  * <p>
124  * API access for C/POSIX character classes is as follows:
125  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
126  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
127  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
128  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|(1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|(1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
129  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
130  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
131  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
132  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
133  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
134  * - cntrl:     getType(c)==CONTROL
135  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
136  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)
137  * </p>
138  * <p>
139  * The C/POSIX character classes are also available in UnicodeSet patterns,
140  * using patterns like [:graph:] or \p{graph}.
141  * </p>
142  * <p>
143  * Note: There are several ICU (and Java) whitespace functions.
144  * Comparison:
145  * - isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
146  *       most of general categories "Z" (separators) + most whitespace ISO controls
147  *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
148  * - isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
149  * - isSpaceChar: just Z (including no-break spaces)
150  * </p>
151  * <p>
152  * This class is final and cannot be subclassed.
153  * </p>
154  * @author Syn Wee Quek
155  * @stable ICU 2.1
156  * @see com.ibm.icu.lang.UCharacterEnums
157  */
158 
159 public final class UCharacter implements ECharacterCategory, ECharacterDirection
160 { 
161     // public inner classes ----------------------------------------------
162       
163     /**
164      * A family of character subsets representing the character blocks in the 
165      * Unicode specification, generated from Unicode Data file Blocks.txt. 
166      * Character blocks generally define characters used for a specific script 
167      * or purpose. A character is contained by at most one Unicode block. 
168      * @stable ICU 2.4
169      */
170     public static final class UnicodeBlock extends Character.Subset   
171     {
172         // blocks objects ---------------------------------------------------
173         
174         /** 
175          * @stable ICU 2.6
176          */
177         public static final UnicodeBlock NO_BLOCK 
178             = new UnicodeBlock("NO_BLOCK", 0);
179 
180         /** 
181          * @stable ICU 2.4
182          */
183         public static final UnicodeBlock BASIC_LATIN 
184             = new UnicodeBlock("BASIC_LATIN", 1);
185         /** 
186          * @stable ICU 2.4 
187          */
188         public static final UnicodeBlock LATIN_1_SUPPLEMENT 
189             = new UnicodeBlock("LATIN_1_SUPPLEMENT", 2);
190         /** 
191          * @stable ICU 2.4 
192          */
193         public static final UnicodeBlock LATIN_EXTENDED_A
194             = new UnicodeBlock("LATIN_EXTENDED_A", 3);
195         /** 
196          * @stable ICU 2.4 
197          */
198         public static final UnicodeBlock LATIN_EXTENDED_B 
199             = new UnicodeBlock("LATIN_EXTENDED_B", 4);
200         /** 
201          * @stable ICU 2.4 
202          */
203         public static final UnicodeBlock IPA_EXTENSIONS 
204             = new UnicodeBlock("IPA_EXTENSIONS", 5);
205         /** 
206          * @stable ICU 2.4 
207          */
208         public static final UnicodeBlock SPACING_MODIFIER_LETTERS 
209             = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 6);
210         /** 
211          * @stable ICU 2.4 
212          */
213         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 
214             = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 7);
215         /**
216          * Unicode 3.2 renames this block to "Greek and Coptic".
217          * @stable ICU 2.4
218          */
219         public static final UnicodeBlock GREEK
220             = new UnicodeBlock("GREEK", 8);
221         /** 
222          * @stable ICU 2.4 
223          */
224         public static final UnicodeBlock CYRILLIC 
225             = new UnicodeBlock("CYRILLIC", 9);
226         /** 
227          * @stable ICU 2.4 
228          */
229         public static final UnicodeBlock ARMENIAN 
230             = new UnicodeBlock("ARMENIAN", 10);
231         /** 
232          * @stable ICU 2.4 
233          */
234         public static final UnicodeBlock HEBREW 
235             = new UnicodeBlock("HEBREW", 11);  
236         /** 
237          * @stable ICU 2.4 
238          */
239         public static final UnicodeBlock ARABIC
240             = new UnicodeBlock("ARABIC", 12);
241         /** 
242          * @stable ICU 2.4 
243          */
244         public static final UnicodeBlock SYRIAC 
245             = new UnicodeBlock("SYRIAC", 13);
246         /** 
247          * @stable ICU 2.4 
248          */
249         public static final UnicodeBlock THAANA 
250             = new UnicodeBlock("THAANA", 14);
251         /** 
252          * @stable ICU 2.4 
253          */
254         public static final UnicodeBlock DEVANAGARI 
255             = new UnicodeBlock("DEVANAGARI", 15);
256         /** 
257          * @stable ICU 2.4 
258          */
259         public static final UnicodeBlock BENGALI 
260             = new UnicodeBlock("BENGALI", 16);
261         /** 
262          * @stable ICU 2.4 
263          */
264         public static final UnicodeBlock GURMUKHI 
265             = new UnicodeBlock("GURMUKHI", 17);
266         /** 
267          * @stable ICU 2.4 
268          */
269         public static final UnicodeBlock GUJARATI 
270             = new UnicodeBlock("GUJARATI", 18);
271         /** 
272          * @stable ICU 2.4 
273          */
274         public static final UnicodeBlock ORIYA 
275             = new UnicodeBlock("ORIYA", 19);
276         /** 
277          * @stable ICU 2.4 
278          */
279         public static final UnicodeBlock TAMIL 
280             = new UnicodeBlock("TAMIL", 20);
281         /** 
282          * @stable ICU 2.4 
283          */
284         public static final UnicodeBlock TELUGU 
285             = new UnicodeBlock("TELUGU", 21);
286         /** 
287          * @stable ICU 2.4 
288          */
289         public static final UnicodeBlock KANNADA 
290             = new UnicodeBlock("KANNADA", 22);
291         /** 
292          * @stable ICU 2.4 
293          */
294         public static final UnicodeBlock MALAYALAM 
295             = new UnicodeBlock("MALAYALAM", 23);
296         /** 
297          * @stable ICU 2.4 
298          */
299         public static final UnicodeBlock SINHALA 
300             = new UnicodeBlock("SINHALA", 24);
301         /** 
302          * @stable ICU 2.4 
303          */
304         public static final UnicodeBlock THAI 
305             = new UnicodeBlock("THAI", 25);
306         /** 
307          * @stable ICU 2.4 
308          */
309         public static final UnicodeBlock LAO 
310             = new UnicodeBlock("LAO", 26);
311         /** 
312          * @stable ICU 2.4 
313          */
314         public static final UnicodeBlock TIBETAN 
315             = new UnicodeBlock("TIBETAN", 27);
316         /** 
317          * @stable ICU 2.4 
318          */
319         public static final UnicodeBlock MYANMAR 
320             = new UnicodeBlock("MYANMAR", 28);
321         /** 
322          * @stable ICU 2.4 
323          */
324         public static final UnicodeBlock GEORGIAN 
325             = new UnicodeBlock("GEORGIAN", 29);
326         /** 
327          * @stable ICU 2.4 
328          */
329         public static final UnicodeBlock HANGUL_JAMO 
330             = new UnicodeBlock("HANGUL_JAMO", 30);
331         /** 
332          * @stable ICU 2.4 
333          */
334         public static final UnicodeBlock ETHIOPIC 
335             = new UnicodeBlock("ETHIOPIC", 31);
336         /** 
337          * @stable ICU 2.4 
338          */
339         public static final UnicodeBlock CHEROKEE 
340             = new UnicodeBlock("CHEROKEE", 32);
341         /** 
342          * @stable ICU 2.4 
343          */
344         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 
345             = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 33);
346         /** 
347          * @stable ICU 2.4 
348          */
349         public static final UnicodeBlock OGHAM 
350             = new UnicodeBlock("OGHAM", 34);
351         /** 
352          * @stable ICU 2.4 
353          */
354         public static final UnicodeBlock RUNIC 
355             = new UnicodeBlock("RUNIC", 35);
356         /** 
357          * @stable ICU 2.4 
358          */
359         public static final UnicodeBlock KHMER 
360             = new UnicodeBlock("KHMER", 36);
361         /** 
362          * @stable ICU 2.4 
363          */
364         public static final UnicodeBlock MONGOLIAN 
365             = new UnicodeBlock("MONGOLIAN", 37);
366         /** 
367          * @stable ICU 2.4 
368          */
369         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 
370             = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 38);
371         /** 
372          * @stable ICU 2.4 
373          */
374         public static final UnicodeBlock GREEK_EXTENDED 
375             = new UnicodeBlock("GREEK_EXTENDED", 39);
376         /** 
377          * @stable ICU 2.4 
378          */
379         public static final UnicodeBlock GENERAL_PUNCTUATION 
380             = new UnicodeBlock("GENERAL_PUNCTUATION", 40);
381         /** 
382          * @stable ICU 2.4 
383          */
384         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 
385             = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 41);
386         /** 
387          * @stable ICU 2.4 
388          */
389         public static final UnicodeBlock CURRENCY_SYMBOLS 
390             = new UnicodeBlock("CURRENCY_SYMBOLS", 42);
391         /**
392          * Unicode 3.2 renames this block to "Combining Diacritical Marks for 
393          * Symbols".
394          * @stable ICU 2.4
395          */
396         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 
397             = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 43);
398         /** 
399          * @stable ICU 2.4 
400          */
401         public static final UnicodeBlock LETTERLIKE_SYMBOLS 
402             = new UnicodeBlock("LETTERLIKE_SYMBOLS", 44);
403         /** 
404          * @stable ICU 2.4 
405          */
406         public static final UnicodeBlock NUMBER_FORMS 
407             = new UnicodeBlock("NUMBER_FORMS", 45);
408         /** 
409          * @stable ICU 2.4 
410          */
411         public static final UnicodeBlock ARROWS 
412             = new UnicodeBlock("ARROWS", 46);
413         /** 
414          * @stable ICU 2.4 
415          */
416         public static final UnicodeBlock MATHEMATICAL_OPERATORS 
417             = new UnicodeBlock("MATHEMATICAL_OPERATORS", 47);
418         /** 
419          * @stable ICU 2.4 
420          */
421         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 
422             = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 48);
423         /** 
424          * @stable ICU 2.4 
425          */
426         public static final UnicodeBlock CONTROL_PICTURES 
427             = new UnicodeBlock("CONTROL_PICTURES", 49);
428         /** 
429          * @stable ICU 2.4 
430          */
431         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 
432             = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 50);
433         /** 
434          * @stable ICU 2.4 
435          */
436         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 
437             = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 51);
438         /** 
439          * @stable ICU 2.4 
440          */
441         public static final UnicodeBlock BOX_DRAWING 
442             = new UnicodeBlock("BOX_DRAWING", 52);
443         /** 
444          * @stable ICU 2.4 
445          */
446         public static final UnicodeBlock BLOCK_ELEMENTS 
447             = new UnicodeBlock("BLOCK_ELEMENTS", 53);
448         /** 
449          * @stable ICU 2.4 
450          */
451         public static final UnicodeBlock GEOMETRIC_SHAPES 
452             = new UnicodeBlock("GEOMETRIC_SHAPES", 54);
453         /** 
454          * @stable ICU 2.4 
455          */
456         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 
457             = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 55);
458         /** 
459          * @stable ICU 2.4 
460          */
461         public static final UnicodeBlock DINGBATS 
462             = new UnicodeBlock("DINGBATS", 56);
463         /** 
464          * @stable ICU 2.4 
465          */
466         public static final UnicodeBlock BRAILLE_PATTERNS 
467             = new UnicodeBlock("BRAILLE_PATTERNS", 57);
468         /** 
469          * @stable ICU 2.4 
470          */
471         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 
472             = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 58);
473         /** 
474          * @stable ICU 2.4 
475          */
476         public static final UnicodeBlock KANGXI_RADICALS 
477             = new UnicodeBlock("KANGXI_RADICALS", 59);
478         /** 
479          * @stable ICU 2.4 
480          */
481         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 
482             = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 60);
483         /** 
484          * @stable ICU 2.4 
485          */
486         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 
487             = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 61);
488         /** 
489          * @stable ICU 2.4 
490          */
491         public static final UnicodeBlock HIRAGANA 
492             = new UnicodeBlock("HIRAGANA", 62);
493         /** 
494          * @stable ICU 2.4 
495          */
496         public static final UnicodeBlock KATAKANA 
497             = new UnicodeBlock("KATAKANA", 63);
498         /** 
499          * @stable ICU 2.4 
500          */
501         public static final UnicodeBlock BOPOMOFO 
502             = new UnicodeBlock("BOPOMOFO", 64);
503         /** 
504          * @stable ICU 2.4 
505          */
506         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 
507             = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 65);
508         /** 
509          * @stable ICU 2.4 
510          */
511         public static final UnicodeBlock KANBUN 
512             = new UnicodeBlock("KANBUN", 66);
513         /** 
514          * @stable ICU 2.4 
515          */
516         public static final UnicodeBlock BOPOMOFO_EXTENDED 
517             = new UnicodeBlock("BOPOMOFO_EXTENDED", 67);
518         /** 
519          * @stable ICU 2.4 
520          */
521         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 
522             = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 68);
523         /** 
524          * @stable ICU 2.4 
525          */
526         public static final UnicodeBlock CJK_COMPATIBILITY 
527             = new UnicodeBlock("CJK_COMPATIBILITY", 69);
528         /** 
529          * @stable ICU 2.4 
530          */
531         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 
532             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 70);
533         /** 
534          * @stable ICU 2.4 
535          */
536         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 
537             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 71);
538         /** 
539          * @stable ICU 2.4 
540          */
541         public static final UnicodeBlock YI_SYLLABLES 
542             = new UnicodeBlock("YI_SYLLABLES", 72);
543         /** 
544          * @stable ICU 2.4 
545          */
546         public static final UnicodeBlock YI_RADICALS 
547             = new UnicodeBlock("YI_RADICALS", 73);
548         /** 
549          * @stable ICU 2.4 
550          */
551         public static final UnicodeBlock HANGUL_SYLLABLES 
552             = new UnicodeBlock("HANGUL_SYLLABLES", 74);
553         /** 
554          * @stable ICU 2.4 
555          */
556         public static final UnicodeBlock HIGH_SURROGATES 
557             = new UnicodeBlock("HIGH_SURROGATES", 75);
558         /** 
559          * @stable ICU 2.4 
560          */
561         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 
562             = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 76);
563         /** 
564          * @stable ICU 2.4 
565          */
566         public static final UnicodeBlock LOW_SURROGATES 
567             = new UnicodeBlock("LOW_SURROGATES", 77);
568         /**
569          * Same block object as {@link #PRIVATE_USE}, which is declared as an alias of this constant.
570          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
571          * and multiple code point ranges had this block.
572          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 
573          * and adds separate blocks for the supplementary PUAs.
574          * @stable ICU 2.4
575          */
576         public static final UnicodeBlock PRIVATE_USE_AREA 
577             = new UnicodeBlock("PRIVATE_USE_AREA",  78);
578         /**
579          * Alias of {@link #PRIVATE_USE_AREA}; both constants refer to the same block object.
580          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
581          * and multiple code point ranges had this block.
582          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 
583          * and adds separate blocks for the supplementary PUAs.
584          * @stable ICU 2.4
585          */
586         public static final UnicodeBlock PRIVATE_USE 
587             = PRIVATE_USE_AREA;
588         /** 
589          * @stable ICU 2.4 
590          */
591         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 
592             = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 79);
593         /** 
594          * @stable ICU 2.4 
595          */
596         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 
597             = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 80);
598         /** 
599          * @stable ICU 2.4 
600          */
601         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 
602             = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 81);
603         /** 
604          * @stable ICU 2.4 
605          */
606         public static final UnicodeBlock COMBINING_HALF_MARKS 
607             = new UnicodeBlock("COMBINING_HALF_MARKS", 82);
608         /** 
609          * @stable ICU 2.4 
610          */
611         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 
612             = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 83);
613         /** 
614          * @stable ICU 2.4 
615          */
616         public static final UnicodeBlock SMALL_FORM_VARIANTS 
617             = new UnicodeBlock("SMALL_FORM_VARIANTS", 84);
618         /** 
619          * @stable ICU 2.4 
620          */
621         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 
622             = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 85);
623         /** 
624          * @stable ICU 2.4 
625          */
626         public static final UnicodeBlock SPECIALS 
627             = new UnicodeBlock("SPECIALS", 86);
628         /** 
629          * @stable ICU 2.4 
630          */
631         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 
632             = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 87);
633         /** 
634          * @stable ICU 2.4 
635          */
636         public static final UnicodeBlock OLD_ITALIC 
637             = new UnicodeBlock("OLD_ITALIC", 88);
638         /** 
639          * @stable ICU 2.4 
640          */
641         public static final UnicodeBlock GOTHIC 
642             = new UnicodeBlock("GOTHIC", 89);
643         /** 
644          * @stable ICU 2.4 
645          */
646         public static final UnicodeBlock DESERET 
647             = new UnicodeBlock("DESERET", 90);
648         /** 
649          * @stable ICU 2.4 
650          */
651         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 
652             = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 91);
653         /** 
654          * @stable ICU 2.4 
655          */
656         public static final UnicodeBlock MUSICAL_SYMBOLS 
657             = new UnicodeBlock("MUSICAL_SYMBOLS", 92);
658         /** 
659          * @stable ICU 2.4 
660          */
661         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 
662             = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 93);
663         /** 
664          * @stable ICU 2.4 
665          */
666         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  
667             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 94);
668         /** 
669          * @stable ICU 2.4 
670          */
671         public static final UnicodeBlock 
672             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 
673             = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 95);
674         /** 
675          * @stable ICU 2.4 
676          */
677         public static final UnicodeBlock TAGS 
678             = new UnicodeBlock("TAGS", 96);
679     
680         // New blocks in Unicode 3.2
681     
682         /** 
683          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
684          * @stable ICU 2.4 
685          */
686         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 
687             = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 97);
688         /** 
689          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
690          * @stable ICU 3.0
691          */
692         public static final UnicodeBlock CYRILLIC_SUPPLEMENT 
693             = new UnicodeBlock("CYRILLIC_SUPPLEMENT", 97);
694         /** 
695          * @stable ICU 2.4 
696          */
697         public static final UnicodeBlock TAGALOG 
698             = new UnicodeBlock("TAGALOG", 98);
699         /** 
700          * @stable ICU 2.4 
701          */
702         public static final UnicodeBlock HANUNOO 
703             = new UnicodeBlock("HANUNOO", 99);
704         /** 
705          * @stable ICU 2.4 
706          */
707         public static final UnicodeBlock BUHID 
708             = new UnicodeBlock("BUHID", 100);
709         /** 
710          * @stable ICU 2.4 
711          */
712         public static final UnicodeBlock TAGBANWA 
713             = new UnicodeBlock("TAGBANWA", 101);
714         /** 
715          * @stable ICU 2.4 
716          */
717         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 
718             = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 102);
719         /** 
720          * @stable ICU 2.4 
721          */
722         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 
723             = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 103);
724         /** 
725          * @stable ICU 2.4 
726          */
727         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 
728             = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 104);
729         /** 
730          * @stable ICU 2.4 
731          */
732         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 
733             = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 105);
734         /** 
735          * @stable ICU 2.4 
736          */
737         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 
738             = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 106);
739         /** 
740          * @stable ICU 2.4 
741          */
742         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 
743             = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 107);
744         /** 
745          * @stable ICU 2.4 
746          */
747         public static final UnicodeBlock VARIATION_SELECTORS 
748             = new UnicodeBlock("VARIATION_SELECTORS", 108);
749         /** 
750          * @stable ICU 2.4 
751          */
752         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 
753             = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 109);
754         /** 
755          * @stable ICU 2.4 
756          */
757         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 
758             = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 110);
759    
760         /** 
761          * @stable ICU 2.6 
762          */
763         public static final UnicodeBlock LIMBU 
764             = new UnicodeBlock("LIMBU", 111);
765         /** Tai Le block. The subset name is written with an underscore, matching the field identifier.
766          * @stable ICU 2.6 
767          */
768         public static final UnicodeBlock TAI_LE 
769             = new UnicodeBlock("TAI_LE", 112);
770         /** Khmer Symbols block. The subset name is written with an underscore, matching the field identifier.
771          * @stable ICU 2.6 
772          */
773         public static final UnicodeBlock KHMER_SYMBOLS 
774             = new UnicodeBlock("KHMER_SYMBOLS", 113);
775 
776         /** Phonetic Extensions block. The subset name is written with an underscore, matching the field identifier.
777          * @stable ICU 2.6 
778          */
779         public static final UnicodeBlock PHONETIC_EXTENSIONS 
780             = new UnicodeBlock("PHONETIC_EXTENSIONS", 114);
781 
782         /** Block constant with id 115 (see MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID).
783          * @stable ICU 2.6 
784          */
785         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 
786             = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 115);
787         /** Block constant with id 116 (see YIJING_HEXAGRAM_SYMBOLS_ID).
788          * @stable ICU 2.6 
789          */
790         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 
791             = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 116);
792         /** Block constant with id 117 (see LINEAR_B_SYLLABARY_ID).
793          * @stable ICU 2.6 
794          */
795         public static final UnicodeBlock LINEAR_B_SYLLABARY 
796             = new UnicodeBlock("LINEAR_B_SYLLABARY", 117);
797         /** Block constant with id 118 (see LINEAR_B_IDEOGRAMS_ID).
798          * @stable ICU 2.6 
799          */
800         public static final UnicodeBlock LINEAR_B_IDEOGRAMS 
801             = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 118);
802         /** Block constant with id 119 (see AEGEAN_NUMBERS_ID).
803          * @stable ICU 2.6 
804          */
805         public static final UnicodeBlock AEGEAN_NUMBERS 
806             = new UnicodeBlock("AEGEAN_NUMBERS", 119);                                               
807         /** Block constant with id 120 (see UGARITIC_ID).
808          * @stable ICU 2.6 
809          */
810         public static final UnicodeBlock UGARITIC 
811             = new UnicodeBlock("UGARITIC", 120);
812         /** Block constant with id 121 (see SHAVIAN_ID).
813          * @stable ICU 2.6 
814          */
815         public static final UnicodeBlock SHAVIAN 
816             = new UnicodeBlock("SHAVIAN", 121);
817         /** Block constant with id 122 (see OSMANYA_ID).
818          * @stable ICU 2.6 
819          */
820         public static final UnicodeBlock OSMANYA 
821             = new UnicodeBlock("OSMANYA", 122);
822         /** Block constant with id 123 (see CYPRIOT_SYLLABARY_ID).
823          * @stable ICU 2.6 
824          */
825         public static final UnicodeBlock CYPRIOT_SYLLABARY 
826             = new UnicodeBlock("CYPRIOT_SYLLABARY", 123);
827         /** Block constant with id 124 (see TAI_XUAN_JING_SYMBOLS_ID).
828          * @stable ICU 2.6 
829          */
830         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 
831             = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 124);
832         
833         /** Block constant with id 125 (see VARIATION_SELECTORS_SUPPLEMENT_ID).
834          * @stable ICU 2.6 
835          */
836         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 
837             = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 125);                                      
838 
839         /* New blocks in Unicode 4.1 */
840 
841         /** Unicode 4.1 block constant with id 126 (see ANCIENT_GREEK_MUSICAL_NOTATION_ID).
842          * @draft ICU 3.4
843          * @provisional This API might change or be removed in a future release.
844          */
845         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 126); /*[1D200]*/
846 
847         /** Unicode 4.1 block constant with id 127 (see ANCIENT_GREEK_NUMBERS_ID).
848          * @draft ICU 3.4
849          * @provisional This API might change or be removed in a future release.
850          */
851         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 127); /*[10140]*/
852 
853         /** Unicode 4.1 block constant with id 128 (see ARABIC_SUPPLEMENT_ID).
854          * @draft ICU 3.4
855          * @provisional This API might change or be removed in a future release.
856          */
857         public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT", 128); /*[0750]*/
858 
859         /** Unicode 4.1 block constant with id 129 (see BUGINESE_ID).
860          * @draft ICU 3.4
861          * @provisional This API might change or be removed in a future release.
862          */
863         public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE", 129); /*[1A00]*/
864 
865         /** Unicode 4.1 block constant with id 130 (see CJK_STROKES_ID).
866          * @draft ICU 3.4
867          * @provisional This API might change or be removed in a future release.
868          */
869         public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES", 130); /*[31C0]*/
870 
871         /** Unicode 4.1 block constant with id 131 (see COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID).
872          * @draft ICU 3.4
873          * @provisional This API might change or be removed in a future release.
874          */
875         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 131); /*[1DC0]*/
876 
877         /** Unicode 4.1 block constant with id 132 (see COPTIC_ID).
878          * @draft ICU 3.4
879          * @provisional This API might change or be removed in a future release.
880          */
881         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", 132); /*[2C80]*/
882 
883         /** Unicode 4.1 block constant with id 133 (see ETHIOPIC_EXTENDED_ID).
884          * @draft ICU 3.4
885          * @provisional This API might change or be removed in a future release.
886          */
887         public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED", 133); /*[2D80]*/
888 
889         /** Unicode 4.1 block constant with id 134 (see ETHIOPIC_SUPPLEMENT_ID).
890          * @draft ICU 3.4
891          * @provisional This API might change or be removed in a future release.
892          */
893         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 134); /*[1380]*/
894 
895         /** Unicode 4.1 block constant with id 135 (see GEORGIAN_SUPPLEMENT_ID).
896          * @draft ICU 3.4
897          * @provisional This API might change or be removed in a future release.
898          */
899         public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT", 135); /*[2D00]*/
900 
901         /** Unicode 4.1 block constant with id 136 (see GLAGOLITIC_ID).
902          * @draft ICU 3.4
903          * @provisional This API might change or be removed in a future release.
904          */
905         public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC", 136); /*[2C00]*/
906 
907         /** Unicode 4.1 block constant with id 137 (see KHAROSHTHI_ID).
908          * @draft ICU 3.4
909          * @provisional This API might change or be removed in a future release.
910          */
911         public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI", 137); /*[10A00]*/
912 
913         /** Unicode 4.1 block constant with id 138 (see MODIFIER_TONE_LETTERS_ID).
914          * @draft ICU 3.4
915          * @provisional This API might change or be removed in a future release.
916          */
917         public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS", 138); /*[A700]*/
918 
919         /** Unicode 4.1 block constant with id 139 (see NEW_TAI_LUE_ID).
920          * @draft ICU 3.4
921          * @provisional This API might change or be removed in a future release.
922          */
923         public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE", 139); /*[1980]*/
924 
925         /** Unicode 4.1 block constant with id 140 (see OLD_PERSIAN_ID).
926          * @draft ICU 3.4
927          * @provisional This API might change or be removed in a future release.
928          */
929         public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN", 140); /*[103A0]*/
930 
931         /** Unicode 4.1 block constant with id 141 (see PHONETIC_EXTENSIONS_SUPPLEMENT_ID).
932          * @draft ICU 3.4
933          * @provisional This API might change or be removed in a future release.
934          */
935         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 141); /*[1D80]*/
936 
937         /** Unicode 4.1 block constant with id 142 (see SUPPLEMENTAL_PUNCTUATION_ID).
938          * @draft ICU 3.4
939          * @provisional This API might change or be removed in a future release.
940          */
941         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 142); /*[2E00]*/
942 
943         /** Unicode 4.1 block constant with id 143 (see SYLOTI_NAGRI_ID).
944          * @draft ICU 3.4
945          * @provisional This API might change or be removed in a future release.
946          */
947         public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI", 143); /*[A800]*/
948 
949         /** Unicode 4.1 block constant with id 144 (see TIFINAGH_ID).
950          * @draft ICU 3.4
951          * @provisional This API might change or be removed in a future release.
952          */
953         public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH", 144); /*[2D30]*/
954 
955         /** Unicode 4.1 block constant with id 145 (see VERTICAL_FORMS_ID).
956          * @draft ICU 3.4
957          * @provisional This API might change or be removed in a future release.
958          */
959         public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS", 145); /*[FE10]*/
960 
961         /** Unicode 5.0 block constant with id 146 (see NKO_ID).
962          * @draft ICU 3.6 
963          * @provisional This API might change or be removed in a future release.
964          */
965         public static final UnicodeBlock NKO = new UnicodeBlock( "NKO", 146); /*[07C0]*/
966         /** Unicode 5.0 block constant with id 147 (see BALINESE_ID).
967          * @draft ICU 3.6 
968          * @provisional This API might change or be removed in a future release.
969          */
970         public static final UnicodeBlock BALINESE = new UnicodeBlock( "BALINESE", 147); /*[1B00]*/
971         /** Unicode 5.0 block constant with id 148 (see LATIN_EXTENDED_C_ID).
972          * @draft ICU 3.6 
973          * @provisional This API might change or be removed in a future release.
974          */
975         public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock( "LATIN_EXTENDED_C", 148); /*[2C60]*/
976         /** Unicode 5.0 block constant with id 149 (see LATIN_EXTENDED_D_ID).
977          * @draft ICU 3.6 
978          * @provisional This API might change or be removed in a future release. 
979          */
980         public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock( "LATIN_EXTENDED_D", 149); /*[A720]*/
981         /** Unicode 5.0 block constant with id 150 (see PHAGS_PA_ID).
982          * @draft ICU 3.6 
983          * @provisional This API might change or be removed in a future release.
984          */
985         public static final UnicodeBlock PHAGS_PA = new UnicodeBlock( "PHAGS_PA", 150); /*[A840]*/
986         /** Unicode 5.0 block constant with id 151 (see PHOENICIAN_ID).
987          * @draft ICU 3.6 
988          * @provisional This API might change or be removed in a future release.
989          */
990         public static final UnicodeBlock PHOENICIAN = new UnicodeBlock( "PHOENICIAN", 151); /*[10900]*/
991         /** Block constant with id 152.
992          * @draft ICU 3.6 
993          * @provisional This API might change or be removed in a future release.
994          */
995         public static final UnicodeBlock CUNEIFORM = new UnicodeBlock( "CUNEIFORM", 152); /*[12000]*/
996         /** Block constant with id 153.
997          * @draft ICU 3.6 
998          * @provisional This API might change or be removed in a future release.
999          */
1000        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock( "CUNEIFORM_NUMBERS_AND_PUNCTUATION", 153); /*[12400]*/
1001        /** Block constant with id 154.
1002         * @draft ICU 3.6 
1003         * @provisional This API might change or be removed in a future release.
1004         */
1005        public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock( "COUNTING_ROD_NUMERALS", 154); /*[1D360]*/
1006        
1007        /** Block constant with id -1 (see INVALID_CODE_ID).
1008         * @stable ICU 2.4 
1009         */
1010        public static final UnicodeBlock INVALID_CODE 
1011            = new UnicodeBlock("INVALID_CODE", -1);
1012           
1013        // block id corresponding to icu4c -----------------------------------
1014           
1015        /** Integer id of the INVALID_CODE block; the same value -1 is passed to the INVALID_CODE block constant.
1016         * @stable ICU 2.4 
1017         */
1018        public static final int INVALID_CODE_ID = -1;                          
1019        /** Integer id of the BASIC_LATIN block.
1020         * @stable ICU 2.4
1021         */
1022        public static final int BASIC_LATIN_ID = 1;
1023        /** Integer id of the LATIN_1_SUPPLEMENT block.
1024         * @stable ICU 2.4 
1025         */
1026        public static final int LATIN_1_SUPPLEMENT_ID = 2;
1027        /** Integer id of the LATIN_EXTENDED_A block.
1028         * @stable ICU 2.4 
1029         */
1030        public static final int LATIN_EXTENDED_A_ID = 3;
1031        /** Integer id of the LATIN_EXTENDED_B block.
1032         * @stable ICU 2.4 
1033         */
1034        public static final int LATIN_EXTENDED_B_ID = 4;
1035        /** Integer id of the IPA_EXTENSIONS block.
1036         * @stable ICU 2.4 
1037         */
1038        public static final int IPA_EXTENSIONS_ID = 5;
1039        /** Integer id of the SPACING_MODIFIER_LETTERS block.
1040         * @stable ICU 2.4 
1041         */
1042        public static final int SPACING_MODIFIER_LETTERS_ID = 6;
1043        /** Integer id of the COMBINING_DIACRITICAL_MARKS block.
1044         * @stable ICU 2.4 
1045         */
1046        public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
1047        /** Integer id of the GREEK block.
1048         * Unicode 3.2 renames this block to "Greek and Coptic".
1049         * @stable ICU 2.4
1050         */
1051        public static final int GREEK_ID = 8;
1052        /** Integer id of the CYRILLIC block.
1053         * @stable ICU 2.4 
1054         */
1055        public static final int CYRILLIC_ID = 9;
1056        /** Integer id of the ARMENIAN block.
1057         * @stable ICU 2.4 
1058         */
1059        public static final int ARMENIAN_ID = 10;
1060        /** Integer id of the HEBREW block.
1061         * @stable ICU 2.4 
1062         */
1063        public static final int HEBREW_ID = 11;  
1064        /** Integer id of the ARABIC block.
1065         * @stable ICU 2.4 
1066         */
1067        public static final int ARABIC_ID = 12;
1068        /** Integer id of the SYRIAC block.
1069         * @stable ICU 2.4 
1070         */
1071        public static final int SYRIAC_ID = 13;
1072        /** Integer id of the THAANA block.
1073         * @stable ICU 2.4 
1074         */
1075        public static final int THAANA_ID = 14;
1076        /** Integer id of the DEVANAGARI block.
1077         * @stable ICU 2.4 
1078         */
1079        public static final int DEVANAGARI_ID = 15;
1080        /** Integer id of the BENGALI block.
1081         * @stable ICU 2.4 
1082         */
1083        public static final int BENGALI_ID = 16;
1084        /** Integer id of the GURMUKHI block.
1085         * @stable ICU 2.4 
1086         */
1087        public static final int GURMUKHI_ID = 17;
1088        /** Integer id of the GUJARATI block.
1089         * @stable ICU 2.4 
1090         */
1091        public static final int GUJARATI_ID = 18;
1092        /** Integer id of the ORIYA block.
1093         * @stable ICU 2.4 
1094         */
1095        public static final int ORIYA_ID = 19;
1096        /** Integer id of the TAMIL block.
1097         * @stable ICU 2.4 
1098         */
1099        public static final int TAMIL_ID = 20;
1100        /** Integer id of the TELUGU block.
1101         * @stable ICU 2.4 
1102         */
1103        public static final int TELUGU_ID = 21;
1104        /** Integer id of the KANNADA block.
1105         * @stable ICU 2.4 
1106         */
1107        public static final int KANNADA_ID = 22;
1108        /** Integer id of the MALAYALAM block.
1109         * @stable ICU 2.4 
1110         */
1111        public static final int MALAYALAM_ID = 23;
1112        /** Integer id of the SINHALA block.
1113         * @stable ICU 2.4 
1114         */
1115        public static final int SINHALA_ID = 24;
1116        /** Integer id of the THAI block.
1117         * @stable ICU 2.4 
1118         */
1119        public static final int THAI_ID = 25;
1120        /** Integer id of the LAO block.
1121         * @stable ICU 2.4 
1122         */
1123        public static final int LAO_ID = 26;
1124        /** Integer id of the TIBETAN block.
1125         * @stable ICU 2.4 
1126         */
1127        public static final int TIBETAN_ID = 27;
1128        /** Integer id of the MYANMAR block.
1129         * @stable ICU 2.4 
1130         */
1131        public static final int MYANMAR_ID = 28;
1132        /** Integer id of the GEORGIAN block.
1133         * @stable ICU 2.4 
1134         */
1135        public static final int GEORGIAN_ID = 29;
1136        /** Integer id of the HANGUL_JAMO block.
1137         * @stable ICU 2.4 
1138         */
1139        public static final int HANGUL_JAMO_ID = 30;
1140        /** Integer id of the ETHIOPIC block.
1141         * @stable ICU 2.4 
1142         */
1143        public static final int ETHIOPIC_ID = 31;
1144        /** Integer id of the CHEROKEE block.
1145         * @stable ICU 2.4 
1146         */
1147        public static final int CHEROKEE_ID = 32;
1148        /** Integer id of the UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS block.
1149         * @stable ICU 2.4 
1150         */
1151        public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
1152        /** Integer id of the OGHAM block.
1153         * @stable ICU 2.4 
1154         */
1155        public static final int OGHAM_ID = 34;
1156        /** Integer id of the RUNIC block.
1157         * @stable ICU 2.4 
1158         */
1159        public static final int RUNIC_ID = 35;
1160        /** Integer id of the KHMER block.
1161         * @stable ICU 2.4 
1162         */
1163        public static final int KHMER_ID = 36;
1164        /** Integer id of the MONGOLIAN block.
1165         * @stable ICU 2.4 
1166         */
1167        public static final int MONGOLIAN_ID = 37;
1168        /** Integer id of the LATIN_EXTENDED_ADDITIONAL block.
1169         * @stable ICU 2.4 
1170         */
1171        public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
1172        /** Integer id of the GREEK_EXTENDED block.
1173         * @stable ICU 2.4 
1174         */
1175        public static final int GREEK_EXTENDED_ID = 39;
1176        /** Integer id of the GENERAL_PUNCTUATION block.
1177         * @stable ICU 2.4 
1178         */
1179        public static final int GENERAL_PUNCTUATION_ID = 40;
1180        /** Integer id of the SUPERSCRIPTS_AND_SUBSCRIPTS block.
1181         * @stable ICU 2.4 
1182         */
1183        public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
1184        /** Integer id of the CURRENCY_SYMBOLS block.
1185         * @stable ICU 2.4 
1186         */
1187        public static final int CURRENCY_SYMBOLS_ID = 42;
1188        /** Integer id of the COMBINING_MARKS_FOR_SYMBOLS block.
1189         * Unicode 3.2 renames this block to "Combining Diacritical Marks for 
1190         * Symbols".
1191         * @stable ICU 2.4
1192         */
1193        public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
1194        /** Integer id of the LETTERLIKE_SYMBOLS block.
1195         * @stable ICU 2.4 
1196         */
1197        public static final int LETTERLIKE_SYMBOLS_ID = 44;
1198        /** Integer id of the NUMBER_FORMS block.
1199         * @stable ICU 2.4 
1200         */
1201        public static final int NUMBER_FORMS_ID = 45;
1202        /** Integer id of the ARROWS block.
1203         * @stable ICU 2.4 
1204         */
1205        public static final int ARROWS_ID = 46;
1206        /** Integer id of the MATHEMATICAL_OPERATORS block.
1207         * @stable ICU 2.4 
1208         */
1209        public static final int MATHEMATICAL_OPERATORS_ID = 47;
1210        /** Integer id of the MISCELLANEOUS_TECHNICAL block.
1211         * @stable ICU 2.4 
1212         */
1213        public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
1214        /** Integer id of the CONTROL_PICTURES block.
1215         * @stable ICU 2.4 
1216         */
1217        public static final int CONTROL_PICTURES_ID = 49;
1218        /** Integer id of the OPTICAL_CHARACTER_RECOGNITION block.
1219         * @stable ICU 2.4 
1220         */
1221        public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
1222        /** Integer id of the ENCLOSED_ALPHANUMERICS block.
1223         * @stable ICU 2.4 
1224         */
1225        public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
1226        /** Integer id of the BOX_DRAWING block.
1227         * @stable ICU 2.4 
1228         */
1229        public static final int BOX_DRAWING_ID = 52;
1230        /** Integer id of the BLOCK_ELEMENTS block.
1231         * @stable ICU 2.4 
1232         */
1233        public static final int BLOCK_ELEMENTS_ID = 53;
1234        /** Integer id of the GEOMETRIC_SHAPES block.
1235         * @stable ICU 2.4 
1236         */
1237        public static final int GEOMETRIC_SHAPES_ID = 54;
1238        /** Integer id of the MISCELLANEOUS_SYMBOLS block.
1239         * @stable ICU 2.4 
1240         */
1241        public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
1242        /** Integer id of the DINGBATS block.
1243         * @stable ICU 2.4 
1244         */
1245        public static final int DINGBATS_ID = 56;
1246        /** Integer id of the BRAILLE_PATTERNS block.
1247         * @stable ICU 2.4 
1248         */
1249        public static final int BRAILLE_PATTERNS_ID = 57;
1250        /** Integer id of the CJK_RADICALS_SUPPLEMENT block.
1251         * @stable ICU 2.4 
1252         */
1253        public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
1254        /** Integer id of the KANGXI_RADICALS block.
1255         * @stable ICU 2.4 
1256         */
1257        public static final int KANGXI_RADICALS_ID = 59;
1258        /** Integer id of the IDEOGRAPHIC_DESCRIPTION_CHARACTERS block.
1259         * @stable ICU 2.4 
1260         */
1261        public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
1262        /** Integer id of the CJK_SYMBOLS_AND_PUNCTUATION block.
1263         * @stable ICU 2.4 
1264         */
1265        public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
1266        /** Integer id of the HIRAGANA block.
1267         * @stable ICU 2.4 
1268         */
1269        public static final int HIRAGANA_ID = 62;
1270        /** Integer id of the KATAKANA block.
1271         * @stable ICU 2.4 
1272         */
1273        public static final int KATAKANA_ID = 63;
1274        /** Integer id of the BOPOMOFO block.
1275         * @stable ICU 2.4 
1276         */
1277        public static final int BOPOMOFO_ID = 64;
1278        /** Integer id of the HANGUL_COMPATIBILITY_JAMO block.
1279         * @stable ICU 2.4 
1280         */
1281        public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
1282        /** Integer id of the KANBUN block.
1283         * @stable ICU 2.4 
1284         */
1285        public static final int KANBUN_ID = 66;
1286        /** Integer id of the BOPOMOFO_EXTENDED block.
1287         * @stable ICU 2.4 
1288         */
1289        public static final int BOPOMOFO_EXTENDED_ID = 67;
1290        /** Integer id of the ENCLOSED_CJK_LETTERS_AND_MONTHS block.
1291         * @stable ICU 2.4 
1292         */
1293        public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
1294        /** Integer id of the CJK_COMPATIBILITY block.
1295         * @stable ICU 2.4 
1296         */
1297        public static final int CJK_COMPATIBILITY_ID = 69;
1298        /** Integer id of the CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A block.
1299         * @stable ICU 2.4 
1300         */
1301        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
1302        /** Integer id of the CJK_UNIFIED_IDEOGRAPHS block.
1303         * @stable ICU 2.4 
1304         */
1305        public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
1306        /** Integer id of the YI_SYLLABLES block.
1307         * @stable ICU 2.4 
1308         */
1309        public static final int YI_SYLLABLES_ID = 72;
1310        /** Integer id of the YI_RADICALS block.
1311         * @stable ICU 2.4 
1312         */
1313        public static final int YI_RADICALS_ID = 73;
1314        /** Integer id of the HANGUL_SYLLABLES block.
1315         * @stable ICU 2.4 
1316         */
1317        public static final int HANGUL_SYLLABLES_ID = 74;
1318        /** Integer id of the HIGH_SURROGATES block.
1319         * @stable ICU 2.4 
1320         */
1321        public static final int HIGH_SURROGATES_ID = 75;
1322        /** Integer id of the HIGH_PRIVATE_USE_SURROGATES block.
1323         * @stable ICU 2.4 
1324         */
1325        public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
1326        /** Integer id of the LOW_SURROGATES block.
1327         * @stable ICU 2.4 
1328         */
1329        public static final int LOW_SURROGATES_ID = 77;
1330        /**
1331         * Same as public static final int PRIVATE_USE_ID.
1332         * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1333         * and multiple code point ranges had this block.
1334         * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 
1335         * and adds separate blocks for the supplementary PUAs.
1336         * @stable ICU 2.4
1337         */
1338        public static final int PRIVATE_USE_AREA_ID = 78;
1339        /**
1340         * Same as public static final int PRIVATE_USE_AREA_ID.
1341         * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1342         * and multiple code point ranges had this block.
1343         * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 
1344         * and adds separate blocks for the supplementary PUAs.
1345         * @stable ICU 2.4
1346         */
1347        public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
1348        /** Integer id of the CJK_COMPATIBILITY_IDEOGRAPHS block.
1349         * @stable ICU 2.4 
1350         */
1351        public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
1352        /** Integer id of the ALPHABETIC_PRESENTATION_FORMS block.
1353         * @stable ICU 2.4 
1354         */
1355        public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
1356        /** Integer id of the ARABIC_PRESENTATION_FORMS_A block.
1357         * @stable ICU 2.4 
1358         */
1359        public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
1360        /** Integer id of the COMBINING_HALF_MARKS block.
1361         * @stable ICU 2.4 
1362         */
1363        public static final int COMBINING_HALF_MARKS_ID = 82;
1364        /** Integer id of the CJK_COMPATIBILITY_FORMS block.
1365         * @stable ICU 2.4 
1366         */
1367        public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
1368        /** Integer id of the SMALL_FORM_VARIANTS block.
1369         * @stable ICU 2.4 
1370         */
1371        public static final int SMALL_FORM_VARIANTS_ID = 84;
1372        /** Integer id of the ARABIC_PRESENTATION_FORMS_B block.
1373         * @stable ICU 2.4 
1374         */
1375        public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
1376        /** Integer id of the SPECIALS block.
1377         * @stable ICU 2.4 
1378         */
1379        public static final int SPECIALS_ID = 86;
1380        /** Integer id of the HALFWIDTH_AND_FULLWIDTH_FORMS block.
1381         * @stable ICU 2.4 
1382         */
1383        public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
1384        /** Integer id of the OLD_ITALIC block.
1385         * @stable ICU 2.4 
1386         */
1387        public static final int OLD_ITALIC_ID = 88;
1388        /** Integer id of the GOTHIC block.
1389         * @stable ICU 2.4 
1390         */
1391        public static final int GOTHIC_ID = 89;
1392        /** Integer id of the DESERET block.
1393         * @stable ICU 2.4 
1394         */
1395        public static final int DESERET_ID = 90;
1396        /** Integer id of the BYZANTINE_MUSICAL_SYMBOLS block.
1397         * @stable ICU 2.4 
1398         */
1399        public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
1400        /** Integer id of the MUSICAL_SYMBOLS block.
1401         * @stable ICU 2.4 
1402         */
1403        public static final int MUSICAL_SYMBOLS_ID = 92;
1404        /** Integer id of the MATHEMATICAL_ALPHANUMERIC_SYMBOLS block.
1405         * @stable ICU 2.4 
1406         */
1407        public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
1408        /** Integer id of the CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B block.
1409         * @stable ICU 2.4 
1410         */
1411        public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
1412        /** Integer id of the CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT block.
1413         * @stable ICU 2.4 
1414         */
1415        public static final int 
1416            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
1417        /** Integer id of the TAGS block.
1418         * @stable ICU 2.4 
1419         */
1420        public static final int TAGS_ID = 96;
1421    
1422        // New blocks in Unicode 3.2
1423    
1424        /** Integer block id under the older "Cyrillic Supplementary" name; same value (97) as CYRILLIC_SUPPLEMENT_ID.
1425         * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1426         * @stable ICU 2.4 
1427         */
1428        public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
1429        /** Integer block id under the newer "Cyrillic Supplement" name; defined as an alias of CYRILLIC_SUPPLEMENTARY_ID so the two cannot diverge.
1430         * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1431         * @stable ICU 3.0
1432         */
1433
1434        public static final int CYRILLIC_SUPPLEMENT_ID = CYRILLIC_SUPPLEMENTARY_ID;
1435        /** Integer id of the TAGALOG block.
1436         * @stable ICU 2.4 
1437         */
1438        public static final int TAGALOG_ID = 98;
1439        /** Integer id of the HANUNOO block.
1440         * @stable ICU 2.4 
1441         */
1442        public static final int HANUNOO_ID = 99;
1443        /** Integer id of the BUHID block.
1444         * @stable ICU 2.4 
1445         */
1446        public static final int BUHID_ID = 100;
1447        /** Integer id of the TAGBANWA block.
1448         * @stable ICU 2.4 
1449         */
1450        public static final int TAGBANWA_ID = 101;
1451        /** Integer id of the MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A block.
1452         * @stable ICU 2.4 
1453         */
1454        public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
1455        /** Integer id of the SUPPLEMENTAL_ARROWS_A block.
1456         * @stable ICU 2.4 
1457         */
1458        public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
1459        /** Integer id of the SUPPLEMENTAL_ARROWS_B block.
1460         * @stable ICU 2.4 
1461         */
1462        public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
1463        /** Integer id of the MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B block.
1464         * @stable ICU 2.4 
1465         */
1466        public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
1467        /** Integer id of the SUPPLEMENTAL_MATHEMATICAL_OPERATORS block.
1468         * @stable ICU 2.4 
1469         */
1470        public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
1471        /** Integer id of the KATAKANA_PHONETIC_EXTENSIONS block.
1472         * @stable ICU 2.4 
1473         */
1474        public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
1475        /** Integer id of the VARIATION_SELECTORS block.
1476         * @stable ICU 2.4 
1477         */
1478        public static final int VARIATION_SELECTORS_ID = 108;
1479        /** Integer id of the SUPPLEMENTARY_PRIVATE_USE_AREA_A block.
1480         * @stable ICU 2.4 
1481         */
1482        public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
1483        /** Integer id of the SUPPLEMENTARY_PRIVATE_USE_AREA_B block.
1484         * @stable ICU 2.4 
1485         */
1486        public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
1487        
1488        /** Integer id of the LIMBU block.
1489         * @stable ICU 2.6 
1490         */
1491        public static final int LIMBU_ID = 111; /*[1900]*/
1492        /** Integer id of the TAI_LE block.
1493         * @stable ICU 2.6 
1494         */
1495        public static final int TAI_LE_ID = 112; /*[1950]*/
1496        /** Integer id of the KHMER_SYMBOLS block.
1497         * @stable ICU 2.6 
1498         */
1499        public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
1500        /** Integer id of the PHONETIC_EXTENSIONS block.
1501         * @stable ICU 2.6
1502         */
1503        public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
1504        /** Integer id of the MISCELLANEOUS_SYMBOLS_AND_ARROWS block.
1505         * @stable ICU 2.6 
1506         */
1507        public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
1508        /** Integer id of the YIJING_HEXAGRAM_SYMBOLS block.
1509         * @stable ICU 2.6 
1510         */
1511        public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
1512        /** Integer id of the LINEAR_B_SYLLABARY block.
1513         * @stable ICU 2.6 
1514         */
1515        public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
1516        /** Integer id of the LINEAR_B_IDEOGRAMS block.
1517         * @stable ICU 2.6 
1518         */
1519        public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
1520        /** Integer id of the AEGEAN_NUMBERS block.
1521         * @stable ICU 2.6
1522         */
1523        public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
1524        /** Integer id of the UGARITIC block.
1525         * @stable ICU 2.6
1526         */
1527        public static final int UGARITIC_ID = 120; /*[10380]*/
1528        /** Integer id of the SHAVIAN block.
1529         * @stable ICU 2.6
1530         */
1531        public static final int SHAVIAN_ID = 121; /*[10450]*/
1532        /** Integer id of the OSMANYA block.
1533         * @stable ICU 2.6
1534         */
1535        public static final int OSMANYA_ID = 122; /*[10480]*/
1536        /** Integer id of the CYPRIOT_SYLLABARY block.
1537         * @stable ICU 2.6
1538         */
1539        public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
1540        /** Integer id of the TAI_XUAN_JING_SYMBOLS block.
1541         * @stable ICU 2.6
1542         */
1543        public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
1544        /** Integer id of the VARIATION_SELECTORS_SUPPLEMENT block.
1545         * @stable ICU 2.6
1546         */
1547        public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
1548
1549        /* New blocks in Unicode 4.1 */
1550
1551        /** Integer id of the ANCIENT_GREEK_MUSICAL_NOTATION block.
1552         * @draft ICU 3.4
1553         * @provisional This API might change or be removed in a future release.
1554         */
1555        public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
1556
1557        /** Integer id of the ANCIENT_GREEK_NUMBERS block.
1558         * @draft ICU 3.4
1559         * @provisional This API might change or be removed in a future release.
1560         */
1561        public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
1562
1563        /** Integer id of the ARABIC_SUPPLEMENT block.
1564         * @draft ICU 3.4
1565         * @provisional This API might change or be removed in a future release.
1566         */
1567        public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
1568
1569        /** Integer id of the BUGINESE block.
1570         * @draft ICU 3.4
1571         * @provisional This API might change or be removed in a future release.
1572         */
1573        public static final int BUGINESE_ID = 129; /*[1A00]*/
1574
1575        /** Integer id of the CJK_STROKES block.
1576         * @draft ICU 3.4
1577         * @provisional This API might change or be removed in a future release.
1578         */
1579        public static final int CJK_STROKES_ID = 130; /*[31C0]*/
1580
1581        /** Integer id of the COMBINING_DIACRITICAL_MARKS_SUPPLEMENT block.
1582         * @draft ICU 3.4
1583         * @provisional This API might change or be removed in a future release.
1584         */
1585        public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
1586
1587        /** Integer id of the COPTIC block.
1588         * @draft ICU 3.4
1589         * @provisional This API might change or be removed in a future release.
1590         */
1591        public static final int COPTIC_ID = 132; /*[2C80]*/
1592
1593        /** Integer id of the ETHIOPIC_EXTENDED block.
1594         * @draft ICU 3.4
1595         * @provisional This API might change or be removed in a future release.
1596         */
1597        public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
1598
1599        /** Integer id of the ETHIOPIC_SUPPLEMENT block.
1600         * @draft ICU 3.4
1601         * @provisional This API might change or be removed in a future release.
1602         */
1603        public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
1604
1605        /** Integer id of the GEORGIAN_SUPPLEMENT block.
1606         * @draft ICU 3.4
1607         * @provisional This API might change or be removed in a future release.
1608         */
1609        public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
1610
1611        /** Integer id of the GLAGOLITIC block.
1612         * @draft ICU 3.4
1613         * @provisional This API might change or be removed in a future release.
1614         */
1615        public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
1616
1617        /** Integer id of the KHAROSHTHI block.
1618         * @draft ICU 3.4
1619         * @provisional This API might change or be removed in a future release.
1620         */
1621        public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
1622
1623        /** Integer id of the MODIFIER_TONE_LETTERS block.
1624         * @draft ICU 3.4
1625         * @provisional This API might change or be removed in a future release.
1626         */
1627        public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
1628
1629        /** Integer id of the NEW_TAI_LUE block.
1630         * @draft ICU 3.4
1631         * @provisional This API might change or be removed in a future release.
1632         */
1633        public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
1634
1635        /** Integer id of the OLD_PERSIAN block.
1636         * @draft ICU 3.4
1637         * @provisional This API might change or be removed in a future release.
1638         */
1639        public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
1640
1641        /** Integer id of the PHONETIC_EXTENSIONS_SUPPLEMENT block.
1642         * @draft ICU 3.4
1643         * @provisional This API might change or be removed in a future release.
1644         */
1645        public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
1646
1647        /** Integer id of the SUPPLEMENTAL_PUNCTUATION block.
1648         * @draft ICU 3.4
1649         * @provisional This API might change or be removed in a future release.
1650         */
1651        public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
1652
1653        /** Integer id of the SYLOTI_NAGRI block.
1654         * @draft ICU 3.4
1655         * @provisional This API might change or be removed in a future release.
1656         */
1657        public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
1658
1659        /** Integer id of the TIFINAGH block.
1660         * @draft ICU 3.4
1661         * @provisional This API might change or be removed in a future release.
1662         */
1663        public static final int TIFINAGH_ID = 144; /*[2D30]*/
1664
1665        /** Integer id of the VERTICAL_FORMS block.
1666         * @draft ICU 3.4
1667         * @provisional This API might change or be removed in a future release.
1668         */
1669        public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
1670
1671        /* New blocks in Unicode 5.0 */
1672
1673        /** 
1674         * @draft ICU 3.6 
1675         * @provisional This API might change or be removed in a future release.
1676         */
1677        public static final int NKO_ID = 146; /*[07C0]*/
1678        /** 
1679         * @draft ICU 3.6 
1680         * @provisional This API might change or be removed in a future release.
1681         */
1682        public static final int BALINESE_ID = 147; /*[1B00]*/
1683        /** 
1684         * @draft ICU 3.6 
1685         * @provisional This API might change or be removed in a future release.
1686         */
1687        public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
1688        /** 
1689         * @draft ICU 3.6 
1690         * @provisional This API might change or be removed in a future release. 
1691         */
1692        public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
1693        /** 
1694         * @draft ICU 3.6 
1695         * @provisional This API might change or be removed in a future release.
1696         */
1697        public static final int PHAGS_PA_ID = 150; /*[A840]*/
1698        /** 
1699         * @draft ICU 3.6 
1700         * @provisional This API might change or be removed in a future release.
1701         */
1702        public static final int PHOENICIAN_ID = 151; /*[10900]*/
1703        /** 
1704         * @draft ICU 3.6 
1705         * @provisional This API might change or be removed in a future release.
1706         */
1707        public static final int CUNEIFORM_ID = 152; /*[12000]*/
1708        /** 
1709         * @draft ICU 3.6 
1710         * @provisional This API might change or be removed in a future release.
1711         */
1712        public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
1713        /**
1714         * @draft ICU 3.6 
1715         * @provisional This API might change or be removed in a future release.
1716         */
1717        public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
1718        
1719        /** 
1720         * @stable ICU 2.4 
1721         */
1722        public static final int COUNT = 155;
1723        
1724        // public methods --------------------------------------------------
1725        
1726        /** 
1727         * Gets the only instance of the UnicodeBlock with the argument ID.
1728         * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
1729         * @param id UnicodeBlock ID
1730         * @return the only instance of the UnicodeBlock with the argument ID
1731         *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be 
1732         *         returned.
1733         * @stable ICU 2.4
1734         */
1735        public static UnicodeBlock getInstance(int id)
1736        {
1737            if (id >= 0 && id < BLOCKS_.length) {
1738                return BLOCKS_[id];
1739            }
1740            return INVALID_CODE;
1741        }
1742        
1743        /**
1744         * Returns the Unicode allocation block that contains the code point,
1745         * or null if the code point is not a member of a defined block.
1746         * @param ch code point to be tested
1747         * @return the Unicode allocation block that contains the code point
1748         * @stable ICU 2.4
1749         */
1750        public static UnicodeBlock of(int ch)
1751        {
1752            if (ch > MAX_VALUE) {
1753                return INVALID_CODE;
1754            }
1755
1756            return UnicodeBlock.getInstance((PROPERTY_.getAdditional(ch, 0)
1757                         & BLOCK_MASK_) >> BLOCK_SHIFT_);
1758        }
1759
1760        /**
1761         * Internal function returning of(ch).getID().
1762         *
1763         * @param ch
1764         * @return numeric block value
1765         * @internal
1766         */
1767        static int idOf(int ch) {
1768            if (ch < 0 || ch > MAX_VALUE) {
1769                return -1;
1770            }
1771
1772            return (PROPERTY_.getAdditional(ch, 0) & BLOCK_MASK_) >> BLOCK_SHIFT_;
1773        }
1774
1775        /**
1776         * Cover the JDK 1.5 API.  Return the Unicode block with the
1777         * given name.  <br/><b>Note</b>: Unlike JDK 1.5, this only matches
1778         * against the official UCD name and the Java block name
1779         * (ignoring case).
1780         * @param blockName the name of the block to match
1781         * @return the UnicodeBlock with that name
1782         * @throws IllegalArgumentException if the blockName could not be matched
1783         * @stable ICU 3.0
1784         */
1785        public static final UnicodeBlock forName(String   blockName) {
1786            Map   m = null;
1787            if (mref != null) {
1788                m = (Map  )mref.get();
1789            }
1790            if (m == null) {
1791                m = new HashMap  (BLOCKS_.length);
1792                for (int i = 0; i < BLOCKS_.length; ++i) {
1793                    UnicodeBlock b = BLOCKS_[i];
1794                    String   name = getPropertyValueName(UProperty.BLOCK, b.getID(), UProperty.NameChoice.LONG);
1795                    m.put(name.toUpperCase(), b);
1796            m.put(name.replace('_',' ').toUpperCase(), b);
1797                    m.put(b.toString().toUpperCase(), b);
1798                }
1799                mref = new SoftReference  (m);
1800            }
1801            UnicodeBlock b = (UnicodeBlock)m.get(blockName.toUpperCase());
1802            if (b == null) {
1803                throw new IllegalArgumentException  ();
1804            }
1805            return b;
1806        }
1807        private static SoftReference   mref;
1808
1809        /**
1810         * Returns the type ID of this Unicode block
1811         * @return integer type ID of this Unicode block
1812         * @stable ICU 2.4
1813         */
1814        public int getID()
1815        {
1816            return m_id_;
1817        }
1818        
1819        // private data members ---------------------------------------------
1820        
1821        /**
1822         * Array of UnicodeBlocks, for easy access in getInstance(int)
1823         */
1824        private final static UnicodeBlock BLOCKS_[] = {
1825            NO_BLOCK, BASIC_LATIN, 
1826            LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A, 
1827            LATIN_EXTENDED_B, IPA_EXTENSIONS, 
1828            SPACING_MODIFIER_LETTERS, COMBINING_DIACRITICAL_MARKS,
1829            GREEK, CYRILLIC,
1830            ARMENIAN, HEBREW,
1831            ARABIC, SYRIAC, 
1832            THAANA, DEVANAGARI, 
1833            BENGALI, GURMUKHI, 
1834            GUJARATI, ORIYA, 
1835            TAMIL, TELUGU, 
1836            KANNADA, MALAYALAM, 
1837            SINHALA, THAI, 
1838            LAO, TIBETAN, 
1839            MYANMAR, GEORGIAN, 
1840            HANGUL_JAMO, ETHIOPIC, 
1841            CHEROKEE, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1842            OGHAM, RUNIC, 
1843            KHMER, MONGOLIAN, 
1844            LATIN_EXTENDED_ADDITIONAL, GREEK_EXTENDED, 
1845            GENERAL_PUNCTUATION, SUPERSCRIPTS_AND_SUBSCRIPTS,
1846            CURRENCY_SYMBOLS, COMBINING_MARKS_FOR_SYMBOLS, 
1847            LETTERLIKE_SYMBOLS, NUMBER_FORMS, 
1848            ARROWS, MATHEMATICAL_OPERATORS, 
1849            MISCELLANEOUS_TECHNICAL, CONTROL_PICTURES,
1850            OPTICAL_CHARACTER_RECOGNITION, ENCLOSED_ALPHANUMERICS,
1851            BOX_DRAWING, BLOCK_ELEMENTS,
1852            GEOMETRIC_SHAPES, MISCELLANEOUS_SYMBOLS,
1853            DINGBATS, BRAILLE_PATTERNS,
1854            CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS,
1855            IDEOGRAPHIC_DESCRIPTION_CHARACTERS, CJK_SYMBOLS_AND_PUNCTUATION,
1856            HIRAGANA, KATAKANA, 
1857            BOPOMOFO, HANGUL_COMPATIBILITY_JAMO,
1858            KANBUN, BOPOMOFO_EXTENDED, 
1859            ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY,
1860            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, CJK_UNIFIED_IDEOGRAPHS,
1861            YI_SYLLABLES, YI_RADICALS, 
1862            HANGUL_SYLLABLES, HIGH_SURROGATES,
1863            HIGH_PRIVATE_USE_SURROGATES, LOW_SURROGATES,
1864            PRIVATE_USE_AREA, CJK_COMPATIBILITY_IDEOGRAPHS,
1865            ALPHABETIC_PRESENTATION_FORMS, ARABIC_PRESENTATION_FORMS_A,
1866            COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS,
1867            SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B,
1868            SPECIALS, HALFWIDTH_AND_FULLWIDTH_FORMS,
1869            OLD_ITALIC, GOTHIC, 
1870            DESERET, BYZANTINE_MUSICAL_SYMBOLS,
1871            MUSICAL_SYMBOLS, MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1872            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 
1873            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 
1874            TAGS, CYRILLIC_SUPPLEMENT,
1875            TAGALOG, HANUNOO, 
1876            BUHID, TAGBANWA, 
1877            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, SUPPLEMENTAL_ARROWS_A,
1878            SUPPLEMENTAL_ARROWS_B, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1879            SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 
1880            KATAKANA_PHONETIC_EXTENSIONS,
1881            VARIATION_SELECTORS, SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1882            SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1883            LIMBU, TAI_LE, KHMER_SYMBOLS, PHONETIC_EXTENSIONS,
1884            MISCELLANEOUS_SYMBOLS_AND_ARROWS, YIJING_HEXAGRAM_SYMBOLS,
1885            LINEAR_B_SYLLABARY, LINEAR_B_IDEOGRAMS, AEGEAN_NUMBERS,
1886            UGARITIC, SHAVIAN, OSMANYA, CYPRIOT_SYLLABARY,
1887            TAI_XUAN_JING_SYMBOLS, VARIATION_SELECTORS_SUPPLEMENT,
1888
1889            /* New blocks in Unicode 4.1 */
1890            ANCIENT_GREEK_MUSICAL_NOTATION,
1891            ANCIENT_GREEK_NUMBERS,
1892            ARABIC_SUPPLEMENT,
1893            BUGINESE,
1894            CJK_STROKES,
1895            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
1896            COPTIC,
1897            ETHIOPIC_EXTENDED,
1898            ETHIOPIC_SUPPLEMENT,
1899            GEORGIAN_SUPPLEMENT,
1900            GLAGOLITIC,
1901            KHAROSHTHI,
1902            MODIFIER_TONE_LETTERS,
1903            NEW_TAI_LUE,
1904            OLD_PERSIAN,
1905            PHONETIC_EXTENSIONS_SUPPLEMENT,
1906            SUPPLEMENTAL_PUNCTUATION,
1907            SYLOTI_NAGRI,
1908            TIFINAGH,
1909            VERTICAL_FORMS,
1910            NKO,
1911            BALINESE,
1912            LATIN_EXTENDED_C,
1913            LATIN_EXTENDED_D,
1914            PHAGS_PA,
1915            PHOENICIAN,
1916            CUNEIFORM,
1917            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
1918            COUNTING_ROD_NUMERALS
1919        };
1920
1921        static {
1922            if (COUNT!=BLOCKS_.length) {
1923                throw new java.lang.IllegalStateException  ("UnicodeBlock fields are inconsistent!");
1924            }
1925        }
1926        /**
1927         * Identification code for this UnicodeBlock
1928         */
1929        private int m_id_;
1930        
1931        // private constructor ----------------------------------------------
1932        
/**
 * Creates a UnicodeBlock from its name and numeric ID. The name is
 * handed to the superclass constructor; the ID is stored for getID().
 * @param name name of this UnicodeBlock
 * @param id unique id of this UnicodeBlock
 * @exception NullPointerException if name is <code>null</code>
 */
private UnicodeBlock(String name, int id)
{
    super(name);
    this.m_id_ = id;
}
1944    }
1945    
/**
 * East Asian Width constants: the integer values used for the
 * {@link UProperty#EAST_ASIAN_WIDTH} property.
 * @see UProperty#EAST_ASIAN_WIDTH
 * @see UCharacter#getIntPropertyValue
 * @stable ICU 2.4
 */
public interface EastAsianWidth {
    // A member interface is implicitly static and its fields are
    // implicitly public, static and final, so the modifiers are omitted.

    /** @stable ICU 2.4 */
    int NEUTRAL = 0;
    /** @stable ICU 2.4 */
    int AMBIGUOUS = 1;
    /** @stable ICU 2.4 */
    int HALFWIDTH = 2;
    /** @stable ICU 2.4 */
    int FULLWIDTH = 3;
    /** @stable ICU 2.4 */
    int NARROW = 4;
    /** @stable ICU 2.4 */
    int WIDE = 5;
    /**
     * One more than the highest value defined in this interface.
     * @stable ICU 2.4
     */
    int COUNT = 6;
}
1983
/**
 * Decomposition Type constants: the integer values used for the
 * {@link UProperty#DECOMPOSITION_TYPE} property.
 * @see UProperty#DECOMPOSITION_TYPE
 * @stable ICU 2.4
 */
public interface DecompositionType {
    // A member interface is implicitly static and its fields are
    // implicitly public, static and final, so the modifiers are omitted.

    /** @stable ICU 2.4 */
    int NONE = 0;
    /** @stable ICU 2.4 */
    int CANONICAL = 1;
    /** @stable ICU 2.4 */
    int COMPAT = 2;
    /** @stable ICU 2.4 */
    int CIRCLE = 3;
    /** @stable ICU 2.4 */
    int FINAL = 4;
    /** @stable ICU 2.4 */
    int FONT = 5;
    /** @stable ICU 2.4 */
    int FRACTION = 6;
    /** @stable ICU 2.4 */
    int INITIAL = 7;
    /** @stable ICU 2.4 */
    int ISOLATED = 8;
    /** @stable ICU 2.4 */
    int MEDIAL = 9;
    /** @stable ICU 2.4 */
    int NARROW = 10;
    /** @stable ICU 2.4 */
    int NOBREAK = 11;
    /** @stable ICU 2.4 */
    int SMALL = 12;
    /** @stable ICU 2.4 */
    int SQUARE = 13;
    /** @stable ICU 2.4 */
    int SUB = 14;
    /** @stable ICU 2.4 */
    int SUPER = 15;
    /** @stable ICU 2.4 */
    int VERTICAL = 16;
    /** @stable ICU 2.4 */
    int WIDE = 17;
    /**
     * One more than the highest value defined in this interface.
     * @stable ICU 2.4
     */
    int COUNT = 18;
}
2068    
/**
 * Joining Type constants: the integer values used for the
 * {@link UProperty#JOINING_TYPE} property.
 * @see UProperty#JOINING_TYPE
 * @stable ICU 2.4
 */
public interface JoiningType {
    // A member interface is implicitly static and its fields are
    // implicitly public, static and final, so the modifiers are omitted.

    /** @stable ICU 2.4 */
    int NON_JOINING = 0;
    /** @stable ICU 2.4 */
    int JOIN_CAUSING = 1;
    /** @stable ICU 2.4 */
    int DUAL_JOINING = 2;
    /** @stable ICU 2.4 */
    int LEFT_JOINING = 3;
    /** @stable ICU 2.4 */
    int RIGHT_JOINING = 4;
    /** @stable ICU 2.4 */
    int TRANSPARENT = 5;
    /**
     * One more than the highest value defined in this interface.
     * @stable ICU 2.4
     */
    int COUNT = 6;
}
2105    
/**
 * Joining Group constants: the integer values used for the
 * {@link UProperty#JOINING_GROUP} property.
 * @see UProperty#JOINING_GROUP
 * @stable ICU 2.4
 */
public interface JoiningGroup {
    // A member interface is implicitly static and its fields are
    // implicitly public, static and final, so the modifiers are omitted.

    /** @stable ICU 2.4 */
    int NO_JOINING_GROUP = 0;
    /** @stable ICU 2.4 */
    int AIN = 1;
    /** @stable ICU 2.4 */
    int ALAPH = 2;
    /** @stable ICU 2.4 */
    int ALEF = 3;
    /** @stable ICU 2.4 */
    int BEH = 4;
    /** @stable ICU 2.4 */
    int BETH = 5;
    /** @stable ICU 2.4 */
    int DAL = 6;
    /** @stable ICU 2.4 */
    int DALATH_RISH = 7;
    /** @stable ICU 2.4 */
    int E = 8;
    /** @stable ICU 2.4 */
    int FEH = 9;
    /** @stable ICU 2.4 */
    int FINAL_SEMKATH = 10;
    /** @stable ICU 2.4 */
    int GAF = 11;
    /** @stable ICU 2.4 */
    int GAMAL = 12;
    /** @stable ICU 2.4 */
    int HAH = 13;
    /** @stable ICU 2.4 */
    int HAMZA_ON_HEH_GOAL = 14;
    /** @stable ICU 2.4 */
    int HE = 15;
    /** @stable ICU 2.4 */
    int HEH = 16;
    /** @stable ICU 2.4 */
    int HEH_GOAL = 17;
    /** @stable ICU 2.4 */
    int HETH = 18;
    /** @stable ICU 2.4 */
    int KAF = 19;
    /** @stable ICU 2.4 */
    int KAPH = 20;
    /** @stable ICU 2.4 */
    int KNOTTED_HEH = 21;
    /** @stable ICU 2.4 */
    int LAM = 22;
    /** @stable ICU 2.4 */
    int LAMADH = 23;
    /** @stable ICU 2.4 */
    int MEEM = 24;
    /** @stable ICU 2.4 */
    int MIM = 25;
    /** @stable ICU 2.4 */
    int NOON = 26;
    /** @stable ICU 2.4 */
    int NUN = 27;
    /** @stable ICU 2.4 */
    int PE = 28;
    /** @stable ICU 2.4 */
    int QAF = 29;
    /** @stable ICU 2.4 */
    int QAPH = 30;
    /** @stable ICU 2.4 */
    int REH = 31;
    /** @stable ICU 2.4 */
    int REVERSED_PE = 32;
    /** @stable ICU 2.4 */
    int SAD = 33;
    /** @stable ICU 2.4 */
    int SADHE = 34;
    /** @stable ICU 2.4 */
    int SEEN = 35;
    /** @stable ICU 2.4 */
    int SEMKATH = 36;
    /** @stable ICU 2.4 */
    int SHIN = 37;
    /** @stable ICU 2.4 */
    int SWASH_KAF = 38;
    /** @stable ICU 2.4 */
    int SYRIAC_WAW = 39;
    /** @stable ICU 2.4 */
    int TAH = 40;
    /** @stable ICU 2.4 */
    int TAW = 41;
    /** @stable ICU 2.4 */
    int TEH_MARBUTA = 42;
    /** @stable ICU 2.4 */
    int TETH = 43;
    /** @stable ICU 2.4 */
    int WAW = 44;
    /** @stable ICU 2.4 */
    int YEH = 45;
    /** @stable ICU 2.4 */
    int YEH_BARREE = 46;
    /** @stable ICU 2.4 */
    int YEH_WITH_TAIL = 47;
    /** @stable ICU 2.4 */
    int YUDH = 48;
    /** @stable ICU 2.4 */
    int YUDH_HE = 49;
    /** @stable ICU 2.4 */
    int ZAIN = 50;
    /** @stable ICU 2.6 */
    int FE = 51;
    /** @stable ICU 2.6 */
    int KHAPH = 52;
    /** @stable ICU 2.6 */
    int ZHAIN = 53;
    /**
     * One more than the highest value defined in this interface.
     * @stable ICU 2.4
     */
    int COUNT = 54;
}
2334
/**
 * Grapheme Cluster Break constants: the integer values used for the
 * {@link UProperty#GRAPHEME_CLUSTER_BREAK} property.
 * @see UProperty#GRAPHEME_CLUSTER_BREAK
 * @draft ICU 3.4
 * @provisional This API might change or be removed in a future release.
 */
public interface GraphemeClusterBreak {
    // A member interface is implicitly static and its fields are
    // implicitly public, static and final, so the modifiers are omitted.

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int OTHER = 0;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int CONTROL = 1;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int CR = 2;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int EXTEND = 3;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int L = 4;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int LF = 5;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int LV = 6;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int LVT = 7;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int T = 8;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int V = 9;

    /**
     * One more than the highest value defined in this interface.
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int COUNT = 10;
}
2398
/**
 * Word Break constants: the integer values used for the
 * {@link UProperty#WORD_BREAK} property.
 * @see UProperty#WORD_BREAK
 * @draft ICU 3.4
 * @provisional This API might change or be removed in a future release.
 */
public interface WordBreak {
    // A member interface is implicitly static and its fields are
    // implicitly public, static and final, so the modifiers are omitted.

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int OTHER = 0;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int ALETTER = 1;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int FORMAT = 2;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int KATAKANA = 3;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int MIDLETTER = 4;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int MIDNUM = 5;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int NUMERIC = 6;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int EXTENDNUMLET = 7;

    /**
     * One more than the highest value defined in this interface.
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int COUNT = 8;
}
2452
/**
 * Sentence Break constants: the integer values used for the
 * {@link UProperty#SENTENCE_BREAK} property.
 * @see UProperty#SENTENCE_BREAK
 * @draft ICU 3.4
 * @provisional This API might change or be removed in a future release.
 */
public interface SentenceBreak {
    // A member interface is implicitly static and its fields are
    // implicitly public, static and final, so the modifiers are omitted.

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int OTHER = 0;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int ATERM = 1;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int CLOSE = 2;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int FORMAT = 3;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int LOWER = 4;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int NUMERIC = 5;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int OLETTER = 6;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int SEP = 7;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int SP = 8;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int STERM = 9;

    /**
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int UPPER = 10;

    /**
     * One more than the highest value defined in this interface.
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    int COUNT = 11;
}
2521
2522    /**
2523     * Line Break constants.
2524     * @see UProperty#LINE_BREAK
2525     * @stable ICU 2.4
2526     */
2527    public static interface LineBreak 
2528    {
2529        /**
2530         * @stable ICU 2.4
2531         */
2532        public static final int UNKNOWN = 0;
2533        /**
2534         * @stable ICU 2.4
2535         */
2536        public static final int AMBIGUOUS = 1;
2537        /**
2538         * @stable ICU 2.4
2539         */
2540        public static final int ALPHABETIC = 2;
2541        /**
2542         * @stable ICU 2.4
2543         */
2544        public static final int BREAK_BOTH = 3;
2545        /**
2546         * @stable ICU 2.4
2547         */
2548        public static final int BREAK_AFTER = 4;
2549        /**
2550         * @stable ICU 2.4
2551         */
2552        public static final int BREAK_BEFORE = 5;
2553        /**
2554         * @stable ICU 2.4
2555         */
2556        public static final int MANDATORY_BREAK = 6;
2557        /**
2558         * @stable ICU 2.4
2559         */
2560        public static final int CONTINGENT_BREAK = 7;
2561        /**
2562         * @stable ICU 2.4
2563         */
2564        public static final int CLOSE_PUNCTUATION = 8;
2565        /**
2566         * @stable ICU 2.4
2567         */
2568        public static final int COMBINING_MARK = 9;
2569        /**
2570         * @stable ICU 2.4
2571         */
2572        public static final int CARRIAGE_RETURN = 10;
2573        /**
2574         * @stable ICU 2.4
2575         */
2576        public static final int EXCLAMATION = 11;
2577        /**
2578         * @stable ICU 2.4
2579         */
2580        public static final int GLUE = 12;
2581        /**
2582         * @stable ICU 2.4
2583         */
2584        public static final int HYPHEN = 13;
2585        /**
2586         * @stable ICU 2.4
2587         */
2588        public static final int IDEOGRAPHIC = 14;
2589        /**
2590         * @see #INSEPARABLE
2591         * @stable ICU 2.4
2592         */
2593        public static final int INSEPERABLE = 15;
2594        /**
2595         * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
2596         * @stable ICU 3.0
2597         */
2598        public static final int INSEPARABLE = 15;
2599        /**
2600         * @stable ICU 2.4
2601         */
2602        public static final int INFIX_NUMERIC = 16;
2603        /**
2604         * @stable ICU 2.4
2605         */
2606        public static final int LINE_FEED = 17;
2607        /**
2608         * @stable ICU 2.4
2609         */
2610        public static final int NONSTARTER = 18;
2611        /**
2612         * @stable ICU 2.4
2613         */
2614        public static final int NUMERIC = 19;
2615        /**
2616         * @stable ICU 2.4
2617         */
2618        public static final int OPEN_PUNCTUATION = 20;
2619        /**
2620         * @stable ICU 2.4
2621         */
2622        public static final int POSTFIX_NUMERIC = 21;
2623        /**
2624         * @stable ICU 2.4
2625         */
2626        public static final int PREFIX_NUMERIC = 22;
2627        /**
2628         * @stable ICU 2.4
2629         */
2630        public static final int QUOTATION = 23;
2631        /**
2632         * @stable ICU 2.4
2633         */
2634        public static final int COMPLEX_CONTEXT = 24;
2635        /**
2636         * @stable ICU 2.4
2637         */
2638        public static final int SURROGATE = 25;
2639        /**
2640         * @stable ICU 2.4
2641         */
2642        public static final int SPACE = 26;
2643        /**
2644         * @stable ICU 2.4
2645         */
2646        public static final int BREAK_SYMBOLS = 27;
2647        /**
2648         * @stable ICU 2.4
2649         */
2650        public static final int ZWSPACE = 28;
2651        
2652        /**
2653         * @stable ICU 2.6
2654         */
2655        public static final int NEXT_LINE = 29;       /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
2656        
2657        /**
2658         * @stable ICU 2.6
2659         */
2660        public static final int  WORD_JOINER = 30;      /*[WJ]*/
2661        
2662        /* from here on: new in Unicode 4.1/ICU 3.4 */
2663
2664        /**
2665         * @draft ICU 3.4
2666         * @provisional This API might change or be removed in a future release.
2667         */
2668        public static final int  H2 = 31;
2669        /**
2670         * @draft ICU 3.4
2671         * @provisional This API might change or be removed in a future release.
2672         */
2673        public static final int  H3 = 32;
2674        /**
2675         * @draft ICU 3.4
2676         * @provisional This API might change or be removed in a future release.
2677         */
2678        public static final int  JL = 33;
2679        /**
2680         * @draft ICU 3.4
2681         * @provisional This API might change or be removed in a future release.
2682         */
2683        public static final int  JT = 34;
2684        /**
2685         * @draft ICU 3.4
2686         * @provisional This API might change or be removed in a future release.
2687         */
2688        public static final int  JV = 35;
2689
2690        /**
2691         * @stable ICU 2.4
2692         */
2693        public static final int COUNT = 36;
2694    }
2695    
2696    /**
2697     * Numeric Type constants.
2698     * @see UProperty#NUMERIC_TYPE
2699     * @stable ICU 2.4
2700     */
2701    public static interface NumericType 
2702    {
2703        /**
2704         * @stable ICU 2.4
2705         */
2706        public static final int NONE = 0;
2707        /**
2708         * @stable ICU 2.4
2709         */
2710        public static final int DECIMAL = 1;
2711        /**
2712         * @stable ICU 2.4
2713         */
2714        public static final int DIGIT = 2;
2715        /**
2716         * @stable ICU 2.4
2717         */
2718        public static final int NUMERIC = 3;
2719        /**
2720         * @stable ICU 2.4
2721         */
2722        public static final int COUNT = 4;
2723    }
2724    
2725    /**
2726     * Hangul Syllable Type constants.
2727     *
2728     * @see UProperty#HANGUL_SYLLABLE_TYPE
2729     * @stable ICU 2.6
2730     */
2731    public static interface HangulSyllableType 
2732    {
2733        /**
2734         * @stable ICU 2.6
2735         */
2736        public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
2737        /**
2738         * @stable ICU 2.6
2739         */
2740        public static final int LEADING_JAMO        = 1;   /*[L]*/
2741        /**
2742         * @stable ICU 2.6
2743         */
2744        public static final int VOWEL_JAMO          = 2;   /*[V]*/
2745        /**
2746         * @stable ICU 2.6
2747         */
2748        public static final int TRAILING_JAMO       = 3;   /*[T]*/
2749        /**
2750         * @stable ICU 2.6
2751         */
2752        public static final int LV_SYLLABLE         = 4;   /*[LV]*/
2753        /**
2754         * @stable ICU 2.6
2755         */
2756        public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
2757        /**
2758         * @stable ICU 2.6
2759         */
2760        public static final int COUNT               = 6;
2761    }
2762
2763    // public data members -----------------------------------------------
2764  
    /** 
     * The lowest Unicode code point value.
     * Same value as UTF16.CODEPOINT_MIN_VALUE.
     * @stable ICU 2.1
     */
    public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;

    /**
     * The highest Unicode code point value (scalar value) according to the 
     * Unicode Standard. 
     * This is a 21-bit value (21 bits, rounded up).<br>
     * Same value as UTF16.CODEPOINT_MAX_VALUE.<br>
     * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE
     * @stable ICU 2.1
     */
    public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE; 
      
    /**
     * The minimum value for Supplementary code points.
     * Same value as UTF16.SUPPLEMENTARY_MIN_VALUE.
     * @stable ICU 2.1
     */
    public static final int SUPPLEMENTARY_MIN_VALUE = 
        UTF16.SUPPLEMENTARY_MIN_VALUE;
      
    /**
     * Unicode value (U+FFFD) used when translating into Unicode encoding 
     * form and there is no existing character.
     * @stable ICU 2.1
     */
    public static final int REPLACEMENT_CHAR = '\uFFFD';
        
    /**
     * Special value that is returned by getUnicodeNumericValue(int) when no 
     * numeric value is defined for a code point.
     * Callers should compare results against this constant rather than 
     * against a literal.
     * @stable ICU 2.4
     * @see #getUnicodeNumericValue
     */
    public static final double NO_NUMERIC_VALUE = -123456789;

    /**
     * Compatibility constant for Java Character's MIN_RADIX.
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;

    /**
     * Compatibility constant for Java Character's MAX_RADIX.
     * @draft ICU 3.4
     * @provisional This API might change or be removed in a future release.
     */
    public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
2815
2816    // public methods ----------------------------------------------------
2817      
2818    /**
2819     * Retrieves the numeric value of a decimal digit code point.
2820     * <br>This method observes the semantics of
2821     * <code>java.lang.Character.digit()</code>.  Note that this
2822     * will return positive values for code points for which isDigit
2823     * returns false, just like java.lang.Character.
2824     * <br><em>Semantic Change:</em> In release 1.3.1 and
2825     * prior, this did not treat the European letters as having a
2826     * digit value, and also treated numeric letters and other numbers as 
2827     * digits.  
2828     * This has been changed to conform to the java semantics.
2829     * <br>A code point is a valid digit if and only if:
2830     * <ul>
2831     *   <li>ch is a decimal digit or one of the european letters, and
2832     *   <li>the value of ch is less than the specified radix.
2833     * </ul>
2834     * @param ch the code point to query
2835     * @param radix the radix
2836     * @return the numeric value represented by the code point in the
2837     * specified radix, or -1 if the code point is not a decimal digit
2838     * or if its value is too large for the radix
2839     * @stable ICU 2.1
2840     */
2841    public static int digit(int ch, int radix)
2842    {
2843        // when ch is out of bounds getProperty == 0
2844        int props = getProperty(ch);
2845        int value;        
2846        if (getNumericType(props) == NumericType.DECIMAL) {
2847            value = UCharacterProperty.getUnsignedValue(props);
2848        } else {
2849            value = getEuropeanDigit(ch);
2850        }
2851        return (0 <= value && value < radix) ? value : -1;
2852    }
2853    
2854    /**
2855     * Retrieves the numeric value of a decimal digit code point.
2856     * <br>This is a convenience overload of <code>digit(int, int)</code> 
2857     * that provides a decimal radix.
2858     * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
2859     * treated numeric letters and other numbers as digits.  This has
2860     * been changed to conform to the java semantics.
2861     * @param ch the code point to query
2862     * @return the numeric value represented by the code point,
2863     * or -1 if the code point is not a decimal digit or if its
2864     * value is too large for a decimal radix 
2865     * @stable ICU 2.1
2866     */
2867    public static int digit(int ch)
2868    {
2869        int props = getProperty(ch);
2870        if (getNumericType(props) == NumericType.DECIMAL) {
2871            return UCharacterProperty.getUnsignedValue(props);
2872        } else {
2873            return -1;
2874        }
2875    }
2876
2877    /** 
2878     * Returns the numeric value of the code point as a nonnegative 
2879     * integer.
2880     * <br>If the code point does not have a numeric value, then -1 is returned. 
2881     * <br>
2882     * If the code point has a numeric value that cannot be represented as a 
2883     * nonnegative integer (for example, a fractional value), then -2 is 
2884     * returned.
2885     * @param ch the code point to query
2886     * @return the numeric value of the code point, or -1 if it has no numeric 
2887     * value, or -2 if it has a numeric value that cannot be represented as a 
2888     * nonnegative integer
2889     * @stable ICU 2.1
2890     */
2891    public static int getNumericValue(int ch)
2892    {
2893        // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit()
2894        int props = PROPERTY_.getProperty(ch);
2895        int numericType = getNumericType(props);
2896
2897        if(numericType==0) {
2898            return getEuropeanDigit(ch);
2899        }
2900        if(numericType==UCharacterProperty.NT_FRACTION || numericType>=UCharacterProperty.NT_COUNT) {
2901            return -2;
2902        }
2903
2904        int numericValue = UCharacterProperty.getUnsignedValue(props);
2905
2906        if(numericType<NumericType.COUNT) {
2907            /* normal type, the value is stored directly */
2908            return numericValue;
2909        } else /* numericType==NT_LARGE */ {
2910            /* large value with exponent */
2911            long numValue;
2912            int mant, exp;
2913
2914            mant=numericValue>>LARGE_MANT_SHIFT;
2915            exp=numericValue&LARGE_EXP_MASK;
2916            if(mant==0) {
2917                mant=1;
2918                exp+=LARGE_EXP_OFFSET_EXTRA;
2919            } else if(mant>9) {
2920                return -2; /* reserved mantissa value */
2921            } else {
2922                exp+=LARGE_EXP_OFFSET;
2923            }
2924            if(exp>9) {
2925                return -2;
2926            }
2927
2928            numValue=mant;
2929
2930            /* multiply by 10^exp without math.h */
2931            while(exp>=4) {
2932                numValue*=10000.;
2933                exp-=4;
2934            }
2935            switch(exp) {
2936            case 3:
2937                numValue*=1000.;
2938                break;
2939            case 2:
2940                numValue*=100.;
2941                break;
2942            case 1:
2943                numValue*=10.;
2944                break;
2945            case 0:
2946            default:
2947                break;
2948            }
2949            if(numValue<=Integer.MAX_VALUE) {
2950                return (int)numValue;
2951            } else {
2952                return -2;
2953            }
2954        }
2955    }
2956    
2957    /**
2958     * <p>Get the numeric value for a Unicode code point as defined in the 
2959     * Unicode Character Database.</p>
2960     * <p>A "double" return type is necessary because some numeric values are 
2961     * fractions, negative, or too large for int.</p>
2962     * <p>For characters without any numeric values in the Unicode Character 
2963     * Database, this function will return NO_NUMERIC_VALUE.</p>
2964     * <p><em>API Change:</em> In release 2.2 and prior, this API has a
2965     * return type int and returns -1 when the argument ch does not have a 
2966     * corresponding numeric value. This has been changed to synch with ICU4C
2967     * </p>
2968     * This corresponds to the ICU4C function u_getNumericValue.
2969     * @param ch Code point to get the numeric value for.
2970     * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
2971     * @stable ICU 2.4
2972     */
2973    public static double getUnicodeNumericValue(int ch)
2974    {
2975        // equivalent to c version double u_getNumericValue(UChar32 c)
2976        int props = PROPERTY_.getProperty(ch);
2977        int numericType = getNumericType(props);
2978
2979        if(numericType==0 || numericType>=UCharacterProperty.NT_COUNT) {
2980            return NO_NUMERIC_VALUE;
2981        }
2982
2983        int numericValue = UCharacterProperty.getUnsignedValue(props);
2984
2985        if(numericType<NumericType.COUNT) {
2986            /* normal type, the value is stored directly */
2987            return numericValue;
2988        } else if(numericType==UCharacterProperty.NT_FRACTION) {
2989            /* fraction value */
2990            int numerator, denominator;
2991
2992            numerator=numericValue>>FRACTION_NUM_SHIFT;
2993            denominator=(numericValue&FRACTION_DEN_MASK)+FRACTION_DEN_OFFSET;
2994
2995            if(numerator==0) {
2996                numerator=-1;
2997            }
2998            return (double)numerator/(double)denominator;
2999        } else /* numericType==NT_LARGE */ {
3000            /* large value with exponent */
3001            double numValue;
3002            int mant, exp;
3003
3004            mant=numericValue>>LARGE_MANT_SHIFT;
3005            exp=numericValue&LARGE_EXP_MASK;
3006            if(mant==0) {
3007                mant=1;
3008                exp+=LARGE_EXP_OFFSET_EXTRA;
3009            } else if(mant>9) {
3010                return NO_NUMERIC_VALUE; /* reserved mantissa value */
3011            } else {
3012                exp+=LARGE_EXP_OFFSET;
3013            }
3014
3015            numValue=mant;
3016
3017            /* multiply by 10^exp without math.h */
3018            while(exp>=4) {
3019                numValue*=10000.;
3020                exp-=4;
3021            }
3022            switch(exp) {
3023            case 3:
3024                numValue*=1000.;
3025                break;
3026            case 2:
3027                numValue*=100.;
3028                break;
3029            case 1:
3030                numValue*=10.;
3031                break;
3032            case 0:
3033            default:
3034                break;
3035            }
3036
3037            return numValue;
3038        }
3039    }
3040  
3041    /**
3042     * Compatibility override of Java deprecated method.  This
3043     * method will always remain deprecated.  Delegates to
3044     * java.lang.Character.isSpace.
3045     * @param ch the code point
3046     * @return true if the code point is a space character as
3047     * defined by java.lang.Character.isSpace.
3048     * @deprecated ICU 3.4 (Java)
3049     */
3050    public static boolean isSpace(int ch) {
3051        return ch <= 0x20 &&
3052            (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3053    }
3054
3055    /**
3056     * Returns a value indicating a code point's Unicode category.
3057     * Up-to-date Unicode implementation of java.lang.Character.getType() 
3058     * except for the above mentioned code points that had their category 
3059     * changed.<br>
3060     * Return results are constants from the interface 
3061     * <a HREF=UCharacterCategory.html>UCharacterCategory</a><br>
3062     * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3063     * those returned by java.lang.Character.getType.  UCharacterCategory values
3064     * match the ones used in ICU4C, while java.lang.Character type
3065     * values, though similar, skip the value 17.</p>
3066     * @param ch code point whose type is to be determined
3067     * @return category which is a value of UCharacterCategory
3068     * @stable ICU 2.1
3069     */
3070    public static int getType(int ch)
3071    {
3072        return getProperty(ch) & UCharacterProperty.TYPE_MASK;
3073    }
3074       
3075    /**
3076     * Determines if a code point has a defined meaning in the up-to-date 
3077     * Unicode standard.
3078     * E.g. supplementary code points though allocated space are not defined in 
3079     * Unicode yet.<br>
3080     * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3081     * @param ch code point to be determined if it is defined in the most 
3082     *        current version of Unicode
3083     * @return true if this code point is defined in unicode
3084     * @stable ICU 2.1
3085     */
3086    public static boolean isDefined(int ch)
3087    {
3088        return getType(ch) != 0;
3089    }
3090                                    
3091    /**
3092     * Determines if a code point is a Java digit.
3093     * <br>This method observes the semantics of
3094     * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 
3095     * digits only.
3096     * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 
3097     * numeric letters and other numbers as digits. 
3098     * This has been changed to conform to the java semantics.
3099     * @param ch code point to query
3100     * @return true if this code point is a digit 
3101     * @stable ICU 2.1
3102     */
3103    public static boolean isDigit(int ch)
3104    {
3105        return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3106    }
3107
3108    /**
3109     * Determines if the specified code point is an ISO control character.
     * A code point is considered to be an ISO control character if it is in 
     * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through 
     * &#92;u009F.<br>
3113     * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3114     * @param ch code point to determine if it is an ISO control character
3115     * @return true if code point is a ISO control character
3116     * @stable ICU 2.1
3117     */
3118    public static boolean isISOControl(int ch)
3119    {
3120        return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 
3121            ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3122    }
3123                                    
3124    /**
3125     * Determines if the specified code point is a letter.
3126     * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3127     * @param ch code point to determine if it is a letter
3128     * @return true if code point is a letter
3129     * @stable ICU 2.1
3130     */
3131    public static boolean isLetter(int ch)
3132    {
3133        // if props == 0, it will just fall through and return false
3134        return ((1 << getType(ch)) 
3135        & ((1 << UCharacterCategory.UPPERCASE_LETTER) 
3136           | (1 << UCharacterCategory.LOWERCASE_LETTER)
3137           | (1 << UCharacterCategory.TITLECASE_LETTER)
3138           | (1 << UCharacterCategory.MODIFIER_LETTER)
3139           | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3140    }
3141                
3142    /**
3143     * Determines if the specified code point is a letter or digit.
3144     * Note this method, unlike java.lang.Character does not regard the ascii 
3145     * characters 'A' - 'Z' and 'a' - 'z' as digits.
3146     * @param ch code point to determine if it is a letter or a digit
3147     * @return true if code point is a letter or a digit
3148     * @stable ICU 2.1
3149     */
3150    public static boolean isLetterOrDigit(int ch)
3151    {
3152        return ((1 << getType(ch)) 
3153        & ((1 << UCharacterCategory.UPPERCASE_LETTER) 
3154           | (1 << UCharacterCategory.LOWERCASE_LETTER)
3155           | (1 << UCharacterCategory.TITLECASE_LETTER)
3156           | (1 << UCharacterCategory.MODIFIER_LETTER)
3157           | (1 << UCharacterCategory.OTHER_LETTER)
3158           | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3159    }
3160     
3161    /**
3162     * Compatibility override of Java deprecated method.  This
3163     * method will always remain deprecated.  Delegates to
3164     * java.lang.Character.isJavaIdentifierStart.
3165     * @param cp the code point
3166     * @return true if the code point can start a java identifier.
3167     * @deprecated ICU 3.4 (Java)
3168     */
3169    public static boolean isJavaLetter(int cp) {
3170    return isJavaIdentifierStart(cp);
3171    }
3172
3173    /**
3174     * Compatibility override of Java deprecated method.  This
3175     * method will always remain deprecated.  Delegates to
3176     * java.lang.Character.isJavaIdentifierPart.
3177     * @param cp the code point
3178     * @return true if the code point can continue a java identifier.
3179     * @deprecated ICU 3.4 (Java)
3180     */
3181    public static boolean isJavaLetterOrDigit(int cp) {
3182    return isJavaIdentifierPart(cp);
3183    }
3184
3185    /**
3186     * Compatibility override of Java method, delegates to
3187     * java.lang.Character.isJavaIdentifierStart.
3188     * @param cp the code point
3189     * @return true if the code point can start a java identifier.
3190     * @draft ICU 3.4
3191     * @provisional This API might change or be removed in a future release.
3192     */
3193    public static boolean isJavaIdentifierStart(int cp) {
3194    // note, downcast to char for jdk 1.4 compatibility
3195    return java.lang.Character.isJavaIdentifierStart((char)cp);
3196    }
3197
3198    /**
3199     * Compatibility override of Java method, delegates to
3200     * java.lang.Character.isJavaIdentifierPart.
3201     * @param cp the code point
3202     * @return true if the code point can continue a java identifier.
3203     * @draft ICU 3.4
3204     * @provisional This API might change or be removed in a future release.
3205     */
3206    public static boolean isJavaIdentifierPart(int cp) {
3207    // note, downcast to char for jdk 1.4 compatibility
3208    return java.lang.Character.isJavaIdentifierPart((char)cp);
3209    }
3210
3211    /**
3212     * Determines if the specified code point is a lowercase character.
3213     * UnicodeData only contains case mappings for code points where they are 
3214     * one-to-one mappings; it also omits information about context-sensitive 
3215     * case mappings.<br> For more information about Unicode case mapping 
3216     * please refer to the 
3217     * <a HREF=http://www.unicode.org/unicode/reports/tr21/>Technical report 
3218     * #21</a>.<br>
3219     * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3220     * @param ch code point to determine if it is in lowercase
3221     * @return true if code point is a lowercase character
3222     * @stable ICU 2.1
3223     */
3224    public static boolean isLowerCase(int ch)
3225    {
3226        // if props == 0, it will just fall through and return false
3227        return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3228    }
3229       
3230    /**
3231     * Determines if the specified code point is a white space character.
3232     * A code point is considered to be an whitespace character if and only
3233     * if it satisfies one of the following criteria:
3234     * <ul>
3235     * <li> It is a Unicode space separator (category "Zs"), but is not
3236     *      a no-break space (&#92u00A0 or &#92u202F or &#92uFEFF).
3237     * <li> It is a Unicode line separator (category "Zl").
3238     * <li> It is a Unicode paragraph separator (category "Zp").
3239     * <li> It is &#92u0009, HORIZONTAL TABULATION. 
3240     * <li> It is &#92u000A, LINE FEED. 
3241     * <li> It is &#92u000B, VERTICAL TABULATION. 
3242     * <li> It is &#92u000C, FORM FEED. 
3243     * <li> It is &#92u000D, CARRIAGE RETURN. 
3244     * <li> It is &#92u001C, FILE SEPARATOR. 
3245     * <li> It is &#92u001D, GROUP SEPARATOR. 
3246     * <li> It is &#92u001E, RECORD SEPARATOR. 
3247     * <li> It is &#92u001F, UNIT SEPARATOR.  
3248     * </ul>
3249     *
3250     * This API tries to synch to the semantics of the Java API,
3251     * java.lang.Character.isWhitespace(). 
3252     * @param ch code point to determine if it is a white space
3253     * @return true if the specified code point is a white space character
3254     * @stable ICU 2.1
3255     */
3256    public static boolean isWhitespace(int ch)
3257    {
3258        // exclude no-break spaces
3259        // if props == 0, it will just fall through and return false
3260        return ((1 << getType(ch)) & 
3261                ((1 << UCharacterCategory.SPACE_SEPARATOR)
3262                 | (1 << UCharacterCategory.LINE_SEPARATOR)
3263                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 
3264        && (ch != NO_BREAK_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 
3265        && (ch != ZERO_WIDTH_NO_BREAK_SPACE_)
3266        // TAB VT LF FF CR FS GS RS US NL are all control characters
3267        // that are white spaces.
3268        || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3269    }
3270       
3271    /**
3272     * Determines if the specified code point is a Unicode specified space 
3273     * character, i.e. if code point is in the category Zs, Zl and Zp.
3274     * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3275     * @param ch code point to determine if it is a space
3276     * @return true if the specified code point is a space character
3277     * @stable ICU 2.1
3278     */
3279    public static boolean isSpaceChar(int ch)
3280    {
3281        // if props == 0, it will just fall through and return false
3282        return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 
3283                      | (1 << UCharacterCategory.LINE_SEPARATOR)
3284                      | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3285        != 0;
3286    }
3287                                    
3288    /**
3289     * Determines if the specified code point is a titlecase character.
3290     * UnicodeData only contains case mappings for code points where they are 
3291     * one-to-one mappings; it also omits information about context-sensitive 
3292     * case mappings.<br>
3293     * For more information about Unicode case mapping please refer to the 
3294     * <a HREF=http://www.unicode.org/unicode/reports/tr21/>
3295     * Technical report #21</a>.<br>
3296     * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3297     * @param ch code point to determine if it is in title case
3298     * @return true if the specified code point is a titlecase character
3299     * @stable ICU 2.1
3300     */
3301    public static boolean isTitleCase(int ch)
3302    {
3303        // if props == 0, it will just fall through and return false
3304        return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3305    }
3306       
3307    /**
3308     * Determines if the specified code point may be any part of a Unicode 
3309     * identifier other than the starting character.
3310     * A code point may be part of a Unicode identifier if and only if it is 
3311     * one of the following: 
3312     * <ul>
3313     * <li> Lu Uppercase letter
3314     * <li> Ll Lowercase letter
3315     * <li> Lt Titlecase letter
3316     * <li> Lm Modifier letter
3317     * <li> Lo Other letter
3318     * <li> Nl Letter number
3319     * <li> Pc Connecting punctuation character 
3320     * <li> Nd decimal number
3321     * <li> Mc Spacing combining mark 
3322     * <li> Mn Non-spacing mark 
3323     * <li> Cf formatting code
3324     * </ul>
3325     * Up-to-date Unicode implementation of 
3326     * java.lang.Character.isUnicodeIdentifierPart().<br>
3327     * See <a HREF=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3328     * @param ch code point to determine if is can be part of a Unicode 
3329     *        identifier
3330     * @return true if code point is any character belonging a unicode 
3331     *         identifier suffix after the first character
3332     * @stable ICU 2.1
3333     */
3334    public static boolean isUnicodeIdentifierPart(int ch)
3335    {
3336        // if props == 0, it will just fall through and return false
3337        // cat == format
3338        return ((1 << getType(ch)) 
3339        & ((1 << UCharacterCategory.UPPERCASE_LETTER) 
3340           | (1 << UCharacterCategory.LOWERCASE_LETTER)
3341           | (1 << UCharacterCategory.TITLECASE_LETTER)
3342           | (1 << UCharacterCategory.MODIFIER_LETTER)
3343           | (1 << UCharacterCategory.OTHER_LETTER)
3344           | (1 << UCharacterCategory.LETTER_NUMBER) 
3345           | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3346           | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3347           | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3348           | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3349        || isIdentifierIgnorable(ch);
3350    }
3351                       
3352    /**
3353     * Determines if the specified code point is permissible as the first 
3354     * character in a Unicode identifier.
3355     * A code point may start a Unicode identifier if it is of type either 
3356     * <ul> 
3357     * <li> Lu Uppercase letter
3358     * <li> Ll Lowercase letter
3359     * <li> Lt Titlecase letter
3360     * <li> Lm Modifier letter
3361     * <li> Lo Other letter
3362     * <li> Nl Letter number
3363     * </ul>
3364     * Up-to-date Unicode implementation of 
3365     * java.lang.Character.isUnicodeIdentifierStart().<br>
3366     * See <a HREF=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3367     * @param ch code point to determine if it can start a Unicode identifier
3368     * @return true if code point is the first character belonging a unicode 
3369     *              identifier
3370     * @stable ICU 2.1
3371     */
3372    public static boolean isUnicodeIdentifierStart(int ch)
3373    {
3374        /*int cat = getType(ch);*/
3375        // if props == 0, it will just fall through and return false
3376        return ((1 << getType(ch)) 
3377        & ((1 << UCharacterCategory.UPPERCASE_LETTER) 
3378           | (1 << UCharacterCategory.LOWERCASE_LETTER)
3379           | (1 << UCharacterCategory.TITLECASE_LETTER)
3380           | (1 << UCharacterCategory.MODIFIER_LETTER)
3381           | (1 << UCharacterCategory.OTHER_LETTER)
3382           | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3383    }
3384
3385    /**
3386     * Determines if the specified code point should be regarded as an 
3387     * ignorable character in a Unicode identifier.
3388     * A character is ignorable in the Unicode standard if it is of the type 
3389     * Cf, Formatting code.<br>
3390     * Up-to-date Unicode implementation of 
3391     * java.lang.Character.isIdentifierIgnorable().<br>
3392     * See <a HREF=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3393     * @param ch code point to be determined if it can be ignored in a Unicode 
3394     *        identifier.
3395     * @return true if the code point is ignorable
3396     * @stable ICU 2.1
3397     */
3398    public static boolean isIdentifierIgnorable(int ch)
3399    {
3400        // see java.lang.Character.isIdentifierIgnorable() on range of 
3401        // ignorable characters.
3402        if (ch <= 0x9f) {
3403        return isISOControl(ch) 
3404        && !((ch >= 0x9 && ch <= 0xd) 
3405             || (ch >= 0x1c && ch <= 0x1f));
3406        } 
3407        return getType(ch) == UCharacterCategory.FORMAT;
3408    }
3409                      
3410    /**
3411     * Determines if the specified code point is an uppercase character.
3412     * UnicodeData only contains case mappings for code point where they are 
3413     * one-to-one mappings; it also omits information about context-sensitive 
3414     * case mappings.<br> 
3415     * For language specific case conversion behavior, use 
3416     * toUpperCase(locale, str). <br>
3417     * For example, the case conversion for dot-less i and dotted I in Turkish,
3418     * or for final sigma in Greek.
3419     * For more information about Unicode case mapping please refer to the 
3420     * <a HREF=http://www.unicode.org/unicode/reports/tr21/>
3421     * Technical report #21</a>.<br>
3422     * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3423     * @param ch code point to determine if it is in uppercase
3424     * @return true if the code point is an uppercase character
3425     * @stable ICU 2.1
3426     */
3427    public static boolean isUpperCase(int ch)
3428    {
3429        // if props == 0, it will just fall through and return false
3430        return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3431    }
3432                       
3433    /**
3434     * The given code point is mapped to its lowercase equivalent; if the code 
3435     * point has no lowercase equivalent, the code point itself is returned.
3436     * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3437     *
3438     * <p>This function only returns the simple, single-code point case mapping.
3439     * Full case mappings should be used whenever possible because they produce
3440     * better results by working on whole strings.
3441     * They take into account the string context and the language and can map
3442     * to a result string with a different length as appropriate.
3443     * Full case mappings are applied by the case mapping functions
3444     * that take String parameters rather than code points (int).
3445     * See also the User Guide chapter on C/POSIX migration:
3446     * http://icu.sourceforge.net/userguide/posix.html#case_mappings
3447     *
3448     * @param ch code point whose lowercase equivalent is to be retrieved
3449     * @return the lowercase equivalent code point
3450     * @stable ICU 2.1
3451     */
3452    public static int toLowerCase(int ch) {
3453        return gCsp.tolower(ch);
3454    }
3455
3456    /**
3457     * Converts argument code point and returns a String object representing 
3458     * the code point's value in UTF16 format.
3459     * The result is a string whose length is 1 for non-supplementary code 
3460     * points, 2 otherwise.<br>
3461     * com.ibm.ibm.icu.UTF16 can be used to parse Strings generated by this 
3462     * function.<br>
3463     * Up-to-date Unicode implementation of java.lang.Character.toString()
3464     * @param ch code point
3465     * @return string representation of the code point, null if code point is not
3466     *         defined in unicode
3467     * @stable ICU 2.1
3468     */
3469    public static String   toString(int ch)
3470    {
3471        if (ch < MIN_VALUE || ch > MAX_VALUE) {
3472            return null;
3473        }
3474        
3475        if (ch < SUPPLEMENTARY_MIN_VALUE) {
3476            return String.valueOf((char)ch);
3477        }
3478        
3479        StringBuffer   result = new StringBuffer  ();
3480        result.append(UTF16.getLeadSurrogate(ch));
3481        result.append(UTF16.getTrailSurrogate(ch));
3482        return result.toString();
3483    }
3484                                    
3485    /**
3486     * Converts the code point argument to titlecase.
3487     * If no titlecase is available, the uppercase is returned. If no uppercase 
3488     * is available, the code point itself is returned.
3489     * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
3490     *
3491     * <p>This function only returns the simple, single-code point case mapping.
3492     * Full case mappings should be used whenever possible because they produce
3493     * better results by working on whole strings.
3494     * They take into account the string context and the language and can map
3495     * to a result string with a different length as appropriate.
3496     * Full case mappings are applied by the case mapping functions
3497     * that take String parameters rather than code points (int).
3498     * See also the User Guide chapter on C/POSIX migration:
3499     * http://icu.sourceforge.net/userguide/posix.html#case_mappings
3500     *
3501     * @param ch code point  whose title case is to be retrieved
3502     * @return titlecase code point
3503     * @stable ICU 2.1
3504     */
3505    public static int toTitleCase(int ch) {
3506        return gCsp.totitle(ch);
3507    }
3508       
3509    /**
3510     * Converts the character argument to uppercase.
3511     * If no uppercase is available, the character itself is returned.
3512     * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
3513     *
3514     * <p>This function only returns the simple, single-code point case mapping.
3515     * Full case mappings should be used whenever possible because they produce
3516     * better results by working on whole strings.
3517     * They take into account the string context and the language and can map
3518     * to a result string with a different length as appropriate.
3519     * Full case mappings are applied by the case mapping functions
3520     * that take String parameters rather than code points (int).
3521     * See also the User Guide chapter on C/POSIX migration:
3522     * http://icu.sourceforge.net/userguide/posix.html#case_mappings
3523     *
3524     * @param ch code point whose uppercase is to be retrieved
3525     * @return uppercase code point
3526     * @stable ICU 2.1
3527     */
3528    public static int toUpperCase(int ch) {
3529        return gCsp.toupper(ch);
3530    }
3531       
3532    // extra methods not in java.lang.Character --------------------------
3533       
3534    /**
3535     * Determines if the code point is a supplementary character.
3536     * A code point is a supplementary character if and only if it is greater 
3537     * than <a HREF=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
3538     * @param ch code point to be determined if it is in the supplementary 
3539     *        plane
3540     * @return true if code point is a supplementary character
3541     * @stable ICU 2.1
3542     */
3543    public static boolean isSupplementary(int ch)
3544    {
3545        return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 
3546            ch <= UCharacter.MAX_VALUE;
3547    }
3548      
3549    /**
3550     * Determines if the code point is in the BMP plane.
3551     * @param ch code point to be determined if it is not a supplementary 
3552     *        character
3553     * @return true if code point is not a supplementary character
3554     * @stable ICU 2.1
3555     */
3556    public static boolean isBMP(int ch) 
3557    {
3558        return (ch >= 0 && ch <= LAST_CHAR_MASK_);
3559    }
3560
3561    /**
3562     * Determines whether the specified code point is a printable character 
3563     * according to the Unicode standard.
3564     * @param ch code point to be determined if it is printable
3565     * @return true if the code point is a printable character
3566     * @stable ICU 2.1
3567     */
3568    public static boolean isPrintable(int ch)
3569    {
3570        int cat = getType(ch);
3571        // if props == 0, it will just fall through and return false
3572        return (cat != UCharacterCategory.UNASSIGNED && 
3573        cat != UCharacterCategory.CONTROL && 
3574        cat != UCharacterCategory.FORMAT &&
3575        cat != UCharacterCategory.PRIVATE_USE &&
3576        cat != UCharacterCategory.SURROGATE &&
3577        cat != UCharacterCategory.GENERAL_OTHER_TYPES);
3578    }
3579
3580    /**
3581     * Determines whether the specified code point is of base form.
3582     * A code point of base form does not graphically combine with preceding 
3583     * characters, and is neither a control nor a format character.
3584     * @param ch code point to be determined if it is of base form
3585     * @return true if the code point is of base form
3586     * @stable ICU 2.1
3587     */
3588    public static boolean isBaseForm(int ch)
3589    {
3590        int cat = getType(ch);
3591        // if props == 0, it will just fall through and return false
3592        return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 
3593            cat == UCharacterCategory.OTHER_NUMBER || 
3594            cat == UCharacterCategory.LETTER_NUMBER || 
3595            cat == UCharacterCategory.UPPERCASE_LETTER || 
3596            cat == UCharacterCategory.LOWERCASE_LETTER || 
3597            cat == UCharacterCategory.TITLECASE_LETTER ||
3598            cat == UCharacterCategory.MODIFIER_LETTER || 
3599            cat == UCharacterCategory.OTHER_LETTER || 
3600            cat == UCharacterCategory.NON_SPACING_MARK || 
3601            cat == UCharacterCategory.ENCLOSING_MARK ||
3602            cat == UCharacterCategory.COMBINING_SPACING_MARK;
3603    }
3604
3605    /**
3606     * Returns the Bidirection property of a code point.
3607     * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 
3608     * property.<br>
3609     * Result returned belongs to the interface 
3610     * <a HREF=UCharacterDirection.html>UCharacterDirection</a>
3611     * @param ch the code point to be determined its direction
3612     * @return direction constant from UCharacterDirection.
3613     * @stable ICU 2.1
3614     */
3615    public static int getDirection(int ch)
3616    {
3617        return gBdp.getClass(ch);
3618    }
3619
3620    /**
3621     * Determines whether the code point has the "mirrored" property.
3622     * This property is set for characters that are commonly used in
3623     * Right-To-Left contexts and need to be displayed with a "mirrored"
3624     * glyph.
3625     * @param ch code point whose mirror is to be determined
3626     * @return true if the code point has the "mirrored" property
3627     * @stable ICU 2.1
3628     */
3629    public static boolean isMirrored(int ch)
3630    {
3631        return gBdp.isMirrored(ch);
3632    }
3633
3634    /**
3635     * Maps the specified code point to a "mirror-image" code point.
3636     * For code points with the "mirrored" property, implementations sometimes 
3637     * need a "poor man's" mapping to another code point such that the default 
3638     * glyph may serve as the mirror-image of the default glyph of the 
3639     * specified code point.<br> 
3640     * This is useful for text conversion to and from codepages with visual 
3641     * order, and for displays without glyph selection capabilities.
3642     * @param ch code point whose mirror is to be retrieved
3643     * @return another code point that may serve as a mirror-image substitute, 
3644     *         or ch itself if there is no such mapping or ch does not have the 
3645     *         "mirrored" property
3646     * @stable ICU 2.1
3647     */
3648    public static int getMirror(int ch)
3649    {
3650        return gBdp.getMirror(ch);
3651    }
3652      
3653    /**
3654     * Gets the combining class of the argument codepoint
3655     * @param ch code point whose combining is to be retrieved
3656     * @return the combining class of the codepoint
3657     * @stable ICU 2.1
3658     */
3659    public static int getCombiningClass(int ch)
3660    {
3661        if (ch < MIN_VALUE || ch > MAX_VALUE) {
3662        throw new IllegalArgumentException  ("Codepoint out of bounds");
3663        }
3664        return NormalizerImpl.getCombiningClass(ch);
3665    }
3666      
3667    /**
3668     * A code point is illegal if and only if
3669     * <ul>
3670     * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3671     * <li> A surrogate value, 0xD800 to 0xDFFF
3672     * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3673     * </ul>
3674     * Note: legal does not mean that it is assigned in this version of Unicode.
3675     * @param ch code point to determine if it is a legal code point by itself
3676     * @return true if and only if legal. 
3677     * @stable ICU 2.1
3678     */
3679    public static boolean isLegal(int ch) 
3680    {
3681        if (ch < MIN_VALUE) {
3682            return false;
3683        }
3684        if (ch < UTF16.SURROGATE_MIN_VALUE) {
3685            return true;
3686        }
3687        if (ch <= UTF16.SURROGATE_MAX_VALUE) {
3688            return false;
3689        }
3690        if (UCharacterUtility.isNonCharacter(ch)) {
3691            return false;
3692        }
3693        return (ch <= MAX_VALUE);
3694    }
3695      
3696    /**
3697     * A string is legal iff all its code points are legal.
3698     * A code point is illegal if and only if
3699     * <ul>
3700     * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3701     * <li> A surrogate value, 0xD800 to 0xDFFF
3702     * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3703     * </ul>
3704     * Note: legal does not mean that it is assigned in this version of Unicode.
3705     * @param str containing code points to examin
3706     * @return true if and only if legal. 
3707     * @stable ICU 2.1
3708     */
3709    public static boolean isLegal(String   str) 
3710    {
3711        int size = str.length();
3712        int codepoint;
3713        for (int i = 0; i < size; i ++)
3714        {
3715        codepoint = UTF16.charAt(str, i);
3716        if (!isLegal(codepoint)) {
3717            return false;
3718        }
3719        if (isSupplementary(codepoint)) {
3720            i ++;
3721        }
3722        }
3723        return true;
3724    }
3725
3726    /**
3727     * Gets the version of Unicode data used. 
3728     * @return the unicode version number used
3729     * @stable ICU 2.1
3730     */
3731    public static VersionInfo getUnicodeVersion()
3732    {
3733        return PROPERTY_.m_unicodeVersion_;
3734    }
3735      
3736    /**
3737     * Retrieve the most current Unicode name of the argument code point, or 
3738     * null if the character is unassigned or outside the range 
3739     * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3740     * <br>
3741     * Note calling any methods related to code point names, e.g. get*Name*() 
3742     * incurs a one-time initialisation cost to construct the name tables.
3743     * @param ch the code point for which to get the name
3744     * @return most current Unicode name
3745     * @stable ICU 2.1
3746     */
3747    public static String   getName(int ch)
3748    {
3749        if(NAME_==null){
3750            throw new MissingResourceException  ("Could not load unames.icu","","");
3751        }
3752        return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
3753    }
3754    
3755    /**
3756     * Gets the names for each of the characters in a string
3757     * @param s string to format
3758     * @param separator string to go between names
3759     * @return string of names
3760     * @internal
3761     * @deprecated This API is ICU internal only.
3762     */
3763    public static String   getName(String   s, String   separator) {
3764        if (s.length() == 1) { // handle common case
3765            return getName(s.charAt(0));
3766        }
3767        int cp;
3768        StringBuffer   sb = new StringBuffer  ();
3769        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
3770            cp = UTF16.charAt(s,i);
3771            if (i != 0) sb.append(separator);
3772            sb.append(UCharacter.getName(cp));
3773        }
3774        return sb.toString();
3775    }
3776      
3777    /**
3778     * Retrieve the earlier version 1.0 Unicode name of the argument code 
3779     * point, or null if the character is unassigned or outside the range 
3780     * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3781     * <br>
3782     * Note calling any methods related to code point names, e.g. get*Name*() 
3783     * incurs a one-time initialisation cost to construct the name tables.
3784     * @param ch the code point for which to get the name
3785     * @return version 1.0 Unicode name
3786     * @stable ICU 2.1
3787     */
3788    public static String   getName1_0(int ch)
3789    {
3790        if(NAME_==null){
3791            throw new MissingResourceException  ("Could not load unames.icu","","");
3792        }
3793        return NAME_.getName(ch, 
3794                             UCharacterNameChoice.UNICODE_10_CHAR_NAME);
3795    }
3796    
3797    /**
3798     * <p>Retrieves a name for a valid codepoint. Unlike, getName(int) and
3799     * getName1_0(int), this method will return a name even for codepoints that
3800     * are not assigned a name in UnicodeData.txt.
3801     * </p>
3802     * The names are returned in the following order.
3803     * <ul>
3804     * <li> Most current Unicode name if there is any
3805     * <li> Unicode 1.0 name if there is any
3806     * <li> Extended name in the form of 
3807     *      "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-fffe>
3808     * </ul>
3809     * Note calling any methods related to code point names, e.g. get*Name*() 
3810     * incurs a one-time initialisation cost to construct the name tables.
3811     * @param ch the code point for which to get the name
3812     * @return a name for the argument codepoint
3813     * @stable ICU 2.6
3814     */
3815    public static String   getExtendedName(int ch) 
3816    {
3817        if(NAME_==null){
3818            throw new MissingResourceException  ("Could not load unames.icu","","");
3819        }
3820        return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
3821    }
3822    
3823    /**
3824     * Get the ISO 10646 comment for a character.
3825     * The ISO 10646 comment is an informative field in the Unicode Character
3826     * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
3827     * @param ch The code point for which to get the ISO comment.
3828     *           It must be <code>0<=c<=0x10ffff</code>.
3829     * @return The ISO comment, or null if there is no comment for this 
3830     *         character.
3831     * @stable ICU 2.4
3832     */
3833    public static String   getISOComment(int ch)
3834    {
3835        if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE) {
3836            return null;
3837        }
3838        if(NAME_==null){
3839            throw new MissingResourceException  ("Could not load unames.icu","","");
3840        }    
3841        String   result = NAME_.getGroupName(ch, 
3842                                           UCharacterNameChoice.ISO_COMMENT_);
3843        return result;
3844    }
3845      
3846    /**
3847     * <p>Find a Unicode code point by its most current Unicode name and 
3848     * return its code point value. All Unicode names are in uppercase.</p>
3849     * Note calling any methods related to code point names, e.g. get*Name*() 
3850     * incurs a one-time initialisation cost to construct the name tables.
3851     * @param name most current Unicode character name whose code point is to 
3852     *        be returned
3853     * @return code point or -1 if name is not found
3854     * @stable ICU 2.1
3855     */
3856    public static int getCharFromName(String   name)
3857    {     
3858        if(NAME_==null){
3859            throw new MissingResourceException  ("Could not load unames.icu","","");
3860        }
3861        return NAME_.getCharFromName(
3862                     UCharacterNameChoice.UNICODE_CHAR_NAME, name);
3863    }
3864      
3865    /**
3866     * <p>Find a Unicode character by its version 1.0 Unicode name and return 
3867     * its code point value. All Unicode names are in uppercase.</p>
3868     * Note calling any methods related to code point names, e.g. get*Name*() 
3869     * incurs a one-time initialisation cost to construct the name tables.
3870     * @param name Unicode 1.0 code point name whose code point is to 
3871     *             returned
3872     * @return code point or -1 if name is not found
3873     * @stable ICU 2.1
3874     */
3875    public static int getCharFromName1_0(String   name)
3876    {
3877        if(NAME_==null){
3878            throw new MissingResourceException  ("Could not load unames.icu","","");
3879        }
3880        return NAME_.getCharFromName(
3881                     UCharacterNameChoice.UNICODE_10_CHAR_NAME, name);
3882    }
3883    
3884    /**
3885     * <p>Find a Unicode character by either its name and return its code 
3886     * point value. All Unicode names are in uppercase. 
3887     * Extended names are all lowercase except for numbers and are contained
3888     * within angle brackets.</p>
3889     * The names are searched in the following order
3890     * <ul>
3891     * <li> Most current Unicode name if there is any
3892     * <li> Unicode 1.0 name if there is any
3893     * <li> Extended name in the form of 
3894     *      "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE>
3895     * </ul>
3896     * Note calling any methods related to code point names, e.g. get*Name*() 
3897     * incurs a one-time initialisation cost to construct the name tables.
3898     * @param name codepoint name
3899     * @return code point associated with the name or -1 if the name is not
3900     *         found.
3901     * @stable ICU 2.6
3902     */
3903    public static int getCharFromExtendedName(String   name)
3904    {
3905        if(NAME_==null){
3906            throw new MissingResourceException  ("Could not load unames.icu","","");
3907        }
3908        return NAME_.getCharFromName(
3909                     UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
3910    }
3911
3912    /**
3913     * Return the Unicode name for a given property, as given in the
3914     * Unicode database file PropertyAliases.txt.  Most properties
3915     * have more than one name.  The nameChoice determines which one
3916     * is returned.
3917     *
3918     * In addition, this function maps the property
3919     * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
3920     * "General_Category_Mask".  These names are not in
3921     * PropertyAliases.txt.
3922     * 
3923     * @param property UProperty selector.
3924     *
3925     * @param nameChoice UProperty.NameChoice selector for which name
3926     * to get.  All properties have a long name.  Most have a short
3927     * name, but some do not.  Unicode allows for additional names; if
3928     * present these will be returned by UProperty.NameChoice.LONG + i,
3929     * where i=1, 2,...
3930     *
3931     * @return a name, or null if Unicode explicitly defines no name
3932     * ("n/a") for a given property/nameChoice.  If a given nameChoice
3933     * throws an exception, then all larger values of nameChoice will
3934     * throw an exception.  If null is returned for a given
3935     * nameChoice, then other nameChoice values may return non-null
3936     * results.
3937     *
3938     * @exception IllegalArgumentException thrown if property or
3939     * nameChoice are invalid.
3940     *
3941     * @see UProperty
3942     * @see UProperty.NameChoice
3943     * @stable ICU 2.4
3944     */
3945    public static String   getPropertyName(int property,
3946                                         int nameChoice) {
3947        return PNAMES_.getPropertyName(property, nameChoice);
3948    }
3949
3950    /**
3951     * Return the UProperty selector for a given property name, as
3952     * specified in the Unicode database file PropertyAliases.txt.
3953     * Short, long, and any other variants are recognized.
3954     *
3955     * In addition, this function maps the synthetic names "gcm" /
3956     * "General_Category_Mask" to the property
3957     * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
3958     * PropertyAliases.txt.
3959     *
3960     * @param propertyAlias the property name to be matched.  The name
3961     * is compared using "loose matching" as described in
3962     * PropertyAliases.txt.
3963     *
3964     * @return a UProperty enum.
3965     *
3966     * @exception IllegalArgumentException thrown if propertyAlias
3967     * is not recognized.
3968     *
3969     * @see UProperty
3970     * @stable ICU 2.4
3971     */
3972    public static int getPropertyEnum(String   propertyAlias) {
3973        return PNAMES_.getPropertyEnum(propertyAlias);
3974    }
3975
3976    /**
3977     * Return the Unicode name for a given property value, as given in
3978     * the Unicode database file PropertyValueAliases.txt.  Most
3979     * values have more than one name.  The nameChoice determines
3980     * which one is returned.
3981     *
3982     * Note: Some of the names in PropertyValueAliases.txt can only be
3983     * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
3984     * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
3985     * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
3986     * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
3987     *
3988     * @param property UProperty selector constant.
3989     * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
3990     * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
3991     * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
3992     * If out of range, null is returned.
3993     *
3994     * @param value selector for a value for the given property.  In
3995     * general, valid values range from 0 up to some maximum.  There
3996     * are a few exceptions: (1.) UProperty.BLOCK values begin at the
3997     * non-zero value BASIC_LATIN.getID().  (2.)
3998     * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
3999     * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4000     * are mask values produced by left-shifting 1 by
4001     * UCharacter.getType().  This allows grouped categories such as
4002     * [:L:] to be represented.  Mask values are non-contiguous.
4003     *
4004     * @param nameChoice UProperty.NameChoice selector for which name
4005     * to get.  All values have a long name.  Most have a short name,
4006     * but some do not.  Unicode allows for additional names; if
4007     * present these will be returned by UProperty.NameChoice.LONG + i,
4008     * where i=1, 2,...
4009     *
4010     * @return a name, or null if Unicode explicitly defines no name
4011     * ("n/a") for a given property/value/nameChoice.  If a given
4012     * nameChoice throws an exception, then all larger values of
4013     * nameChoice will throw an exception.  If null is returned for a
4014     * given nameChoice, then other nameChoice values may return
4015     * non-null results.
4016     *
4017     * @exception IllegalArgumentException thrown if property, value,
4018     * or nameChoice are invalid.
4019     *
4020     * @see UProperty
4021     * @see UProperty.NameChoice
4022     * @stable ICU 2.4
4023     */
4024    public static String   getPropertyValueName(int property,
4025                                              int value,
4026                                              int nameChoice) 
4027    {
4028        if (property == UProperty.CANONICAL_COMBINING_CLASS 
4029            && value >= UCharacter.getIntPropertyMinValue(
4030                              UProperty.CANONICAL_COMBINING_CLASS)
4031            && value <= UCharacter.getIntPropertyMaxValue(
4032                              UProperty.CANONICAL_COMBINING_CLASS)
4033            && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4034            // this is hard coded for the valid cc
4035            // because PropertyValueAliases.txt does not contain all of them
4036            try {
4037                return PNAMES_.getPropertyValueName(property, value, 
4038                                                    nameChoice);
4039            }
4040            catch (IllegalArgumentException   e) {
4041                return null;
4042            }
4043        }
4044        return PNAMES_.getPropertyValueName(property, value, nameChoice);
4045    }
4046
4047    /**
4048     * Return the property value integer for a given value name, as
4049     * specified in the Unicode database file PropertyValueAliases.txt.
4050     * Short, long, and any other variants are recognized.
4051     *
4052     * Note: Some of the names in PropertyValueAliases.txt will only be
4053     * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4054     * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4055     * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4056     * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4057     *
4058     * @param property UProperty selector constant.
4059     * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4060     * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4061     * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4062     * Only these properties can be enumerated.
4063     *
4064     * @param valueAlias the value name to be matched.  The name is
4065     * compared using "loose matching" as described in
4066     * PropertyValueAliases.txt.
4067     *
4068     * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
4069     * values are mask values produced by left-shifting 1 by
4070     * UCharacter.getType().  This allows grouped categories such as
4071     * [:L:] to be represented.
4072     *
4073     * @see UProperty
4074     * @throws IllegalArgumentException if property is not a valid UProperty
4075     *         selector
4076     * @stable ICU 2.4
4077     */
4078    public static int getPropertyValueEnum(int property,
4079                                           String   valueAlias) {
4080        return PNAMES_.getPropertyValueEnum(property, valueAlias);
4081    }
4082      
4083    /**
4084     * Returns a code point corresponding to the two UTF16 characters.
4085     * @param lead the lead char
4086     * @param trail the trail char
4087     * @return code point if surrogate characters are valid.
4088     * @exception IllegalArgumentException thrown when argument characters do
4089     *            not form a valid codepoint
4090     * @stable ICU 2.1
4091     */
4092    public static int getCodePoint(char lead, char trail) 
4093    {
4094        if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE && 
4095        lead <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
4096            trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE && 
4097        trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
4098            return UCharacterProperty.getRawSupplementary(lead, trail);
4099        }
4100        throw new IllegalArgumentException  ("Illegal surrogate characters");
4101    }
4102      
4103    /**
4104     * Returns the code point corresponding to the UTF16 character.
4105     * @param char16 the UTF16 character
4106     * @return code point if argument is a valid character.
4107     * @exception IllegalArgumentException thrown when char16 is not a valid
4108     *            codepoint
4109     * @stable ICU 2.1
4110     */
4111    public static int getCodePoint(char char16) 
4112    {
4113        if (UCharacter.isLegal(char16)) {
4114            return char16;
4115        }
4116        throw new IllegalArgumentException  ("Illegal codepoint");
4117    }
4118
4119    /**
4120     * Implementation of UCaseProps.ContextIterator, iterates over a String.
4121     * See ustrcase.c/utf16_caseContextIterator().
4122     */
4123    private static class StringContextIterator implements UCaseProps.ContextIterator {
4124        /**
4125         * Constructor.
4126         * @param s String to iterate over. 
4127         */
4128        StringContextIterator(String   s) {
4129            this.s=s;
4130            limit=s.length();
4131            cpStart=cpLimit=index=0;
4132            dir=0;
4133        }
4134
4135        /**
4136         * Set the iteration limit for nextCaseMapCP() to an index within the string.
4137         * If the limit parameter is negative or past the string, then the
4138         * string length is restored as the iteration limit.
4139         *
4140         * This limit does not affect the next() function which always
4141         * iterates to the very end of the string.
4142         *
4143         * @param lim The iteration limit.
4144         */
4145        public void setLimit(int lim) {
4146            if(0<=lim && lim<=s.length()) {
4147                limit=lim;
4148            } else {
4149                limit=s.length();
4150            }
4151        }
4152
4153        /**
4154         * Iterate forward through the string to fetch the next code point
4155         * to be case-mapped, and set the context indexes for it.
4156         * Performance optimization, to save on function calls and redundant
4157         * tests. Combines UTF16.charAt(), UTF16.getCharCount(), and setIndex().
4158         *
4159         * When the iteration limit is reached (and -1 is returned),
4160         * getCPStart() will be at the iteration limit.
4161         *
4162         * Iteration with next() does not affect the position for nextCaseMapCP().
4163         *
4164         * @return The next code point to be case-mapped, or <0 when the iteration is done.
4165         */
4166        public int nextCaseMapCP() {
4167            cpStart=cpLimit;
4168            if(cpLimit<limit) {
4169                int c=s.charAt(cpLimit++);
4170                if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE) {
4171                    char c2;
4172                    if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit &&
4173                        UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) && c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE
4174                    ) {
4175                        // supplementary code point
4176                        ++cpLimit;
4177                        c=UCharacterProperty.getRawSupplementary((char)c, c2);
4178                    // else unpaired surrogate code point
4179                    }
4180                // else BMP code point
4181                }
4182                return c;
4183            } else {
4184                return -1;
4185            }
4186        }
4187
4188        /**
4189         * Get the start of the code point that was last returned
4190         * by nextCaseMapCP().
4191         */
4192        public int getCPStart() {
4193            return cpStart;
4194        }
4195
4196        // implement UCaseProps.ContextIterator
4197        public void reset(int dir) {
4198            if(dir>0) {
4199                /* reset for forward iteration */
4200                this.dir=1;
4201                index=cpLimit;
4202            } else if(dir<0) {
4203                /* reset for backward iteration */
4204                this.dir=-1;
4205                index=cpStart;
4206            } else {
4207                // not a valid direction
4208                this.dir=0;
4209                index=0;
4210            }
4211        }
4212
4213        public int next() {
4214            int c;
4215
4216            if(dir>0 && index<s.length()) {
4217                c=UTF16.charAt(s, index);
4218                index+=UTF16.getCharCount(c);
4219                return c;
4220            } else if(dir<0 && index>0) {
4221                c=UTF16.charAt(s, index-1);
4222                index-=UTF16.getCharCount(c);
4223                return c;
4224            }
4225            return -1;
4226        }
4227
4228        // variables
4229        protected String   s;
4230        protected int index, limit, cpStart, cpLimit;
4231        protected int dir; // 0=initial state  >0=forward  <0=backward
4232    }
4233
4234    /**
4235     * Gets uppercase version of the argument string. 
4236     * Casing is dependent on the default locale and context-sensitive.
4237     * @param str source string to be performed on
4238     * @return uppercase version of the argument string
4239     * @stable ICU 2.1
4240     */
4241    public static String   toUpperCase(String   str)
4242    {
4243        return toUpperCase(ULocale.getDefault(), str);
4244    }
4245      
4246    /**
4247     * Gets lowercase version of the argument string. 
4248     * Casing is dependent on the default locale and context-sensitive
4249     * @param str source string to be performed on
4250     * @return lowercase version of the argument string
4251     * @stable ICU 2.1
4252     */
4253    public static String   toLowerCase(String   str)
4254    {
4255        return toLowerCase(ULocale.getDefault(), str);
4256    }
4257    
4258    /**
4259     * <p>Gets the titlecase version of the argument string.</p>
4260     * <p>Position for titlecasing is determined by the argument break 
4261     * iterator, hence the user can customized his break iterator for 
4262     * a specialized titlecasing. In this case only the forward iteration 
4263     * needs to be implemented.
4264     * If the break iterator passed in is null, the default Unicode algorithm
4265     * will be used to determine the titlecase positions.
4266     * </p>
4267     * <p>Only positions returned by the break iterator will be title cased,
4268     * character in between the positions will all be in lower case.</p>
4269     * <p>Casing is dependent on the default locale and context-sensitive</p>
4270     * @param str source string to be performed on
4271     * @param breakiter break iterator to determine the positions in which
4272     *        the character should be title cased.
4273     * @return lowercase version of the argument string
4274     * @stable ICU 2.6
4275     */
4276    public static String   toTitleCase(String   str, BreakIterator breakiter)
4277    {
4278        return toTitleCase(ULocale.getDefault(), str, breakiter);
4279    }
4280      
4281    /**
4282     * Gets uppercase version of the argument string. 
4283     * Casing is dependent on the argument locale and context-sensitive.
4284     * @param locale which string is to be converted in
4285     * @param str source string to be performed on
4286     * @return uppercase version of the argument string
4287     * @stable ICU 2.1
4288     */
4289    public static String   toUpperCase(Locale   locale, String   str)
4290    {
4291        return toUpperCase(ULocale.forLocale(locale), str);
4292    }
4293
4294    /**
4295     * Gets uppercase version of the argument string. 
4296     * Casing is dependent on the argument locale and context-sensitive.
4297     * @param locale which string is to be converted in
4298     * @param str source string to be performed on
4299     * @return uppercase version of the argument string
4300     * @draft ICU 3.2
4301     * @provisional This API might change or be removed in a future release.
4302     */
4303    public static String   toUpperCase(ULocale locale, String   str) {
4304        StringContextIterator iter = new StringContextIterator(str);
4305        StringBuffer   result = new StringBuffer  (str.length());
4306        int[] locCache = new int[1];
4307        int c;
4308
4309        if (locale == null) {
4310            locale = ULocale.getDefault();
4311        }
4312        locCache[0]=0;
4313
4314        while((c=iter.nextCaseMapCP())>=0) {
4315            c=gCsp.toFullUpper(c, iter, result, locale, locCache);
4316
4317            /* decode the result */
4318            if(c<0) {
4319                /* (not) original code point */
4320                c=~c;
4321            } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4322                /* mapping already appended to result */
4323                continue;
4324            /* } else { append single-code point mapping */
4325            }
4326            if(c<=0xffff) {
4327                result.append((char)c);
4328            } else {
4329                UTF16.append(result, c);
4330            }
4331        }
4332        return result.toString();
4333    }
4334
4335    /**
4336     * Gets lowercase version of the argument string. 
4337     * Casing is dependent on the argument locale and context-sensitive
4338     * @param locale which string is to be converted in
4339     * @param str source string to be performed on
4340     * @return lowercase version of the argument string
4341     * @stable ICU 2.1
4342     */
4343    public static String   toLowerCase(Locale   locale, String   str)
4344    {
4345        return toLowerCase(ULocale.forLocale(locale), str);
4346    }
4347
4348    /**
4349     * Gets lowercase version of the argument string. 
4350     * Casing is dependent on the argument locale and context-sensitive
4351     * @param locale which string is to be converted in
4352     * @param str source string to be performed on
4353     * @return lowercase version of the argument string
4354     * @draft ICU 3.2
4355     * @provisional This API might change or be removed in a future release.
4356     */
4357    public static String   toLowerCase(ULocale locale, String   str) {
4358        StringContextIterator iter = new StringContextIterator(str);
4359        StringBuffer   result = new StringBuffer  (str.length());
4360        int[] locCache = new int[1];
4361        int c;
4362
4363        if (locale == null) {
4364            locale = ULocale.getDefault();
4365        }
4366        locCache[0]=0;
4367
4368        while((c=iter.nextCaseMapCP())>=0) {
4369            c=gCsp.toFullLower(c, iter, result, locale, locCache);
4370
4371            /* decode the result */
4372            if(c<0) {
4373                /* (not) original code point */
4374                c=~c;
4375            } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4376                /* mapping already appended to result */
4377                continue;
4378            /* } else { append single-code point mapping */
4379            }
4380            if(c<=0xffff) {
4381                result.append((char)c);
4382            } else {
4383                UTF16.append(result, c);
4384            }
4385        }
4386        return result.toString();
4387    }
4388
4389    /**
4390     * <p>Gets the titlecase version of the argument string.</p>
4391     * <p>Position for titlecasing is determined by the argument break 
4392     * iterator, hence the user can customized his break iterator for 
4393     * a specialized titlecasing. In this case only the forward iteration 
4394     * needs to be implemented.
4395     * If the break iterator passed in is null, the default Unicode algorithm
4396     * will be used to determine the titlecase positions.
4397     * </p>
4398     * <p>Only positions returned by the break iterator will be title cased,
4399     * character in between the positions will all be in lower case.</p>
4400     * <p>Casing is dependent on the argument locale and context-sensitive</p>
4401     * @param locale which string is to be converted in
4402     * @param str source string to be performed on
4403     * @param breakiter break iterator to determine the positions in which
4404     *        the character should be title cased.
4405     * @return lowercase version of the argument string
4406     * @stable ICU 2.6
4407     */
4408    public static String   toTitleCase(Locale   locale, String   str, 
4409                                     BreakIterator breakiter)
4410    {
4411        return toTitleCase(ULocale.forLocale(locale), str, breakiter);
4412    }
4413
4414    /**
4415     * <p>Gets the titlecase version of the argument string.</p>
4416     * <p>Position for titlecasing is determined by the argument break 
4417     * iterator, hence the user can customized his break iterator for 
4418     * a specialized titlecasing. In this case only the forward iteration 
4419     * needs to be implemented.
4420     * If the break iterator passed in is null, the default Unicode algorithm
4421     * will be used to determine the titlecase positions.
4422     * </p>
4423     * <p>Only positions returned by the break iterator will be title cased,
4424     * character in between the positions will all be in lower case.</p>
4425     * <p>Casing is dependent on the argument locale and context-sensitive</p>
4426     * @param locale which string is to be converted in
4427     * @param str source string to be performed on
4428     * @param titleIter break iterator to determine the positions in which
4429     *        the character should be title cased.
4430     * @return lowercase version of the argument string
4431     * @draft ICU 3.2
4432     * @provisional This API might change or be removed in a future release.
4433     */
4434    public static String   toTitleCase(ULocale locale, String   str, 
4435                                     BreakIterator titleIter) {
4436        StringContextIterator iter = new StringContextIterator(str);
4437        StringBuffer   result = new StringBuffer  (str.length());
4438        int[] locCache = new int[1];
4439        int c, srcLength = str.length();
4440
4441        if (locale == null) {
4442            locale = ULocale.getDefault();
4443        }
4444        locCache[0]=0;
4445
4446        if(titleIter == null) {
4447            titleIter = BreakIterator.getWordInstance(locale);
4448        }
4449        titleIter.setText(str);
4450
4451        int prev, titleStart, index;
4452        boolean isFirstIndex;
4453
4454        /* set up local variables */
4455        prev=0;
4456        isFirstIndex=true;
4457
4458        /* titlecasing loop */
4459        while(prev<srcLength) {
4460            /* find next index where to titlecase */
4461            if(isFirstIndex) {
4462                isFirstIndex=false;
4463                index=titleIter.first();
4464            } else {
4465                index=titleIter.next();
4466            }
4467            if(index==BreakIterator.DONE || index>srcLength) {
4468                index=srcLength;
4469            }
4470
4471            /*
4472             * Unicode 4 & 5 section 3.13 Default Case Operations:
4473             *
4474             * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
4475             * #29, "Text Boundaries." Between each pair of word boundaries, find the first
4476             * cased character F. If F exists, map F to default_title(F); then map each
4477             * subsequent character C to default_lower(C).
4478             *
4479             * In this implementation, segment [prev..index[ into 3 parts:
4480             * a) uncased characters (copy as-is) [prev..titleStart[
4481             * b) first case letter (titlecase)         [titleStart..titleLimit[
4482             * c) subsequent characters (lowercase)                 [titleLimit..index[
4483             */
4484            if(prev<index) {
4485                /* find and copy uncased characters [prev..titleStart[ */
4486                iter.setLimit(index);
4487                while((c=iter.nextCaseMapCP())>=0 && UCaseProps.NONE==gCsp.getType(c)) {}
4488                titleStart=iter.getCPStart();
4489                if(prev<titleStart) {
4490                    // TODO: With Java 5, this would want to be result.append(str, prev, titleStart);
4491                    result.append(str.substring(prev, titleStart));
4492                }
4493
4494                if(titleStart<index) {
4495                    /* titlecase c which is from titleStart */
4496                    c=gCsp.toFullTitle(c, iter, result, locale, locCache);
4497
4498                    /* decode the result and lowercase up to index */
4499                    for(;;) {
4500                        if(c<0) {
4501                            /* (not) original code point */
4502                            c=~c;
4503                            if(c<=0xffff) {
4504                                result.append((char)c);
4505                            } else {
4506                                UTF16.append(result, c);
4507                            }
4508                        } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4509                            /* mapping already appended to result */
4510                        } else {
4511                            /* append single-code point mapping */
4512                            if(c<=0xffff) {
4513                                result.append((char)c);
4514                            } else {
4515                                UTF16.append(result, c);
4516                            }
4517                        }
4518                        
4519                        if((c=iter.nextCaseMapCP())>=0) {
4520                            c=gCsp.toFullLower(c, iter, result, locale, locCache);
4521                        } else {
4522                            break;
4523                        }
4524                    }
4525                }
4526            }
4527
4528            prev=index;
4529        }
4530        return result.toString();
4531    }
4532
4533    /**
4534     * The given character is mapped to its case folding equivalent according 
4535     * to UnicodeData.txt and CaseFolding.txt; if the character has no case 
4536     * folding equivalent, the character itself is returned.
4537     *
4538     * <p>This function only returns the simple, single-code point case mapping.
4539     * Full case mappings should be used whenever possible because they produce
4540     * better results by working on whole strings.
4541     * They can map to a result string with a different length as appropriate.
4542     * Full case mappings are applied by the case mapping functions
4543     * that take String parameters rather than code points (int).
4544     * See also the User Guide chapter on C/POSIX migration:
4545     * http://icu.sourceforge.net/userguide/posix.html#case_mappings
4546     *
4547     * @param ch             the character to be converted
4548     * @param defaultmapping Indicates if all mappings defined in 
4549     *                       CaseFolding.txt is to be used, otherwise the 
4550     *                       mappings for dotted I  and dotless i marked with 
4551     *                       'I' in CaseFolding.txt will be skipped.
4552     * @return               the case folding equivalent of the character, if 
4553     *                       any; otherwise the character itself.
4554     * @see                  #foldCase(String, boolean)
4555     * @stable ICU 2.1
4556     */
4557    public static int foldCase(int ch, boolean defaultmapping) {
4558        return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4559    }
4560
4561    /**
4562     * The given string is mapped to its case folding equivalent according to
4563     * UnicodeData.txt and CaseFolding.txt; if any character has no case 
4564     * folding equivalent, the character itself is returned.
4565     * "Full", multiple-code point case folding mappings are returned here.
4566     * For "simple" single-code point mappings use the API 
4567     * foldCase(int ch, boolean defaultmapping).
4568     * @param str            the String to be converted
4569     * @param defaultmapping Indicates if all mappings defined in 
4570     *                       CaseFolding.txt is to be used, otherwise the 
4571     *                       mappings for dotted I and dotless i marked with 
4572     *                       'I' in CaseFolding.txt will be skipped.
4573     * @return               the case folding equivalent of the character, if 
4574     *                       any; otherwise the character itself.
4575     * @see                  #foldCase(int, boolean)
4576     * @stable ICU 2.1
4577     */
4578    public static String   foldCase(String   str, boolean defaultmapping) {
4579        return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4580    }
4581    
    /**
     * Option value for case folding: use default mappings defined in CaseFolding.txt.
     * The boolean foldCase() overloads pass this value when their
     * defaultmapping argument is true.
     * @stable ICU 2.6
     */
    public static final int FOLD_CASE_DEFAULT    =      0x0000;
    /** 
     * Option value for case folding: exclude the mappings for dotted I 
     * and dotless i marked with 'I' in CaseFolding.txt. 
     * The boolean foldCase() overloads pass this value when their
     * defaultmapping argument is false.
     * @stable ICU 2.6
     */
    public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
4593    
4594    /**
4595     * The given character is mapped to its case folding equivalent according 
4596     * to UnicodeData.txt and CaseFolding.txt; if the character has no case 
4597     * folding equivalent, the character itself is returned.
4598     *
4599     * <p>This function only returns the simple, single-code point case mapping.
4600     * Full case mappings should be used whenever possible because they produce
4601     * better results by working on whole strings.
4602     * They can map to a result string with a different length as appropriate.
4603     * Full case mappings are applied by the case mapping functions
4604     * that take String parameters rather than code points (int).
4605     * See also the User Guide chapter on C/POSIX migration:
4606     * http://icu.sourceforge.net/userguide/posix.html#case_mappings
4607     *
4608     * @param ch             the character to be converted
4609     * @param options        A bit set for special processing. Currently the recognised options are
4610     *                        FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 
4611     * @return               the case folding equivalent of the character, if 
4612     *                       any; otherwise the character itself.
4613     * @see #foldCase(String, boolean)
4614     * @stable ICU 2.6
4615     */
4616    public static int foldCase(int ch, int options) {
4617        return gCsp.fold(ch, options);
4618    }
4619    
4620    /**
4621     * The given string is mapped to its case folding equivalent according to
4622     * UnicodeData.txt and CaseFolding.txt; if any character has no case 
4623     * folding equivalent, the character itself is returned.
4624     * "Full", multiple-code point case folding mappings are returned here.
4625     * For "simple" single-code point mappings use the API 
4626     * foldCase(int ch, boolean defaultmapping).
4627     * @param str            the String to be converted
4628     * @param options        A bit set for special processing. Currently the recognised options are
4629     *                        FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 
4630     * @return               the case folding equivalent of the character, if 
4631     *                       any; otherwise the character itself.
4632     * @see #foldCase(int, boolean)
4633     * @stable ICU 2.6
4634     */
4635    public static final String   foldCase(String   str, int options) {
4636        StringBuffer   result = new StringBuffer  (str.length());
4637        int c, i, length;
4638
4639        length = str.length();
4640        for(i=0; i<length;) {
4641            c=UTF16.charAt(str, i);
4642            i+=UTF16.getCharCount(c);
4643            c=gCsp.toFullFolding(c, result, options);
4644
4645            /* decode the result */
4646            if(c<0) {
4647                /* (not) original code point */
4648                c=~c;
4649            } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
4650                /* mapping already appended to result */
4651                continue;
4652            /* } else { append single-code point mapping */
4653            }
4654            if(c<=0xffff) {
4655                result.append((char)c);
4656            } else {
4657                UTF16.append(result, c);
4658            }
4659        }
4660        return result.toString();
4661    }
4662
4663    /**
4664     * Return numeric value of Han code points.
4665     * <br> This returns the value of Han 'numeric' code points,
4666     * including those for zero, ten, hundred, thousand, ten thousand,
4667     * and hundred million.
4668     * This includes both the standard and 'checkwriting'
4669     * characters, the 'big circle' zero character, and the standard
4670     * zero character.
4671     * @param ch code point to query
4672     * @return value if it is a Han 'numeric character,' otherwise return -1.  
4673     * @stable ICU 2.4
4674     */
4675    public static int getHanNumericValue(int ch)
4676    {
4677        // TODO: Are these all covered by Unicode numeric value data?
4678        switch(ch)
4679        {
4680        case IDEOGRAPHIC_NUMBER_ZERO_ :
4681        case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
4682        return 0; // Han Zero
4683        case CJK_IDEOGRAPH_FIRST_ :
4684        case CJK_IDEOGRAPH_COMPLEX_ONE_ :
4685        return 1; // Han One
4686        case CJK_IDEOGRAPH_SECOND_ :
4687        case CJK_IDEOGRAPH_COMPLEX_TWO_ :
4688        return 2; // Han Two
4689        case CJK_IDEOGRAPH_THIRD_ :
4690        case CJK_IDEOGRAPH_COMPLEX_THREE_ :
4691        return 3; // Han Three
4692        case CJK_IDEOGRAPH_FOURTH_ :
4693        case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
4694        return 4; // Han Four
4695        case CJK_IDEOGRAPH_FIFTH_ :
4696        case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
4697        return 5; // Han Five
4698        case CJK_IDEOGRAPH_SIXTH_ :
4699        case CJK_IDEOGRAPH_COMPLEX_SIX_ :
4700        return 6; // Han Six
4701        case CJK_IDEOGRAPH_SEVENTH_ :
4702        case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
4703        return 7; // Han Seven
4704        case CJK_IDEOGRAPH_EIGHTH_ : 
4705        case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
4706        return 8; // Han Eight
4707        case CJK_IDEOGRAPH_NINETH_ :
4708        case CJK_IDEOGRAPH_COMPLEX_NINE_ :
4709        return 9; // Han Nine
4710        case CJK_IDEOGRAPH_TEN_ :
4711        case CJK_IDEOGRAPH_COMPLEX_TEN_ :
4712        return 10;
4713        case CJK_IDEOGRAPH_HUNDRED_ :
4714        case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
4715        return 100;
4716        case CJK_IDEOGRAPH_THOUSAND_ :
4717        case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
4718        return 1000;
4719        case CJK_IDEOGRAPH_TEN_THOUSAND_ :
4720        return 10000;
4721        case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
4722        return 100000000;
4723        }
4724        return -1; // no value
4725    }
4726    
4727    /**
4728     * <p>Gets an iterator for character types, iterating over codepoints.</p>
4729     * Example of use:<br>
4730     * <pre>
4731     * RangeValueIterator iterator = UCharacter.getTypeIterator();
4732     * RangeValueIterator.Element element = new RangeValueIterator.Element();
4733     * while (iterator.next(element)) {
4734     *     System.out.println("Codepoint \\u" + 
4735     *                        Integer.toHexString(element.start) + 
4736     *                        " to codepoint \\u" +
4737     *                        Integer.toHexString(element.limit - 1) + 
4738     *                        " has the character type " + 
4739     *                        element.value);
4740     * }
4741     * </pre>
4742     * @return an iterator 
4743     * @stable ICU 2.6
4744     */
4745    public static RangeValueIterator getTypeIterator()
4746    {
4747        return new UCharacterTypeIterator(PROPERTY_);
4748    }
4749
4750    /**
4751     * <p>Gets an iterator for character names, iterating over codepoints.</p>
4752     * <p>This API only gets the iterator for the modern, most up-to-date 
4753     * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
4754     * for extended names use getExtendedNameIterator().</p>
4755     * Example of use:<br>
4756     * <pre>
4757     * ValueIterator iterator = UCharacter.getNameIterator();
4758     * ValueIterator.Element element = new ValueIterator.Element();
4759     * while (iterator.next(element)) {
4760     *     System.out.println("Codepoint \\u" + 
4761     *                        Integer.toHexString(element.codepoint) +
4762     *                        " has the name " + (String)element.value);
4763     * }
4764     * </pre>
4765     * <p>The maximal range which the name iterator iterates is from 
4766     * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
4767     * @return an iterator 
4768     * @stable ICU 2.6
4769     */
4770    public static ValueIterator getNameIterator()
4771    {
4772        if(NAME_==null){
4773            throw new RuntimeException  ("Could not load unames.icu");
4774        }
4775        return new UCharacterNameIterator(NAME_,
4776                      UCharacterNameChoice.UNICODE_CHAR_NAME);
4777    }
4778    
4779    /**
4780     * <p>Gets an iterator for character names, iterating over codepoints.</p>
4781     * <p>This API only gets the iterator for the older 1.0 Unicode names. 
4782     * For modern, most up-to-date Unicode names use getNameIterator() or
4783     * for extended names use getExtendedNameIterator().</p>
4784     * Example of use:<br>
4785     * <pre>
4786     * ValueIterator iterator = UCharacter.get1_0NameIterator();
4787     * ValueIterator.Element element = new ValueIterator.Element();
4788     * while (iterator.next(element)) {
4789     *     System.out.println("Codepoint \\u" + 
4790     *                        Integer.toHexString(element.codepoint) +
4791     *                        " has the name " + (String)element.value);
4792     * }
4793     * </pre>
4794     * <p>The maximal range which the name iterator iterates is from 
4795     * @return an iterator 
4796     * @stable ICU 2.6
4797     */
4798    public static ValueIterator getName1_0Iterator()
4799    {
4800        if(NAME_==null){
4801            throw new RuntimeException  ("Could not load unames.icu");
4802        }
4803        return new UCharacterNameIterator(NAME_,
4804                      UCharacterNameChoice.UNICODE_10_CHAR_NAME);
4805    }
4806    
4807    /**
4808     * <p>Gets an iterator for character names, iterating over codepoints.</p>
4809     * <p>This API only gets the iterator for the extended names. 
4810     * For modern, most up-to-date Unicode names use getNameIterator() or
4811     * for older 1.0 Unicode names use get1_0NameIterator().</p>
4812     * Example of use:<br>
4813     * <pre>
4814     * ValueIterator iterator = UCharacter.getExtendedNameIterator();
4815     * ValueIterator.Element element = new ValueIterator.Element();
4816     * while (iterator.next(element)) {
4817     *     System.out.println("Codepoint \\u" + 
4818     *                        Integer.toHexString(element.codepoint) +
4819     *                        " has the name " + (String)element.value);
4820     * }
4821     * </pre>
4822     * <p>The maximal range which the name iterator iterates is from 
4823     * @return an iterator 
4824     * @stable ICU 2.6
4825     */
4826    public static ValueIterator getExtendedNameIterator()
4827    {
4828        if(NAME_==null){
4829            throw new MissingResourceException  ("Could not load unames.icu","","");
4830        }
4831        return new UCharacterNameIterator(NAME_,
4832                      UCharacterNameChoice.EXTENDED_CHAR_NAME);
4833    }
4834    
4835    /**
4836     * <p>Get the "age" of the code point.</p>
4837     * <p>The "age" is the Unicode version when the code point was first
4838     * designated (as a non-character or for Private Use) or assigned a 
4839     * character.
4840     * <p>This can be useful to avoid emitting code points to receiving 
4841     * processes that do not accept newer characters.</p>
4842     * <p>The data is from the UCD file DerivedAge.txt.</p>
4843     * @param ch The code point.
4844     * @return the Unicode version number
4845     * @stable ICU 2.6
4846     */
4847    public static VersionInfo getAge(int ch) 
4848    {
4849        if (ch < MIN_VALUE || ch > MAX_VALUE) {
4850        throw new IllegalArgumentException  ("Codepoint out of bounds");
4851        }
4852        return PROPERTY_.getAge(ch);
4853    }
4854    
4855    /**
4856     * <p>Check a binary Unicode property for a code point.</p> 
4857     * <p>Unicode, especially in version 3.2, defines many more properties 
4858     * than the original set in UnicodeData.txt.</p>
4859     * <p>This API is intended to reflect Unicode properties as defined in 
4860     * the Unicode Character Database (UCD) and Unicode Technical Reports 
4861     * (UTR).</p>
4862     * <p>For details about the properties see 
4863     * <a HREF=http://www.unicode.org/>http://www.unicode.org/</a>.</p>
4864     * <p>For names of Unicode properties see the UCD file 
4865     * PropertyAliases.txt.</p>
4866     * <p>This API does not check the validity of the codepoint.</p>
4867     * <p>Important: If ICU is built with UCD files from Unicode versions 
4868     * below 3.2, then properties marked with "new" are not or 
4869     * not fully available.</p>
4870     * @param ch code point to test.
4871     * @param property selector constant from com.ibm.icu.lang.UProperty, 
4872     *        identifies which binary property to check.
4873     * @return true or false according to the binary Unicode property value 
4874     *         for ch. Also false if property is out of bounds or if the 
4875     *         Unicode version does not have data for the property at all, or 
4876     *         not for this code point.
4877     * @see com.ibm.icu.lang.UProperty
4878     * @stable ICU 2.6
4879     */
4880    public static boolean hasBinaryProperty(int ch, int property) 
4881    {
4882    if (ch < MIN_VALUE || ch > MAX_VALUE) {
4883        throw new IllegalArgumentException  ("Codepoint out of bounds");
4884        }
4885        return PROPERTY_.hasBinaryProperty(ch, property);
4886    }
4887        
4888    /**
4889     * <p>Check if a code point has the Alphabetic Unicode property.</p> 
4890     * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).</p>
4891     * <p>Different from UCharacter.isLetter(ch)!</p> 
4892     * @stable ICU 2.6
4893     * @param ch codepoint to be tested
4894     */
4895    public static boolean isUAlphabetic(int ch)
4896    {
4897    return hasBinaryProperty(ch, UProperty.ALPHABETIC);
4898    }
4899
4900    /**
4901     * <p>Check if a code point has the Lowercase Unicode property.</p>
4902     * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).</p>
4903     * <p>This is different from UCharacter.isLowerCase(ch)!</p>
4904     * @param ch codepoint to be tested
4905     * @stable ICU 2.6
4906     */
4907    public static boolean isULowercase(int ch) 
4908    {
4909    return hasBinaryProperty(ch, UProperty.LOWERCASE);
4910    }
4911
4912    /**
4913     * <p>Check if a code point has the Uppercase Unicode property.</p>
4914     * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).</p>
4915     * <p>This is different from UCharacter.isUpperCase(ch)!</p>
4916     * @param ch codepoint to be tested
4917     * @stable ICU 2.6
4918     */
4919    public static boolean isUUppercase(int ch) 
4920    {
4921    return hasBinaryProperty(ch, UProperty.UPPERCASE);
4922    }
4923
4924    /**
4925     * <p>Check if a code point has the White_Space Unicode property.</p>
4926     * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).</p>
4927     * <p>This is different from both UCharacter.isSpace(ch) and 
4928     * UCharacter.isWhitespace(ch)!</p>
4929     * @param ch codepoint to be tested
4930     * @stable ICU 2.6
4931     */
4932    public static boolean isUWhiteSpace(int ch) 
4933    {
4934    return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
4935    }
4936
4937
4938    /**
4939     * <p>Gets the property value for an Unicode property type of a code point. 
4940     * Also returns binary and mask property values.</p>
4941     * <p>Unicode, especially in version 3.2, defines many more properties than 
4942     * the original set in UnicodeData.txt.</p>
4943     * <p>The properties APIs are intended to reflect Unicode properties as 
4944     * defined in the Unicode Character Database (UCD) and Unicode Technical 
4945     * Reports (UTR). For details about the properties see 
4946     * http://www.unicode.org/.</p>
4947     * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
4948     * </p>
4949     * <pre>
4950     * Sample usage:
4951     * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
4952     * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
4953     * boolean b = (ideo == 1) ? true : false; 
4954     * </pre>
4955     * @param ch code point to test.
4956     * @param type UProperty selector constant, identifies which binary 
4957     *        property to check. Must be 
4958     *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or 
4959     *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or 
4960     *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
4961     * @return numeric value that is directly the property value or,
4962     *         for enumerated properties, corresponds to the numeric value of 
4963     *         the enumerated constant of the respective property value 
4964     *         enumeration type (cast to enum type if necessary).
4965     *         Returns 0 or 1 (for false / true) for binary Unicode properties.
4966     *         Returns a bit-mask for mask properties.
4967     *         Returns 0 if 'type' is out of bounds or if the Unicode version
4968     *         does not have data for the property at all, or not for this code 
4969     *         point.
4970     * @see UProperty
4971     * @see #hasBinaryProperty
4972     * @see #getIntPropertyMinValue
4973     * @see #getIntPropertyMaxValue
4974     * @see #getUnicodeVersion
4975     * @stable ICU 2.4
4976     */
4977    public static int getIntPropertyValue(int ch, int type)
4978    {
4979        if (type < UProperty.BINARY_START) {
4980            return 0; // undefined
4981        } 
4982        else if (type < UProperty.BINARY_LIMIT) {
4983            return hasBinaryProperty(ch, type) ? 1 : 0;
4984        } 
4985        else if (type < UProperty.INT_START) {
4986            return 0; // undefined
4987        } 
4988        else if (type < UProperty.INT_LIMIT) {
4989            //int result = 0;
4990            switch (type) {
4991            case UProperty.BIDI_CLASS:
4992                return getDirection(ch);
4993            case UProperty.BLOCK:
4994                return UnicodeBlock.idOf(ch);
4995            case UProperty.CANONICAL_COMBINING_CLASS:
4996                return getCombiningClass(ch);
4997            case UProperty.DECOMPOSITION_TYPE:
4998                return PROPERTY_.getAdditional(ch, 2) 
4999            & DECOMPOSITION_TYPE_MASK_;
5000            case UProperty.EAST_ASIAN_WIDTH:
5001                return (PROPERTY_.getAdditional(ch, 0)
5002            & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_;
5003            case UProperty.GENERAL_CATEGORY:
5004                return getType(ch);
5005            case UProperty.JOINING_GROUP:
5006                return gBdp.getJoiningGroup(ch);
5007            case UProperty.JOINING_TYPE:
5008                return gBdp.getJoiningType(ch);
5009            case UProperty.LINE_BREAK:
5010                return (int)(PROPERTY_.getAdditional(ch, 0)& LINE_BREAK_MASK_)>>LINE_BREAK_SHIFT_;
5011            case UProperty.NUMERIC_TYPE:
5012                type=getNumericType(PROPERTY_.getProperty(ch));
5013                if(type>NumericType.NUMERIC) {
5014                    /* keep internal variants of NumericType.NUMERIC from becoming visible */
5015                    type=NumericType.NUMERIC;
5016                }
5017                return type;
5018            case UProperty.SCRIPT:
5019                return UScript.getScript(ch);
5020            case UProperty.HANGUL_SYLLABLE_TYPE:
5021        /* purely algorithmic; hardcode known characters, check for assigned new ones */ 
5022        if(ch<NormalizerImpl.JAMO_L_BASE) { 
5023            /* NA */ 
5024        } else if(ch<=0x11ff) { 
5025            /* Jamo range */ 
5026            if(ch<=0x115f) { 
5027            /* Jamo L range, HANGUL CHOSEONG ... */ 
5028            if(ch==0x115f || ch<=0x1159 || getType(ch)==UCharacterCategory.OTHER_LETTER) { 
5029                return HangulSyllableType.LEADING_JAMO; 
5030            } 
5031            } else if(ch<=0x11a7) { 
5032            /* Jamo V range, HANGUL JUNGSEONG ... */ 
5033            if(ch<=0x11a2 || getType(ch)==UCharacterCategory.OTHER_LETTER) { 
5034                return HangulSyllableType.VOWEL_JAMO; 
5035            } 
5036            } else { 
5037            /* Jamo T range */ 
5038            if(ch<=0x11f9 || getType(ch)==UCharacterCategory.OTHER_LETTER) { 
5039                return HangulSyllableType.TRAILING_JAMO; 
5040            } 
5041            } 
5042        } else if((ch-=NormalizerImpl.HANGUL_BASE)<0) { 
5043            /* NA */ 
5044        } else if(ch<NormalizerImpl.HANGUL_COUNT) { 
5045            /* Hangul syllable */ 
5046            return ch%NormalizerImpl.JAMO_T_COUNT==0 ? HangulSyllableType.LV_SYLLABLE : HangulSyllableType.LVT_SYLLABLE; 
5047        } 
5048        return 0; /* NA */ 
5049
5050            case UProperty.NFD_QUICK_CHECK:
5051            case UProperty.NFKD_QUICK_CHECK:
5052            case UProperty.NFC_QUICK_CHECK:
5053            case UProperty.NFKC_QUICK_CHECK:
5054                return NormalizerImpl.quickCheck(ch, (type-UProperty.NFD_QUICK_CHECK)+2); // 2=UNORM_NFD
5055            case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
5056                return NormalizerImpl.getFCD16(ch)>>8;
5057            case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
5058                return NormalizerImpl.getFCD16(ch)&0xff;
5059            case UProperty.GRAPHEME_CLUSTER_BREAK:
5060                return (int)(PROPERTY_.getAdditional(ch, 2)& GCB_MASK)>>GCB_SHIFT;
5061            case UProperty.SENTENCE_BREAK:
5062                return (int)(PROPERTY_.getAdditional(ch, 2)& SB_MASK)>>SB_SHIFT;
5063            case UProperty.WORD_BREAK:
5064                return (int)(PROPERTY_.getAdditional(ch, 2)& WB_MASK)>>WB_SHIFT;
5065            default:
5066               
5067        return 0; /* undefined */
5068            }
5069        } else if (type == UProperty.GENERAL_CATEGORY_MASK) {
5070            return UCharacterProperty.getMask(getType(ch));
5071        }
5072        return 0; // undefined
5073    }
5074    /**
5075     * Returns a string version of the property value.
5076     * @param propertyEnum
5077     * @param codepoint
5078     * @param nameChoice
5079     * @return value as string
5080     * @internal
5081     * @deprecated This API is ICU internal only.
5082     */
5083    public static String   getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5084        // TODO some of these are less efficient, since a string is forced!
5085        if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5086                (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5087            return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), nameChoice);
5088        }
5089        if (propertyEnum == UProperty.NUMERIC_VALUE) {
5090                return String.valueOf(getUnicodeNumericValue(codepoint));
5091        }
5092        // otherwise must be string property
5093        switch (propertyEnum) {
5094        case UProperty.AGE: return getAge(codepoint).toString();
5095        case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5096        case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(getMirror(codepoint));
5097        case UProperty.CASE_FOLDING: return foldCase(UTF16.valueOf(codepoint), true);
5098        case UProperty.LOWERCASE_MAPPING: return toLowerCase(UTF16.valueOf(codepoint));
5099        case UProperty.NAME: return getName(codepoint);
5100        case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(foldCase(codepoint,true));
5101        case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(toLowerCase(codepoint));
5102        case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(toTitleCase(codepoint));
5103        case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(toUpperCase(codepoint));
5104        case UProperty.TITLECASE_MAPPING: return toTitleCase(UTF16.valueOf(codepoint),null);
5105        case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5106        case UProperty.UPPERCASE_MAPPING: return toUpperCase(UTF16.valueOf(codepoint));        
5107        }
5108        throw new IllegalArgumentException  ("Illegal Property Enum");
5109    }
5110    
5111    /**
5112     * Get the minimum value for an integer/binary Unicode property type.
5113     * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5114     * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5115     * @param type UProperty selector constant, identifies which binary 
5116     *        property to check. Must be 
5117     *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or 
5118     *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5119     * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 
5120     *         for a Unicode property. 0 if the property 
5121     *         selector 'type' is out of range.
5122     * @see UProperty
5123     * @see #hasBinaryProperty
5124     * @see #getUnicodeVersion
5125     * @see #getIntPropertyMaxValue
5126     * @see #getIntPropertyValue
5127     * @stable ICU 2.4
5128     */
5129    public static int getIntPropertyMinValue(int type)
5130    {
5131
5132        return 0; // undefined; and: all other properties have a minimum value 
5133    // of 0
5134    }
5135
5136    
5137    /**
5138     * Get the maximum value for an integer/binary Unicode property.
5139     * Can be used together with UCharacter.getIntPropertyMinValue(int)
5140     * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5141     * Examples for min/max values (for Unicode 3.2):
5142     * <ul>
5143     * <li> UProperty.BIDI_CLASS:    0/18 (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5144     * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5145     * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5146     * </ul>
5147     * For undefined UProperty constant values, min/max values will be 0/-1.
5148     * @param type UProperty selector constant, identifies which binary 
5149     *        property to check. Must be 
5150     *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or 
5151     *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5152     * @return Maximum value returned by u_getIntPropertyValue for a Unicode 
5153     *         property. &lt;= 0 if the property selector 'type' is out of range.
5154     * @see UProperty
5155     * @see #hasBinaryProperty
5156     * @see #getUnicodeVersion
5157     * @see #getIntPropertyMaxValue
5158     * @see #getIntPropertyValue
5159     * @stable ICU 2.4
5160     */
5161    public static int getIntPropertyMaxValue(int type)
5162    { 
5163        if (type < UProperty.BINARY_START) {
5164            return -1; // undefined
5165        } 
5166        else if (type < UProperty.BINARY_LIMIT) {
5167            return 1; // maximum TRUE for all binary properties
5168        } 
5169        else if (type < UProperty.INT_START) {
5170            return -1; // undefined
5171        } 
5172        else if (type < UProperty.INT_LIMIT) {
5173            switch (type) {
5174            case UProperty.BIDI_CLASS:
5175            case UProperty.JOINING_GROUP:
5176            case UProperty.JOINING_TYPE:
5177                return gBdp.getMaxValue(type);
5178            case UProperty.BLOCK:
5179                return (PROPERTY_.getMaxValues(0) & BLOCK_MASK_) >> BLOCK_SHIFT_;
5180            case UProperty.CANONICAL_COMBINING_CLASS:
5181            case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
5182            case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
5183                return 0xff; // TODO do we need to be more precise, 
5184                             // getting the actual maximum?
5185            case UProperty.DECOMPOSITION_TYPE:
5186                return PROPERTY_.getMaxValues(2) & DECOMPOSITION_TYPE_MASK_;
5187            case UProperty.EAST_ASIAN_WIDTH:
5188                return (PROPERTY_.getMaxValues(0) & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_;
5189            case UProperty.GENERAL_CATEGORY:
5190                return UCharacterCategory.CHAR_CATEGORY_COUNT - 1;
5191            case UProperty.LINE_BREAK:
5192                return (PROPERTY_.getMaxValues(0) & LINE_BREAK_MASK_) >> LINE_BREAK_SHIFT_;
5193            case UProperty.NUMERIC_TYPE:
5194                return NumericType.COUNT - 1;
5195            case UProperty.SCRIPT:
5196                return PROPERTY_.getMaxValues(0) & SCRIPT_MASK_;   
5197            case UProperty.HANGUL_SYLLABLE_TYPE:
5198                return HangulSyllableType.COUNT-1;
5199            case UProperty.NFD_QUICK_CHECK:
5200            case UProperty.NFKD_QUICK_CHECK:
5201                return 1; // YES -- these are never "maybe", only "no" or "yes"
5202            case UProperty.NFC_QUICK_CHECK:
5203            case UProperty.NFKC_QUICK_CHECK:
5204                return 2; // MAYBE
5205            case UProperty.GRAPHEME_CLUSTER_BREAK:
5206                return (PROPERTY_.getMaxValues(2) & GCB_MASK) >> GCB_SHIFT;
5207            case UProperty.SENTENCE_BREAK:
5208                return (PROPERTY_.getMaxValues(2) & SB_MASK) >> SB_SHIFT;
5209            case UProperty.WORD_BREAK:
5210                return (PROPERTY_.getMaxValues(2) & WB_MASK) >> WB_SHIFT;
5211            default:
5212                return -1; // undefined
5213            }
5214
5215        }
5216        return -1; // undefined
5217    }
5218
5219    /**
5220     * Provide the java.lang.Character forDigit API, for convenience.
5221     * @stable ICU 3.0
5222     */
5223    public static char forDigit(int digit, int radix) {
5224        return java.lang.Character.forDigit(digit, radix);
5225    }
5226
5227    // JDK 1.5 API coverage
5228
    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of a high (lead) surrogate; an alias of 
     * UTF16.LEAD_SURROGATE_MIN_VALUE.
     * @see UTF16#LEAD_SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Maximum value of a high (lead) surrogate; an alias of 
     * UTF16.LEAD_SURROGATE_MAX_VALUE.
     * @see UTF16#LEAD_SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of a low (trail) surrogate; an alias of 
     * UTF16.TRAIL_SURROGATE_MIN_VALUE.
     * @see UTF16#TRAIL_SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Maximum value of a low (trail) surrogate; an alias of 
     * UTF16.TRAIL_SURROGATE_MAX_VALUE.
     * @see UTF16#TRAIL_SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of any surrogate; an alias of UTF16.SURROGATE_MIN_VALUE.
     * @see UTF16#SURROGATE_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Maximum value of any surrogate; an alias of UTF16.SURROGATE_MAX_VALUE.
     * @see UTF16#SURROGATE_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of a supplementary code point; an alias of 
     * UTF16.SUPPLEMENTARY_MIN_VALUE.
     * @see UTF16#SUPPLEMENTARY_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final int  MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Maximum value of a code point; an alias of UTF16.CODEPOINT_MAX_VALUE.
     * @see UTF16#CODEPOINT_MAX_VALUE
     * @stable ICU 3.0
     */
    public static final int  MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE;

    /**
     * Cover the JDK 1.5 API, for convenience.
     * Minimum value of a code point; an alias of UTF16.CODEPOINT_MIN_VALUE.
     * @see UTF16#CODEPOINT_MIN_VALUE
     * @stable ICU 3.0
     */
    public static final int  MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE;
5291
5292    /**
5293     * Cover the JDK 1.5 API, for convenience.
5294     * @param cp the code point to check
5295     * @return true if cp is a valid code point
5296     * @stable ICU 3.0
5297     */
5298    public static final boolean isValidCodePoint(int cp) {
5299        return cp >= 0 && cp <= MAX_CODE_POINT;
5300    }
5301
5302    /**
5303     * Cover the JDK 1.5 API, for convenience.
5304     * @param cp the code point to check
5305     * @return true if cp is a supplementary code point
5306     * @stable ICU 3.0
5307     */
5308    public static final boolean isSupplementaryCodePoint(int cp) {
5309        return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE
5310            && cp <= UTF16.CODEPOINT_MAX_VALUE;
5311    }
5312
5313    /**
5314     * Cover the JDK 1.5 API, for convenience.
5315     * @param ch the char to check
5316     * @return true if ch is a high (lead) surrogate
5317     * @stable ICU 3.0
5318     */
5319    public static boolean isHighSurrogate(char ch) {
5320        return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
5321    }
5322
5323    /**
5324     * Cover the JDK 1.5 API, for convenience.
5325     * @param ch the char to check
5326     * @return true if ch is a low (trail) surrogate
5327     * @stable ICU 3.0
5328     */
5329    public static boolean isLowSurrogate(char ch) {
5330        return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
5331    }
5332
5333    /**
5334     * Cover the JDK 1.5 API, for convenience.  Return true if the chars
5335     * form a valid surrogate pair.
5336     * @param high the high (lead) char
5337     * @param low the low (trail) char
5338     * @return true if high, low form a surrogate pair
5339     * @stable ICU 3.0
5340     */
5341    public static final boolean isSurrogatePair(char high, char low) {
5342        return isHighSurrogate(high) && isLowSurrogate(low);
5343    }
5344
5345    /**
5346     * Cover the JDK 1.5 API, for convenience.  Return the number of chars needed
5347     * to represent the code point.  This does not check the
5348     * code point for validity.
5349     * @param cp the code point to check
5350     * @return the number of chars needed to represent the code point
5351     * @see UTF16#getCharCount
5352     * @stable ICU 3.0
5353     */
5354    public static int charCount(int cp) {
5355        return UTF16.getCharCount(cp);
5356    }
5357
5358    /**
5359     * Cover the JDK 1.5 API, for convenience.  Return the code point represented by
5360     * the characters.  This does not check the surrogate pair for validity.
5361     * @param high the high (lead) surrogate
5362     * @param low the low (trail) surrogate
5363     * @return the code point formed by the surrogate pair
5364     * @stable ICU 3.0
5365     */
5366    public static final int toCodePoint(char high, char low) {
5367        return UCharacterProperty.getRawSupplementary(high, low);
5368    }
5369
5370    /**
5371     * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5372     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5373     * API.  This examines only the characters at index and index+1.
5374     * @param seq the characters to check
5375     * @param index the index of the first or only char forming the code point
5376     * @return the code point at the index
5377     * @stable ICU 3.0
5378     */
5379//#ifdef FOUNDATION
5380    public static final int codePointAt(String   seq, int index) {
5381        char c1 = seq.charAt(index++);
5382        if (isHighSurrogate(c1)) {
5383            if (index < seq.length()) {
5384                char c2 = seq.charAt(index);
5385                if (isLowSurrogate(c2)) {
5386                    return toCodePoint(c1, c2);
5387                }
5388            }
5389        }
5390        return c1;
5391    }
5392    public static final int codePointAt(StringBuffer   seq, int index) {
5393        return codePointAt(seq.toString(), index);
5394    }
5395//#else
5396//##    public static final int codePointAt(CharSequence seq, int index) {
5397//##        char c1 = seq.charAt(index++);
5398//##        if (isHighSurrogate(c1)) {
5399//##            if (index < seq.length()) {
5400//##                char c2 = seq.charAt(index);
5401//##                if (isLowSurrogate(c2)) {
5402//##                    return toCodePoint(c1, c2);
5403//##                }
5404//##            }
5405//##        }
5406//##        return c1;
5407//##    }
5408//#endif
5409//#ifdef ECLIPSE_FRAGMENT
5410//##    public static final int codePointAt(String seq, int index) {
5411//##        return codePointAt((CharSequence)seq, index);
5412//##    }
5413//##    public static final int codePointAt(StringBuffer seq, int index) {
5414//##        return codePointAt((CharSequence)seq, index);
5415//##    }
5416//#endif
5417
5418    /**
5419     * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5420     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5421     * API.  This examines only the characters at index and index+1.
5422     * @param text the characters to check
5423     * @param index the index of the first or only char forming the code point
5424     * @return the code point at the index
5425     * @stable ICU 3.0
5426     */
5427    public static final int codePointAt(char[] text, int index) {
5428        char c1 = text[index++];
5429        if (isHighSurrogate(c1)) {
5430            if (index < text.length) {
5431                char c2 = text[index];
5432                if (isLowSurrogate(c2)) {
5433                    return toCodePoint(c1, c2);
5434                }
5435            }
5436        }
5437        return c1;
5438    }
5439
5440    /**
5441     * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5442     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5443     * API.  This examines only the characters at index and index+1.
5444     * @param text the characters to check
5445     * @param index the index of the first or only char forming the code point
5446     * @param limit the limit of the valid text
5447     * @return the code point at the index
5448     * @stable ICU 3.0
5449     */
5450    public static final int codePointAt(char[] text, int index, int limit) {
5451    if (index >= limit || limit > text.length) {
5452        throw new IndexOutOfBoundsException  ();
5453    }
5454        char c1 = text[index++];
5455        if (isHighSurrogate(c1)) {
5456            if (index < limit) {
5457                char c2 = text[index];
5458                if (isLowSurrogate(c2)) {
5459                    return toCodePoint(c1, c2);
5460                }
5461            }
5462        }
5463        return c1;
5464    }
5465
5466    /**
5467     * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5468     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5469     * API.  This examines only the characters at index-1 and index-2.
5470     * @param seq the characters to check
5471     * @param index the index after the last or only char forming the code point
5472     * @return the code point before the index
5473     * @stable ICU 3.0
5474     */
5475//#ifdef FOUNDATION
5476    public static final int codePointBefore(StringBuffer   seq, int index) {
5477        return codePointBefore(seq.toString(), index);
5478    }
5479    public static final int codePointBefore(String   seq, int index) {
5480        char c2 = seq.charAt(--index);
5481        if (isLowSurrogate(c2)) {
5482            if (index > 0) {
5483                char c1 = seq.charAt(--index);
5484                if (isHighSurrogate(c1)) {
5485                    return toCodePoint(c1, c2);
5486                }
5487            }
5488        }
5489        return c2;
5490    }
5491//#else
5492//##    public static final int codePointBefore(CharSequence seq, int index) {
5493//##        char c2 = seq.charAt(--index);
5494//##        if (isLowSurrogate(c2)) {
5495//##            if (index > 0) {
5496//##                char c1 = seq.charAt(--index);
5497//##                if (isHighSurrogate(c1)) {
5498//##                    return toCodePoint(c1, c2);
5499//##                }
5500//##            }
5501//##        }
5502//##        return c2;
5503//##    }
5504//#endif
5505//#ifdef ECLIPSE_FRAGMENT
5506//##    public static final int codePointBefore(String seq, int index) {
5507//##        return codePointBefore((CharSequence)seq, index);
5508//##    }
5509//##    public static final int codePointBefore(StringBuffer seq, int index) {
5510//##        return codePointBefore((CharSequence)seq, index);
5511//##    }
5512//#endif
5513
5514    /**
5515     * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5516     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5517     * API.  This examines only the characters at index-1 and index-2.
5518     * @param text the characters to check
5519     * @param index the index after the last or only char forming the code point
5520     * @return the code point before the index
5521     * @stable ICU 3.0
5522     */
5523    public static final int codePointBefore(char[] text, int index) {
5524        char c2 = text[--index];
5525        if (isLowSurrogate(c2)) {
5526            if (index > 0) {
5527                char c1 = text[--index];
5528                if (isHighSurrogate(c1)) {
5529                    return toCodePoint(c1, c2);
5530                }
5531            }
5532        }
5533        return c2;
5534    }
5535
5536    /**
5537     * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5538     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5539     * API.  This examines only the characters at index-1 and index-2.
5540     * @param text the characters to check
5541     * @param index the index after the last or only char forming the code point
5542     * @param limit the start of the valid text
5543     * @return the code point before the index
5544     * @stable ICU 3.0
5545     */
5546    public static final int codePointBefore(char[] text, int index, int limit) {
5547    if (index <= limit || limit < 0) {
5548        throw new IndexOutOfBoundsException  ();
5549    }
5550        char c2 = text[--index];
5551        if (isLowSurrogate(c2)) {
5552            if (index > limit) {
5553                char c1 = text[--index];
5554                if (isHighSurrogate(c1)) {
5555                    return toCodePoint(c1, c2);
5556                }
5557            }
5558        }
5559        return c2;
5560    }
5561
5562    /**
5563     * Cover the JDK 1.5 API, for convenience.  Writes the chars representing the
5564     * code point into the destination at the given index.
5565     * @param cp the code point to convert
5566     * @param dst the destination array into which to put the char(s) representing the code point
5567     * @param dstIndex the index at which to put the first (or only) char
5568     * @return the count of the number of chars written (1 or 2)
5569     * @throws IllegalArgumentException if cp is not a valid code point
5570     * @stable ICU 3.0
5571     */
5572    public static final int toChars(int cp, char[] dst, int dstIndex) {
5573        if (cp >= 0) {
5574            if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
5575                dst[dstIndex] = (char)cp;
5576                return 1;
5577            }
5578            if (cp <= MAX_CODE_POINT) {
5579                dst[dstIndex] = UTF16.getLeadSurrogate(cp);
5580                dst[dstIndex+1] = UTF16.getTrailSurrogate(cp);
5581                return 2;
5582            }
5583        }
5584        throw new IllegalArgumentException  ();
5585    }
5586
5587    /**
5588     * Cover the JDK 1.5 API, for convenience.  Returns a char array
5589     * representing the code point.
5590     * @param cp the code point to convert
5591     * @return an array containing the char(s) representing the code point
5592     * @throws IllegalArgumentException if cp is not a valid code point
5593     * @stable ICU 3.0
5594     */
5595    public static final char[] toChars(int cp) {
5596        if (cp >= 0) {
5597            if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
5598                return new char[] { (char)cp };
5599            }
5600            if (cp <= MAX_CODE_POINT) {
5601                return new char[] {
5602                    UTF16.getLeadSurrogate(cp),
5603                    UTF16.getTrailSurrogate(cp)
5604                };
5605            }
5606        }
5607        throw new IllegalArgumentException  ();
5608    }
5609
5610    /**
5611     * Cover the JDK API, for convenience.  Return a byte representing the directionality of
5612     * the character.
5613     * <br/><b>Note</b>: Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined or
5614     * out-of-bounds characters.  <br/><b>Note</b>: The return value must be
5615     * tested using the constants defined in {@link UCharacterEnums.ECharacterDirection}
5616     * since the values are different from the ones defined by <code>java.lang.Character</code>.
5617     * @param cp the code point to check
5618     * @return the directionality of the code point
5619     * @see #getDirection
5620     * @stable ICU 3.0
5621     */
5622    public static byte getDirectionality(int cp)
5623    {
5624        return (byte)getDirection(cp);
5625    }
5626
5627    /**
5628     * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
5629     * @param text the characters to check
5630     * @param start the start of the range
5631     * @param limit the limit of the range
5632     * @return the number of code points in the range
5633     * @stable ICU 3.0
5634     */
5635//#ifdef FOUNDATION
5636    public static int codePointCount(String   text, int start, int limit) {
5637        if (start < 0 || limit < start || limit > text.length()) {
5638            throw new IndexOutOfBoundsException  ("start (" + start +
5639                ") or limit (" + limit +
5640                ") invalid or out of range 0, " + text.length());
5641        }
5642
5643        int len = limit - start;
5644        while (limit > start) {
5645            char ch = text.charAt(--limit);
5646            while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5647                ch = text.charAt(--limit);
5648                if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5649                    --len;
5650                    break;
5651                }
5652            }
5653        }
5654        return len;
5655    }
5656//#else
5657//##    public static int codePointCount(CharSequence text, int start, int limit) {
5658//##        if (start < 0 || limit < start || limit > text.length()) {
5659//##            throw new IndexOutOfBoundsException("start (" + start +
5660//##                ") or limit (" + limit +
5661//##                ") invalid or out of range 0, " + text.length());
5662//##        }
5663//##
5664//##        int len = limit - start;
5665//##        while (limit > start) {
5666//##            char ch = text.charAt(--limit);
5667//##            while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5668//##                ch = text.charAt(--limit);
5669//##                if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5670//##                    --len;
5671//##                    break;
5672//##                }
5673//##            }
5674//##        }
5675//##        return len;
5676//##    }
5677//#endif
5678//#ifdef ECLIPSE_FRAGMENT
5679//##    public static int codePointCount(String text, int start, int limit) {
5680//##        return codePointCount((CharSequence)text, start, limit);
5681//##    }
5682//#endif
5683
5684    /**
5685     * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
5686     * @param text the characters to check
5687     * @param start the start of the range
5688     * @param limit the limit of the range
5689     * @return the number of code points in the range
5690     * @stable ICU 3.0
5691     */
5692    public static int codePointCount(char[] text, int start, int limit) {
5693        if (start < 0 || limit < start || limit > text.length) {
5694            throw new IndexOutOfBoundsException  ("start (" + start +
5695                                                ") or limit (" + limit +
5696                                                ") invalid or out of range 0, " + text.length);
5697        }
5698
5699        int len = limit - start;
5700        while (limit > start) {
5701            char ch = text[--limit];
5702            while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5703                ch = text[--limit];
5704                if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5705                    --len;
5706                    break;
5707                }
5708            }
5709        }
5710        return len;
5711    }
5712
5713    /**
5714     * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
5715     * @param text the characters to check
5716     * @param index the index to adjust
5717     * @param codePointOffset the number of code points by which to offset the index
5718     * @return the adjusted index
5719     * @stable ICU 3.0
5720     */
5721//#ifdef FOUNDATION
5722    public static int offsetByCodePoints(String   text, int index, int codePointOffset) {
5723        if (index < 0 || index > text.length()) {
5724            throw new IndexOutOfBoundsException  ("index ( " + index +
5725                                                ") out of range 0, " + text.length());
5726        }
5727
5728        if (codePointOffset < 0) {
5729            while (++codePointOffset <= 0) {
5730                char ch = text.charAt(--index);
5731                while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
5732                    ch = text.charAt(--index);
5733                    if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5734                        if (++codePointOffset > 0) {
5735                            return index+1;
5736                        }
5737                    }
5738                }
5739            }
5740        } else {
5741            int limit = text.length();
5742            while (--codePointOffset >= 0) {
5743                char ch = text.charAt(index++);
5744                while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5745                    ch = text.charAt(index++);
5746                    if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5747                        if (--codePointOffset < 0) {
5748                            return index-1;
5749                        }
5750                    }
5751                }
5752            }
5753        }
5754
5755        return index;
5756    }
5757//#else
5758//##    public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
5759//##        if (index < 0 || index > text.length()) {
5760//##            throw new IndexOutOfBoundsException("index ( " + index +
5761//##                                                ") out of range 0, " + text.length());
5762//##        }
5763//##
5764//##        if (codePointOffset < 0) {
5765//##            while (++codePointOffset <= 0) {
5766//##                char ch = text.charAt(--index);
5767//##                while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
5768//##                    ch = text.charAt(--index);
5769//##                    if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5770//##                        if (++codePointOffset > 0) {
5771//##                            return index+1;
5772//##                        }
5773//##                    }
5774//##                }
5775//##            }
5776//##        } else {
5777//##            int limit = text.length();
5778//##            while (--codePointOffset >= 0) {
5779//##                char ch = text.charAt(index++);
5780//##                while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5781//##                    ch = text.charAt(index++);
5782//##                    if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5783//##                        if (--codePointOffset < 0) {
5784//##                            return index-1;
5785//##                        }
5786//##                    }
5787//##                }
5788//##            }
5789//##        }
5790//##
5791//##        return index;
5792//##    }
5793//#endif
5794//#ifdef ECLIPSE_FRAGMENT
5795//##    public static int offsetByCodePoints(String text, int index, int codePointOffset) {
5796//##        return offsetByCodePoints((CharSequence)text, index, codePointOffset);
5797//##    }
5798//#endif
5799    /**
5800     * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
5801     * @param text the characters to check
5802     * @param start the start of the range to check
5803     * @param count the length of the range to check
5804     * @param index the index to adjust
5805     * @param codePointOffset the number of code points by which to offset the index
5806     * @return the adjusted index
5807     * @stable ICU 3.0
5808     */
5809    public static int offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset) {
5810        int limit = start + count;
5811        if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
5812            throw new IndexOutOfBoundsException  ("index ( " + index +
5813                                                ") out of range " + start +
5814                                                ", " + limit +
5815                                                " in array 0, " + text.length);
5816        }
5817
5818        if (codePointOffset < 0) {
5819            while (++codePointOffset <= 0) {
5820                char ch = text[--index];
5821                if (index < start) {
5822                    throw new IndexOutOfBoundsException  ("index ( " + index +
5823                                                        ") < start (" + start +
5824                                                        ")");
5825                }
5826                while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
5827                    ch = text[--index];
5828                    if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5829                        if (++codePointOffset > 0) {
5830                            return index+1;
5831                        }
5832                    }
5833                }
5834            }
5835        } else {
5836            while (--codePointOffset >= 0) {
5837                char ch = text[index++];
5838                if (index > limit) {
5839                    throw new IndexOutOfBoundsException  ("index ( " + index +
5840                                                        ") > limit (" + limit +
5841                                                        ")");
5842                }
5843                while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5844                    ch = text[index++];
5845                    if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5846                        if (--codePointOffset < 0) {
5847                            return index-1;
5848                        }
5849                    }
5850                }
5851            }
5852        }
5853
5854        return index;
5855    }
5856
5857    // protected data members --------------------------------------------
5858    
5859    /**
5860     * Database storing the sets of character name
5861     */
5862    static UCharacterName NAME_ = null;
5863
5864    /**
5865     * Singleton object encapsulating the imported pnames.icu property aliases
5866     */
5867    static UPropertyAliases PNAMES_ = null;
5868      
5869    // block to initialise name database and unicode 1.0 data 
5870    static {
5871        try {
5872            PNAMES_ = new UPropertyAliases();
5873            NAME_ = UCharacterName.getInstance();
5874        } catch (IOException   e) {
5875            // e.printStackTrace();
5876            throw new MissingResourceException  (e.getMessage(),"","");
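5877            //throw new RuntimeException(e.getMessage());
5878            // NOTE(review): this comment used to say "do not throw an exception",
5879            // because ICU might be built modularly without names.icu and
5880            // pnames.icu -- but the code above does throw; confirm which is intended.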
5881        }
5882    }
5883        
5884    // private variables -------------------------------------------------
5885    
5886    /**
5887     * Database storing the sets of character property
5888     */
5889    private static final UCharacterProperty PROPERTY_;
5890    /**
5891     * For optimization
5892     */
5893    private static final char[] PROPERTY_TRIE_INDEX_;
5894    private static final char[] PROPERTY_TRIE_DATA_;
5895    private static final int PROPERTY_INITIAL_VALUE_;
5896
5897    private static final UCaseProps gCsp;
5898    private static final UBiDiProps gBdp;
5899
5900    // block to initialise character property database
5901    static
5902    {
5903        try
5904        {
5905            PROPERTY_ = UCharacterProperty.getInstance();
5906            PROPERTY_TRIE_INDEX_ = PROPERTY_.m_trieIndex_;
5907            PROPERTY_TRIE_DATA_ = PROPERTY_.m_trieData_;
5908            PROPERTY_INITIAL_VALUE_ = PROPERTY_.m_trieInitialValue_;
5909        }
5910        catch (Exception   e)
5911        {
5912            throw new MissingResourceException  (e.getMessage(),"","");
5913        }
5914
5915        /*
5916         * In ICU4J 3.2, most Unicode properties were loaded from uprops.icu.
5917         * ICU4J 3.4 adds ucase.icu for case mapping properties and
5918         * ubidi.icu for bidi/shaping properties and
5919         * removes case/bidi/shaping properties from uprops.icu.
5920         *
5921         * Loading of uprops.icu was always done during class loading of UCharacter.class.
5922         * In order to maintain performance for all such properties,
5923         * ucase.icu and ubidi.icu are also loaded during class loading of UCharacter.class.
5924         * It will not fail if they are missing.
5925         * These data items are loaded early to avoid having to synchronize access to them,
5926         * for thread safety and performance.
5927         *
5928         * We try to load these data items at most once.
5929         * If it works, we use the resulting singleton object.
5930         * If it fails, then we get a dummy object, which always works unless
5931         * we are seriously out of memory.
5932         * After UCharacter.class loading, we have a never-changing pointer to either the
5933         * real singleton or the dummy.
5934         *
5935         * This method is used in Unicode properties APIs that
5936         * do not have a service object and also do not have an error code parameter.
5937         * Other API implementations get the singleton themselves
5938         * (synchronized), store it in the service object, and report errors.
5939         */
5940        UCaseProps csp;
5941        try {
5942            csp=UCaseProps.getSingleton();
5943        } catch(IOException   e) {
5944            csp=UCaseProps.getDummy();
5945        }
5946        gCsp=csp;
5947
5948        UBiDiProps bdp;
5949        try {
5950            bdp=UBiDiProps.getSingleton();
5951        } catch(IOException   e) {
5952            bdp=UBiDiProps.getDummy();
5953        }
5954        gBdp=bdp;
5955    }
5956    
5957    /**
5958     * To get the last character out from a data type
5959     */
5960    private static final int LAST_CHAR_MASK_ = 0xFFFF;
5961      
5962    /**
5963     * To get the last byte out from a data type
5964     */
5965    private static final int LAST_BYTE_MASK_ = 0xFF;
5966      
5967    /**
5968     * Shift 16 bits
5969     */
5970    private static final int SHIFT_16_ = 16;
5971      
5972    /**
5973     * Shift 24 bits
5974     */
5975    private static final int SHIFT_24_ = 24;  
5976    
5977    /**
5978     * Decimal radix
5979     */
5980    private static final int DECIMAL_RADIX_ = 10;
5981      
5982    /**
5983     * No break space code point
5984     */
5985    private static final int NO_BREAK_SPACE_ = 0xA0;
5986      
5987    /**
5988     * Narrow no break space code point
5989     */
5990    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
5991      
5992    /**
5993     * Zero width no break space code point
5994     */
5995    private static final int ZERO_WIDTH_NO_BREAK_SPACE_ = 0xFEFF;
5996      
5997    /**
5998     * Ideographic number zero code point
5999     */
6000    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
6001            
6002    /**
6003     * CJK Ideograph, First code point
6004     */
6005    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
6006      
6007    /**
6008     * CJK Ideograph, Second code point
6009     */
6010    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
6011            
6012    /**
6013     * CJK Ideograph, Third code point
6014     */
6015    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
6016      
6017    /**
6018     * CJK Ideograph, Fourth code point
6019     */
6020    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8;
6021      
6022    /**
6023     * CJK Ideograph, FIFTH code point
6024     */
6025    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
6026      
6027    /**
6028     * CJK Ideograph, Sixth code point
6029     */
6030    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
6031            
6032    /**
6033     * CJK Ideograph, Seventh code point
6034     */
6035    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
6036      
6037    /**
6038     * CJK Ideograph, Eighth code point
6039     */
6040    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
6041      
6042    /**
6043     * CJK Ideograph, Ninth code point
6044     */
6045    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
6046      
6047    /**
6048     * Application Program command code point
6049     */
6050    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
6051      
6052    /**
6053     * Unit separator code point
6054     */
6055    private static final int UNIT_SEPARATOR_ = 0x001F;
6056      
6057    /**
6058     * Delete code point
6059     */
6060    private static final int DELETE_ = 0x007F;
6061    /**
6062     * ISO control character first range upper limit 0x0 - 0x1F
6063     */
6064    private static final int ISO_CONTROL_FIRST_RANGE_MAX_ = 0x1F;
6065    /**
6066     * Shift to get numeric type
6067     */
6068    private static final int NUMERIC_TYPE_SHIFT_ = 5;
6069    /**
6070     * Mask to get numeric type
6071     */
6072    private static final int NUMERIC_TYPE_MASK_ = 0x7 << NUMERIC_TYPE_SHIFT_;
6073      
6074    /* encoding of fractional and large numbers */
6075    private static final int MAX_SMALL_NUMBER=0xff;
6076
6077    private static final int FRACTION_NUM_SHIFT=3;        /* numerator: bits 7..3 */
6078    private static final int FRACTION_DEN_MASK=7;         /* denominator: bits 2..0 */
6079
6080    private static final int FRACTION_MAX_NUM=31;
6081    private static final int FRACTION_DEN_OFFSET=2;       /* denominator values are 2..9 */
6082
6083    private static final int FRACTION_MIN_DEN=FRACTION_DEN_OFFSET;
6084    private static final int FRACTION_MAX_DEN=FRACTION_MIN_DEN+FRACTION_DEN_MASK;
6085
6086    private static final int LARGE_MANT_SHIFT=4;          /* mantissa: bits 7..4 */
6087    private static final int LARGE_EXP_MASK=0xf;          /* exponent: bits 3..0 */
6088    private static final int LARGE_EXP_OFFSET=2;          /* regular exponents 2..17 */
6089    private static final int LARGE_EXP_OFFSET_EXTRA=18;   /* extra large exponents 18..33 */
6090
6091    private static final int LARGE_MIN_EXP=LARGE_EXP_OFFSET;
6092    private static final int LARGE_MAX_EXP=LARGE_MIN_EXP+LARGE_EXP_MASK;
6093    private static final int LARGE_MAX_EXP_EXTRA=LARGE_EXP_OFFSET_EXTRA+LARGE_EXP_MASK;
6094
6095    /**
6096     * Han digit characters
6097     */
6098    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;    
6099    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;    
6100    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;    
6101    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;    
6102    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;    
6103    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;    
6104    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;    
6105    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;    
6106    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;    
6107    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;    
6108    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;    
6109    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;    
6110    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;    
6111    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;    
6112    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;    
6113    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;    
6114    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;    
6115    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6116
6117    /**
6118     * Zero Width Non Joiner.
6119     * Equivalent to icu4c ZWNJ.
6120     */
6121    private static final int ZERO_WIDTH_NON_JOINER_ = 0x200c;
6122    /**
6123     * Zero Width Joiner
6124     * Equivalent to icu4c ZWJ. 
6125     */
6126    private static final int ZERO_WIDTH_JOINER_ = 0x200d;
6127
6128    /*
6129     * Properties in vector word 2
6130     * Bits
6131     * 31..24   More binary properties (see UCharacterProperty)
6132     * 23..19   reserved
6133     * 18..14   Sentence Break
6134     * 13..10   Word Break
6135     *  9.. 5   Grapheme Cluster Break
6136     *  4.. 0   Decomposition Type
6137     */
6138    private static final int SB_MASK          = 0x0007c000;
6139    private static final int SB_SHIFT         = 14;
6140
6141    private static final int WB_MASK          = 0x00003c00;
6142    private static final int WB_SHIFT         = 10;
6143
6144    private static final int GCB_MASK         = 0x000003e0;
6145    private static final int GCB_SHIFT        = 5;
6146
6147    /**
6148     * Integer properties mask for decomposition type.
6149     * Equivalent to icu4c UPROPS_DT_MASK. 
6150     */    
6151    private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
6152
6153    /*
6154     * Properties in vector word 0
6155     * Bits
6156     * 31..24   DerivedAge version major/minor one nibble each (see UCharacterProperty)
6157     * 23..18   Line Break
6158     * 17..15   East Asian Width
6159     * 14.. 7   UBlockCode
6160     *  6.. 0   UScriptCode
6161     */
6162
6163    /**
6164     * Integer properties mask and shift values for East Asian cell width.
6165     * Equivalent to icu4c UPROPS_EA_MASK 
6166     */    
6167    private static final int EAST_ASIAN_MASK_ = 0x00038000;
6168    /**
6169     * Integer properties mask and shift values for East Asian cell width.
6170     * Equivalent to icu4c UPROPS_EA_SHIFT 
6171     */    
6172    private static final int EAST_ASIAN_SHIFT_ = 15;
6173    /**
6174     * Integer properties mask and shift values for line breaks.
6175     * Equivalent to icu4c UPROPS_LB_MASK 
6176     */    
6177    private static final int LINE_BREAK_MASK_ = 0x00FC0000;
6178    /**
6179     * Integer properties mask and shift values for line breaks.
6180     * Equivalent to icu4c UPROPS_LB_SHIFT 
6181     */    
6182    private static final int LINE_BREAK_SHIFT_ = 18;
6183    /**
6184     * Integer properties mask and shift values for blocks.
6185     * Equivalent to icu4c UPROPS_BLOCK_MASK 
6186     */    
6187    private static final int BLOCK_MASK_ = 0x00007f80;
6188    /**
6189     * Integer properties mask and shift values for blocks.
6190     * Equivalent to icu4c UPROPS_BLOCK_SHIFT 
6191     */    
6192    private static final int BLOCK_SHIFT_ = 7;
6193    /**
6194     * Integer properties mask and shift values for scripts.
6195     * Equivalent to icu4c UPROPS_SCRIPT_MASK
6196     */    
6197    private static final int SCRIPT_MASK_ = 0x0000007f;
6198                           
6199    // private constructor -----------------------------------------------
6200    ///CLOVER:OFF  
6201    /**
6202     * Private constructor to prevent instantiation
6203     */
6204    private UCharacter()
6205    {
6206    }
6207    ///CLOVER:ON 
6208    // private methods ---------------------------------------------------
6209    
6210    /**
6211     * Getting the digit values of characters like 'A' - 'Z', normal, 
6212     * half-width and full-width. This method assumes that the other digit 
6213     * characters are checked by the calling method.
6214     * @param ch character to test
6215     * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
6216     *         its corresponding digit will be returned.
6217     */
6218    private static int getEuropeanDigit(int ch) {
6219        if ((ch > 0x7a && ch < 0xff21)  
6220            || ch < 0x41 || (ch > 0x5a && ch < 0x61)
6221            || ch > 0xff5a || (ch > 0xff31 && ch < 0xff41)) {
6222            return -1;
6223        } 
6224        if (ch <= 0x7a) {
6225            // ch >= 0x41 or ch < 0x61 
6226            return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
6227        }
6228        // ch >= 0xff21
6229        if (ch <= 0xff3a) {
6230            return ch + 10 - 0xff21;
6231        } 
6232        // ch >= 0xff41 && ch <= 0xff5a
6233        return ch + 10 - 0xff41;
6234    }
6235    
6236    /**
6237     * Gets the numeric type of the property argument
6238     * @param props 32 bit property
6239     * @return the numeric type
6240     */
6241    private static int getNumericType(int props)
6242    {
6243        return (props & NUMERIC_TYPE_MASK_) >> NUMERIC_TYPE_SHIFT_;
6244    }
6245    
6246    /**
6247     * Gets the property value at the index.
6248     * This is optimized.
6249     * Note this is alittle different from CharTrie the index m_trieData_
6250     * is never negative.
6251     * This is a duplicate of UCharacterProperty.getProperty. For optimization
6252     * purposes, this method calls the trie data directly instead of through 
6253     * UCharacterProperty.getProperty.
6254     * @param ch code point whose property value is to be retrieved
6255     * @return property value of code point
6256     * @stable ICU 2.6
6257     */
6258    private static final int getProperty(int ch)
6259    {
6260        if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE 
6261            || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE 
6262                && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
6263            // BMP codepoint 0000..D7FF or DC00..FFFF
6264            try { // using try for ch < 0 is faster than using an if statement
6265                return PROPERTY_TRIE_DATA_[
6266                              (PROPERTY_TRIE_INDEX_[ch >> 5] << 2) 
6267                              + (ch & 0x1f)];
6268            } catch (ArrayIndexOutOfBoundsException   e) {
6269                return PROPERTY_INITIAL_VALUE_;
6270            }
6271        }
6272        if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
6273            // lead surrogate D800..DBFF
6274            return PROPERTY_TRIE_DATA_[
6275                              (PROPERTY_TRIE_INDEX_[(0x2800 >> 5) + (ch >> 5)] << 2) 
6276                              + (ch & 0x1f)];
6277        }
6278        // for optimization
6279        if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
6280            // supplementary code point 10000..10FFFF
6281            // look at the construction of supplementary characters
6282            // trail forms the ends of it.
6283            return PROPERTY_.m_trie_.getSurrogateValue(
6284                                      UTF16.getLeadSurrogate(ch), 
6285                                      (char)(ch & 0x3ff));
6286        }
6287        // return m_dataOffset_ if there is an error, in this case we return 
6288        // the default value: m_initialValue_
6289        // we cannot assume that m_initialValue_ is at offset 0
6290        // this is for optimization.
6291        return PROPERTY_INITIAL_VALUE_;
6292    }
6293}
6294
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags