Character


1   /*
2    * @(#)Character.java   1.31 04/09/14
3    *
4    * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
5    * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6    */
7   
8   package java.lang;
9   import java.util.Map  ;
10  import java.util.HashMap  ;
11  import java.util.Locale  ;
12  
13  /**
14   * The <code>Character</code> class wraps a value of the primitive
15   * type <code>char</code> in an object. An object of type
16   * <code>Character</code> contains a single field whose type is
17   * <code>char</code>.
18   * <p>
19   * In addition, this class provides several methods for determining
20   * a character's category (lowercase letter, digit, etc.) and for converting
21   * characters from uppercase to lowercase and vice versa.
22   * <p>
23   * Character information is based on the Unicode Standard, version 4.0.
24   * <p>
25   * The methods and data of class <code>Character</code> are defined by
26   * the information in the <i>UnicodeData</i> file that is part of the
27   * Unicode Character Database maintained by the Unicode
28   * Consortium. This file specifies various properties including name
29   * and general category for every defined Unicode code point or
30   * character range.
31   * <p>
32   * The file and its description are available from the Unicode Consortium at:
33   * <ul>
34   * <li><a HREF="http://www.unicode.org">http://www.unicode.org</a>
35   * </ul>
36   *
37   * <h4><a name="unicode">Unicode Character Representations</a></h4>
38   *
39   * <p>The <code>char</code> data type (and therefore the value that a
40   * <code>Character</code> object encapsulates) are based on the
41   * original Unicode specification, which defined characters as
42   * fixed-width 16-bit entities. The Unicode standard has since been
43   * changed to allow for characters whose representation requires more
44   * than 16 bits.  The range of legal <em>code point</em>s is now
45   * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
46   * (Refer to the <a
47   * HREF="http://www.unicode.org/reports/tr27/#notation"><i>
48   * definition</i></a> of the U+<i>n</i> notation in the Unicode
49   * standard.)
50   *
51   * <p>The set of characters from U+0000 to U+FFFF is sometimes
52   * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
53   * name="supplementary">Characters</a> whose code points are greater
54   * than U+FFFF are called <em>supplementary character</em>s.  The Java
55   * 2 platform uses the UTF-16 representation in <code>char</code>
56   * arrays and in the <code>String</code> and <code>StringBuffer</code>
57   * classes. In this representation, supplementary characters are
58   * represented as a pair of <code>char</code> values, the first from
59   * the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the
60   * second from the <em>low-surrogates</em> range
61   * (&#92;uDC00-&#92;uDFFF).
62   *
63   * <p>A <code>char</code> value, therefore, represents Basic
64   * Multilingual Plane (BMP) code points, including the surrogate
65   * code points, or code units of the UTF-16 encoding. An
66   * <code>int</code> value represents all Unicode code points,
67   * including supplementary code points. The lower (least significant)
68   * 21 bits of <code>int</code> are used to represent Unicode code
69   * points and the upper (most significant) 11 bits must be zero.
70   * Unless otherwise specified, the behavior with respect to
71   * supplementary characters and surrogate <code>char</code> values is
72   * as follows:
73   *
74   * <ul>
75   * <li>The methods that only accept a <code>char</code> value cannot support
76   * supplementary characters. They treat <code>char</code> values from the
77   * surrogate ranges as undefined characters. For example,
78   * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
79   * this specific value if followed by any low-surrogate value in a string
80   * would represent a letter.
81   *
82   * <li>The methods that accept an <code>int</code> value support all
83   * Unicode characters, including supplementary characters. For
84   * example, <code>Character.isLetter(0x2F81A)</code> returns
85   * <code>true</code> because the code point value represents a letter
86   * (a CJK ideograph).
87   * </ul>
88   *
89   * <p>In the J2SE API documentation, <em>Unicode code point</em> is
90   * used for character values in the range between U+0000 and U+10FFFF,
91   * and <em>Unicode code unit</em> is used for 16-bit
92   * <code>char</code> values that are code units of the <em>UTF-16</em>
93   * encoding. For more information on Unicode terminology, refer to the
94   * <a HREF="http://www.unicode.org/glossary/">Unicode Glossary</a>.
95   *
96   * @author  Lee Boynton
97   * @author  Guy Steele
98   * @author  Akira Tanaka
99   * @since   1.0
100  */
101 public final
102 class Character extends Object   implements java.io.Serializable  , Comparable  <Character  > {
103     /**
104      * The minimum radix available for conversion to and from strings.
105      * The constant value of this field is the smallest value permitted
106      * for the radix argument in radix-conversion methods such as the
107      * <code>digit</code> method, the <code>forDigit</code>
108      * method, and the <code>toString</code> method of class
109      * <code>Integer</code>.
110      *
111      * @see     java.lang.Character#digit(char, int)
112      * @see     java.lang.Character#forDigit(int, int)
113      * @see     java.lang.Integer#toString(int, int)
114      * @see     java.lang.Integer#valueOf(java.lang.String)
115      */
116     public static final int MIN_RADIX = 2;
117 
118     /**
119      * The maximum radix available for conversion to and from strings.
120      * The constant value of this field is the largest value permitted
121      * for the radix argument in radix-conversion methods such as the
122      * <code>digit</code> method, the <code>forDigit</code>
123      * method, and the <code>toString</code> method of class
124      * <code>Integer</code>.
125      *
126      * @see     java.lang.Character#digit(char, int)
127      * @see     java.lang.Character#forDigit(int, int)
128      * @see     java.lang.Integer#toString(int, int)
129      * @see     java.lang.Integer#valueOf(java.lang.String)
130      */
131     public static final int MAX_RADIX = 36;
132 
133     /**
134      * The constant value of this field is the smallest value of type
135      * <code>char</code>, <code>'&#92;u0000'</code>.
136      *
137      * @since   1.0.2
138      */
139     public static final char   MIN_VALUE = '\u0000';
140 
141     /**
142      * The constant value of this field is the largest value of type
143      * <code>char</code>, <code>'&#92;uFFFF'</code>.
144      *
145      * @since   1.0.2
146      */
147     public static final char   MAX_VALUE = '\uffff';
148 
149     /**
150      * The <code>Class</code> instance representing the primitive type
151      * <code>char</code>.
152      *
153      * @since   1.1
154      */
155     public static final Class  <Character  > TYPE = Class.getPrimitiveClass("char");
156 
157    /*
158     * Normative general types
159     */
160 
161    /*
162     * General character types
163     */
164 
165    /**
166     * General category "Cn" in the Unicode specification.
167     * @since   1.1
168     */
169     public static final byte
170         UNASSIGNED                  = 0;
171 
172    /**
173     * General category "Lu" in the Unicode specification.
174     * @since   1.1
175     */
176     public static final byte
177         UPPERCASE_LETTER            = 1;
178 
179    /**
180     * General category "Ll" in the Unicode specification.
181     * @since   1.1
182     */
183     public static final byte
184         LOWERCASE_LETTER            = 2;
185 
186    /**
187     * General category "Lt" in the Unicode specification.
188     * @since   1.1
189     */
190     public static final byte
191         TITLECASE_LETTER            = 3;
192 
193    /**
194     * General category "Lm" in the Unicode specification.
195     * @since   1.1
196     */
197     public static final byte
198         MODIFIER_LETTER             = 4;
199 
200    /**
201     * General category "Lo" in the Unicode specification.
202     * @since   1.1
203     */
204     public static final byte
205         OTHER_LETTER                = 5;
206 
207    /**
208     * General category "Mn" in the Unicode specification.
209     * @since   1.1
210     */
211     public static final byte
212         NON_SPACING_MARK            = 6;
213 
214    /**
215     * General category "Me" in the Unicode specification.
216     * @since   1.1
217     */
218     public static final byte
219         ENCLOSING_MARK              = 7;
220 
221    /**
222     * General category "Mc" in the Unicode specification.
223     * @since   1.1
224     */
225     public static final byte
226         COMBINING_SPACING_MARK      = 8;
227 
228    /**
229     * General category "Nd" in the Unicode specification.
230     * @since   1.1
231     */
232     public static final byte
233         DECIMAL_DIGIT_NUMBER        = 9;
234 
235    /**
236     * General category "Nl" in the Unicode specification.
237     * @since   1.1
238     */
239     public static final byte
240         LETTER_NUMBER               = 10;
241 
242    /**
243     * General category "No" in the Unicode specification.
244     * @since   1.1
245     */
246     public static final byte
247         OTHER_NUMBER                = 11;
248 
249    /**
250     * General category "Zs" in the Unicode specification.
251     * @since   1.1
252     */
253     public static final byte
254         SPACE_SEPARATOR             = 12;
255 
256    /**
257     * General category "Zl" in the Unicode specification.
258     * @since   1.1
259     */
260     public static final byte
261         LINE_SEPARATOR              = 13;
262 
263    /**
264     * General category "Zp" in the Unicode specification.
265     * @since   1.1
266     */
267     public static final byte
268         PARAGRAPH_SEPARATOR         = 14;
269 
270    /**
271     * General category "Cc" in the Unicode specification.
272     * @since   1.1
273     */
274     public static final byte
275         CONTROL                     = 15;
276 
277    /**
278     * General category "Cf" in the Unicode specification.
279     * @since   1.1
280     */
281     public static final byte
282         FORMAT                      = 16;
283 
284    /**
285     * General category "Co" in the Unicode specification.
286     * @since   1.1
287     */
288     public static final byte
289         PRIVATE_USE                 = 18;
290 
291    /**
292     * General category "Cs" in the Unicode specification.
293     * @since   1.1
294     */
295     public static final byte
296         SURROGATE                   = 19;
297 
298    /**
299     * General category "Pd" in the Unicode specification.
300     * @since   1.1
301     */
302     public static final byte
303         DASH_PUNCTUATION            = 20;
304 
305    /**
306     * General category "Ps" in the Unicode specification.
307     * @since   1.1
308     */
309     public static final byte
310         START_PUNCTUATION           = 21;
311 
312    /**
313     * General category "Pe" in the Unicode specification.
314     * @since   1.1
315     */
316     public static final byte
317         END_PUNCTUATION             = 22;
318 
319    /**
320     * General category "Pc" in the Unicode specification.
321     * @since   1.1
322     */
323     public static final byte
324         CONNECTOR_PUNCTUATION       = 23;
325 
326    /**
327     * General category "Po" in the Unicode specification.
328     * @since   1.1
329     */
330     public static final byte
331         OTHER_PUNCTUATION           = 24;
332 
333    /**
334     * General category "Sm" in the Unicode specification.
335     * @since   1.1
336     */
337     public static final byte
338         MATH_SYMBOL                 = 25;
339 
340    /**
341     * General category "Sc" in the Unicode specification.
342     * @since   1.1
343     */
344     public static final byte
345         CURRENCY_SYMBOL             = 26;
346 
347    /**
348     * General category "Sk" in the Unicode specification.
349     * @since   1.1
350     */
351     public static final byte
352         MODIFIER_SYMBOL             = 27;
353 
354    /**
355     * General category "So" in the Unicode specification.
356     * @since   1.1
357     */
358     public static final byte
359         OTHER_SYMBOL                = 28;
360 
361    /**
362     * General category "Pi" in the Unicode specification.
363     * @since   1.4
364     */
365     public static final byte
366         INITIAL_QUOTE_PUNCTUATION   = 29;
367 
368    /**
369     * General category "Pf" in the Unicode specification.
370     * @since   1.4
371     */
372     public static final byte
373         FINAL_QUOTE_PUNCTUATION     = 30;
374 
375     /**
376      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
377      */
378      static final int ERROR = 0xFFFFFFFF;
379 
380 
381     /**
382      * Undefined bidirectional character type. Undefined <code>char</code>
383      * values have undefined directionality in the Unicode specification.
384      * @since 1.4
385      */
386      public static final byte DIRECTIONALITY_UNDEFINED = -1;
387 
388     /**
389      * Strong bidirectional character type "L" in the Unicode specification.
390      * @since 1.4
391      */
392     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
393 
394     /**
395      * Strong bidirectional character type "R" in the Unicode specification.
396      * @since 1.4
397      */
398     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
399 
400     /**
401     * Strong bidirectional character type "AL" in the Unicode specification.
402      * @since 1.4
403      */
404     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
405 
406     /**
407      * Weak bidirectional character type "EN" in the Unicode specification.
408      * @since 1.4
409      */
410     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
411 
412     /**
413      * Weak bidirectional character type "ES" in the Unicode specification.
414      * @since 1.4
415      */
416     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
417 
418     /**
419      * Weak bidirectional character type "ET" in the Unicode specification.
420      * @since 1.4
421      */
422     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
423 
424     /**
425      * Weak bidirectional character type "AN" in the Unicode specification.
426      * @since 1.4
427      */
428     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
429 
430     /**
431      * Weak bidirectional character type "CS" in the Unicode specification.
432      * @since 1.4
433      */
434     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
435 
436     /**
437      * Weak bidirectional character type "NSM" in the Unicode specification.
438      * @since 1.4
439      */
440     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
441 
442     /**
443      * Weak bidirectional character type "BN" in the Unicode specification.
444      * @since 1.4
445      */
446     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
447 
448     /**
449      * Neutral bidirectional character type "B" in the Unicode specification.
450      * @since 1.4
451      */
452     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
453 
454     /**
455      * Neutral bidirectional character type "S" in the Unicode specification.
456      * @since 1.4
457      */
458     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
459 
460     /**
461      * Neutral bidirectional character type "WS" in the Unicode specification.
462      * @since 1.4
463      */
464     public static final byte DIRECTIONALITY_WHITESPACE = 12;
465 
466     /**
467      * Neutral bidirectional character type "ON" in the Unicode specification.
468      * @since 1.4
469      */
470     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
471 
472     /**
473      * Strong bidirectional character type "LRE" in the Unicode specification.
474      * @since 1.4
475      */
476     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
477 
478     /**
479      * Strong bidirectional character type "LRO" in the Unicode specification.
480      * @since 1.4
481      */
482     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
483 
484     /**
485      * Strong bidirectional character type "RLE" in the Unicode specification.
486      * @since 1.4
487      */
488     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
489 
490     /**
491      * Strong bidirectional character type "RLO" in the Unicode specification.
492      * @since 1.4
493      */
494     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
495 
496     /**
497      * Weak bidirectional character type "PDF" in the Unicode specification.
498      * @since 1.4
499      */
500     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
501 
502     /**
503      * The minimum value of a Unicode high-surrogate code unit in the
504      * UTF-16 encoding. A high-surrogate is also known as a
505      * <i>leading-surrogate</i>.
506      *
507      * @since 1.5
508      */
509     public static final char MIN_HIGH_SURROGATE = '\uD800';
510 
511     /**
512      * The maximum value of a Unicode high-surrogate code unit in the
513      * UTF-16 encoding. A high-surrogate is also known as a
514      * <i>leading-surrogate</i>.
515      *
516      * @since 1.5
517      */
518     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
519 
520     /**
521      * The minimum value of a Unicode low-surrogate code unit in the
522      * UTF-16 encoding. A low-surrogate is also known as a
523      * <i>trailing-surrogate</i>.
524      *
525      * @since 1.5
526      */
527     public static final char MIN_LOW_SURROGATE  = '\uDC00';
528 
529     /**
530      * The maximum value of a Unicode low-surrogate code unit in the
531      * UTF-16 encoding. A low-surrogate is also known as a
532      * <i>trailing-surrogate</i>.
533      *
534      * @since 1.5
535      */
536     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
537 
538     /**
539      * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
540      *
541      * @since 1.5
542      */
543     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
544 
545     /**
546      * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
547      *
548      * @since 1.5
549      */
550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
551 
552     /**
553      * The minimum value of a supplementary code point.
554      *
555      * @since 1.5
556      */
557     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
558 
559     /**
560      * The minimum value of a Unicode code point.
561      * 
562      * @since 1.5
563      */
564     public static final int MIN_CODE_POINT = 0x000000;
565 
566     /**
567      * The maximum value of a Unicode code point.
568      *
569      * @since 1.5
570      */
571     public static final int MAX_CODE_POINT = 0x10ffff;
572 
573 
574     /**
575      * Instances of this class represent particular subsets of the Unicode
576      * character set.  The only family of subsets defined in the
577      * <code>Character</code> class is <code>{@link Character.UnicodeBlock
578      * UnicodeBlock}</code>.  Other portions of the Java API may define other
579      * subsets for their own purposes.
580      *
581      * @since 1.2
582      */
583     public static class Subset  {
584 
585         private String   name;
586 
587         /**
588          * Constructs a new <code>Subset</code> instance.
589          *
590          * @exception NullPointerException if name is <code>null</code>
591          * @param  name  The name of this subset
592          */
593         protected Subset(String   name) {
594             if (name == null) {
595                 throw new NullPointerException  ("name");
596             }
597             this.name = name;
598         }
599 
600         /**
601          * Compares two <code>Subset</code> objects for equality.
602          * This method returns <code>true</code> if and only if
603          * <code>this</code> and the argument refer to the same
604          * object; since this method is <code>final</code>, this
605          * guarantee holds for all subclasses.
606          */
607         public final boolean equals(Object   obj) {
608             return (this == obj);
609         }
610 
611         /**
612          * Returns the standard hash code as defined by the
613          * <code>{@link Object#hashCode}</code> method.  This method
614          * is <code>final</code> in order to ensure that the
615          * <code>equals</code> and <code>hashCode</code> methods will
616          * be consistent in all subclasses.
617          */
618         public final int hashCode() {
619             return super.hashCode();
620         }
621 
622         /**
623          * Returns the name of this subset.
624          */
625         public final String   toString() {
626             return name;
627         }
628     }
629 
630     /**
631      * A family of character subsets representing the character blocks in the
632      * Unicode specification. Character blocks generally define characters
633      * used for a specific script or purpose. A character is contained by
634      * at most one Unicode block.
635      *
636      * @since 1.2
637      */
638     public static final class UnicodeBlock extends Subset {
639 
640         private static Map   map = new HashMap  ();
641 
642         /**
643          * Create a UnicodeBlock with the given identifier name. 
644          * This name must be the same as the block identifier.
645          */
646         private UnicodeBlock(String   idName) {
647             super(idName);
648             map.put(idName.toUpperCase(Locale.US), this);
649         }
650 
651         /**
652          * Create a UnicodeBlock with the given identifier name and
653          * alias name.
654          */
655         private UnicodeBlock(String   idName, String   alias) {
656             this(idName);
657             map.put(alias.toUpperCase(Locale.US), this);
658         }
659 
660         /** 
661          * Create a UnicodeBlock with the given identifier name and 
662          * alias names.
663          */
664         private UnicodeBlock(String   idName, String  [] aliasName) {
665             this(idName);
666             if (aliasName != null) {
667                 for(int x=0; x<aliasName.length; ++x) {
668                     map.put(aliasName[x].toUpperCase(Locale.US), this);
669                 }
670             }
671         }
672 
673         /**
674          * Constant for the "Basic Latin" Unicode character block.
675          * @since 1.2
676          */
677         public static final UnicodeBlock  BASIC_LATIN = 
678             new UnicodeBlock("BASIC_LATIN", new String  [] {"Basic Latin", "BasicLatin" });
679 
680         /**
681          * Constant for the "Latin-1 Supplement" Unicode character block.
682          * @since 1.2
683          */
684         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
685             new UnicodeBlock("LATIN_1_SUPPLEMENT", new String  []{ "Latin-1 Supplement", "Latin-1Supplement"});
686 
687         /**
688          * Constant for the "Latin Extended-A" Unicode character block.
689          * @since 1.2
690          */
691         public static final UnicodeBlock LATIN_EXTENDED_A = 
692             new UnicodeBlock("LATIN_EXTENDED_A", new String  []{ "Latin Extended-A", "LatinExtended-A"});
693 
694         /**
695          * Constant for the "Latin Extended-B" Unicode character block.
696          * @since 1.2
697          */
698         public static final UnicodeBlock LATIN_EXTENDED_B = 
699             new UnicodeBlock("LATIN_EXTENDED_B", new String  [] {"Latin Extended-B", "LatinExtended-B"});
700 
701         /**
702          * Constant for the "IPA Extensions" Unicode character block.
703          * @since 1.2
704          */
705         public static final UnicodeBlock IPA_EXTENSIONS = 
706             new UnicodeBlock("IPA_EXTENSIONS", new String  [] {"IPA Extensions", "IPAExtensions"});
707 
708         /**
709          * Constant for the "Spacing Modifier Letters" Unicode character block.
710          * @since 1.2
711          */
712         public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 
713             new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String  [] { "Spacing Modifier Letters",
714                                                                         "SpacingModifierLetters"});
715 
716         /**
717          * Constant for the "Combining Diacritical Marks" Unicode character block.
718          * @since 1.2
719          */
720         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 
721             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String  [] {"Combining Diacritical Marks",
722                                                                           "CombiningDiacriticalMarks" });
723 
724         /**
725          * Constant for the "Greek and Coptic" Unicode character block.
726          * <p>
727          * This block was previously known as the "Greek" block.
728          *
729          * @since 1.2
730          */
731         public static final UnicodeBlock GREEK 
732             = new UnicodeBlock("GREEK", new String  [] {"Greek and Coptic", "GreekandCoptic"});
733 
734         /**
735          * Constant for the "Cyrillic" Unicode character block.
736          * @since 1.2
737          */
738         public static final UnicodeBlock CYRILLIC = 
739             new UnicodeBlock("CYRILLIC");
740 
741         /**
742          * Constant for the "Armenian" Unicode character block.
743          * @since 1.2
744          */
745         public static final UnicodeBlock ARMENIAN = 
746             new UnicodeBlock("ARMENIAN");
747 
748         /**
749          * Constant for the "Hebrew" Unicode character block.
750          * @since 1.2
751          */
752         public static final UnicodeBlock HEBREW = 
753             new UnicodeBlock("HEBREW");
754 
755         /**
756          * Constant for the "Arabic" Unicode character block.
757          * @since 1.2
758          */
759         public static final UnicodeBlock ARABIC = 
760             new UnicodeBlock("ARABIC");
761 
762         /**
763          * Constant for the "Devanagari" Unicode character block.
764          * @since 1.2
765          */
766         public static final UnicodeBlock DEVANAGARI = 
767             new UnicodeBlock("DEVANAGARI");
768 
769         /**
770          * Constant for the "Bengali" Unicode character block.
771          * @since 1.2
772          */
773         public static final UnicodeBlock BENGALI =
774             new UnicodeBlock("BENGALI");
775 
776         /**
777          * Constant for the "Gurmukhi" Unicode character block.
778          * @since 1.2
779          */
780         public static final UnicodeBlock GURMUKHI = 
781             new UnicodeBlock("GURMUKHI");
782 
783         /**
784          * Constant for the "Gujarati" Unicode character block.
785          * @since 1.2
786          */
787         public static final UnicodeBlock GUJARATI = 
788             new UnicodeBlock("GUJARATI");
789 
790         /**
791          * Constant for the "Oriya" Unicode character block.
792          * @since 1.2
793          */
794         public static final UnicodeBlock ORIYA = 
795             new UnicodeBlock("ORIYA");
796 
797         /**
798          * Constant for the "Tamil" Unicode character block.
799          * @since 1.2
800          */
801         public static final UnicodeBlock TAMIL = 
802             new UnicodeBlock("TAMIL");
803 
804         /**
805          * Constant for the "Telugu" Unicode character block.
806          * @since 1.2
807          */
808         public static final UnicodeBlock TELUGU = 
809             new UnicodeBlock("TELUGU");
810 
811         /**
812          * Constant for the "Kannada" Unicode character block.
813          * @since 1.2
814          */
815         public static final UnicodeBlock KANNADA = 
816             new UnicodeBlock("KANNADA");
817 
818         /**
819          * Constant for the "Malayalam" Unicode character block.
820          * @since 1.2
821          */
822         public static final UnicodeBlock MALAYALAM =
823             new UnicodeBlock("MALAYALAM");
824 
825         /**
826          * Constant for the "Thai" Unicode character block.
827          * @since 1.2
828          */
829         public static final UnicodeBlock THAI = 
830             new UnicodeBlock("THAI");
831 
832         /**
833          * Constant for the "Lao" Unicode character block.
834          * @since 1.2
835          */
836         public static final UnicodeBlock LAO = 
837             new UnicodeBlock("LAO");
838 
839         /**
840          * Constant for the "Tibetan" Unicode character block.
841          * @since 1.2
842          */
843         public static final UnicodeBlock TIBETAN = 
844             new UnicodeBlock("TIBETAN");
845 
846         /**
847          * Constant for the "Georgian" Unicode character block.
848          * @since 1.2
849          */
850         public static final UnicodeBlock GEORGIAN =
851             new UnicodeBlock("GEORGIAN");
852 
853         /**
854          * Constant for the "Hangul Jamo" Unicode character block.
855          * @since 1.2
856          */
857         public static final UnicodeBlock HANGUL_JAMO = 
858             new UnicodeBlock("HANGUL_JAMO", new String  [] {"Hangul Jamo", "HangulJamo"});
859 
860         /**
861          * Constant for the "Latin Extended Additional" Unicode character block.
862          * @since 1.2
863          */
864         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 
865             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String  [] {"Latin Extended Additional",
866                                                                         "LatinExtendedAdditional"});
867 
868         /**
869          * Constant for the "Greek Extended" Unicode character block.
870          * @since 1.2
871          */
872         public static final UnicodeBlock GREEK_EXTENDED = 
873             new UnicodeBlock("GREEK_EXTENDED", new String  [] {"Greek Extended", "GreekExtended"});
874 
875         /**
876          * Constant for the "General Punctuation" Unicode character block.
877          * @since 1.2
878          */
879         public static final UnicodeBlock GENERAL_PUNCTUATION = 
880             new UnicodeBlock("GENERAL_PUNCTUATION", new String  [] {"General Punctuation", "GeneralPunctuation"});
881 
882         /**
883          * Constant for the "Superscripts and Subscripts" Unicode character block.
884          * @since 1.2
885          */
886         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 
887             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String  [] {"Superscripts and Subscripts",
888                                                                           "SuperscriptsandSubscripts" });
889 
890         /**
891          * Constant for the "Currency Symbols" Unicode character block.
892          * @since 1.2
893          */
894         public static final UnicodeBlock CURRENCY_SYMBOLS = 
895             new UnicodeBlock("CURRENCY_SYMBOLS", new String  [] { "Currency Symbols", "CurrencySymbols"});
896 
897         /**
898          * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
899          * <p>
900          * This block was previously known as "Combining Marks for Symbols".
901          * @since 1.2
902          */
903         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 
904             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String  [] {"Combining Diacritical Marks for Symbols",
905                                                                                                                                                   "CombiningDiacriticalMarksforSymbols",
906                                                                           "Combining Marks for Symbols",
907                                                                           "CombiningMarksforSymbols" });
908 
909         /**
910          * Constant for the "Letterlike Symbols" Unicode character block.
911          * @since 1.2
912          */
913         public static final UnicodeBlock LETTERLIKE_SYMBOLS = 
914             new UnicodeBlock("LETTERLIKE_SYMBOLS", new String  [] { "Letterlike Symbols", "LetterlikeSymbols"});
915 
916         /**
917          * Constant for the "Number Forms" Unicode character block.
918          * @since 1.2
919          */
920         public static final UnicodeBlock NUMBER_FORMS = 
921             new UnicodeBlock("NUMBER_FORMS", new String  [] {"Number Forms", "NumberForms"});
922 
923         /**
924          * Constant for the "Arrows" Unicode character block.
925          * @since 1.2
926          */
927         public static final UnicodeBlock ARROWS = 
928             new UnicodeBlock("ARROWS");
929 
930         /**
931          * Constant for the "Mathematical Operators" Unicode character block.
932          * @since 1.2
933          */
934         public static final UnicodeBlock MATHEMATICAL_OPERATORS = 
935             new UnicodeBlock("MATHEMATICAL_OPERATORS", new String  [] {"Mathematical Operators",
936                                                                      "MathematicalOperators"});
937 
938         /**
939          * Constant for the "Miscellaneous Technical" Unicode character block.
940          * @since 1.2
941          */
942         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 
943             new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String  [] {"Miscellaneous Technical",
944                                                                       "MiscellaneousTechnical"});
945 
946         /**
947          * Constant for the "Control Pictures" Unicode character block.
948          * @since 1.2
949          */
950         public static final UnicodeBlock CONTROL_PICTURES = 
951             new UnicodeBlock("CONTROL_PICTURES", new String  [] {"Control Pictures", "ControlPictures"});
952 
953         /**
954          * Constant for the "Optical Character Recognition" Unicode character block.
955          * @since 1.2
956          */
957         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 
958             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String  [] {"Optical Character Recognition", 
959                                                                             "OpticalCharacterRecognition"});
960 
961         /**
962          * Constant for the "Enclosed Alphanumerics" Unicode character block.
963          * @since 1.2
964          */
965         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 
966             new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String  [] {"Enclosed Alphanumerics", 
967                                                                      "EnclosedAlphanumerics"});
968 
969         /**
970          * Constant for the "Box Drawing" Unicode character block.
971          * @since 1.2
972          */
973         public static final UnicodeBlock BOX_DRAWING = 
974             new UnicodeBlock("BOX_DRAWING", new String  [] {"Box Drawing", "BoxDrawing"});
975 
976         /**
977          * Constant for the "Block Elements" Unicode character block.
978          * @since 1.2
979          */
980         public static final UnicodeBlock BLOCK_ELEMENTS = 
981             new UnicodeBlock("BLOCK_ELEMENTS", new String  [] {"Block Elements", "BlockElements"});
982 
983         /**
984          * Constant for the "Geometric Shapes" Unicode character block.
985          * @since 1.2
986          */
987         public static final UnicodeBlock GEOMETRIC_SHAPES = 
988             new UnicodeBlock("GEOMETRIC_SHAPES", new String  [] {"Geometric Shapes", "GeometricShapes"});
989 
990         /**
991          * Constant for the "Miscellaneous Symbols" Unicode character block.
992          * @since 1.2
993          */
994         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 
995             new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String  [] {"Miscellaneous Symbols", 
996                                                                     "MiscellaneousSymbols"});
997 
998         /**
999          * Constant for the "Dingbats" Unicode character block.
1000         * @since 1.2
1001         */
1002        public static final UnicodeBlock DINGBATS = 
1003            new UnicodeBlock("DINGBATS");
1004
1005        /**
1006         * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1007         * @since 1.2
1008         */
1009        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 
1010            new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String  [] {"CJK Symbols and Punctuation",
1011                                                                          "CJKSymbolsandPunctuation"});
1012
1013        /**
1014         * Constant for the "Hiragana" Unicode character block.
1015         * @since 1.2
1016         */
1017        public static final UnicodeBlock HIRAGANA = 
1018            new UnicodeBlock("HIRAGANA");
1019
1020        /**
1021         * Constant for the "Katakana" Unicode character block.
1022         * @since 1.2
1023         */
1024        public static final UnicodeBlock KATAKANA = 
1025            new UnicodeBlock("KATAKANA");
1026
1027        /**
1028         * Constant for the "Bopomofo" Unicode character block.
1029         * @since 1.2
1030         */
1031        public static final UnicodeBlock BOPOMOFO = 
1032            new UnicodeBlock("BOPOMOFO");
1033
1034        /**
1035         * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1036         * @since 1.2
1037         */
1038        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 
1039            new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String  [] {"Hangul Compatibility Jamo",
1040                                                                        "HangulCompatibilityJamo"});
1041
1042        /**
1043         * Constant for the "Kanbun" Unicode character block.
1044         * @since 1.2
1045         */
1046        public static final UnicodeBlock KANBUN = 
1047            new UnicodeBlock("KANBUN");
1048
1049        /**
1050         * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1051         * @since 1.2
1052         */
1053        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 
1054            new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String  [] {"Enclosed CJK Letters and Months",
1055                                                                              "EnclosedCJKLettersandMonths"});
1056
1057        /**
1058         * Constant for the "CJK Compatibility" Unicode character block.
1059         * @since 1.2
1060         */
1061        public static final UnicodeBlock CJK_COMPATIBILITY = 
1062            new UnicodeBlock("CJK_COMPATIBILITY", new String  [] {"CJK Compatibility", "CJKCompatibility"});
1063
1064        /**
1065         * Constant for the "CJK Unified Ideographs" Unicode character block.
1066         * @since 1.2
1067         */
1068        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 
1069            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String  [] {"CJK Unified Ideographs", 
1070                                                                     "CJKUnifiedIdeographs"});
1071
1072        /**
1073         * Constant for the "Hangul Syllables" Unicode character block.
1074         * @since 1.2
1075         */
1076        public static final UnicodeBlock HANGUL_SYLLABLES = 
1077            new UnicodeBlock("HANGUL_SYLLABLES", new String  [] {"Hangul Syllables", "HangulSyllables"});
1078
1079        /**
1080         * Constant for the "Private Use Area" Unicode character block.
1081         * @since 1.2
1082         */
1083        public static final UnicodeBlock PRIVATE_USE_AREA = 
1084            new UnicodeBlock("PRIVATE_USE_AREA", new String  [] {"Private Use Area", "PrivateUseArea"});
1085
1086        /**
1087         * Constant for the "CJK Compatibility Ideographs" Unicode character block.
1088         * @since 1.2
1089         */
1090        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 
1091            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 
1092                             new String  [] {"CJK Compatibility Ideographs",
1093                                           "CJKCompatibilityIdeographs"});
1094
1095        /**
1096         * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1097         * @since 1.2
1098         */
1099        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 
1100            new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String  [] {"Alphabetic Presentation Forms",
1101                                                                            "AlphabeticPresentationForms"});
1102
1103        /**
1104         * Constant for the "Arabic Presentation Forms-A" Unicode character block.
1105         * @since 1.2
1106         */
1107        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 
1108            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String  [] {"Arabic Presentation Forms-A",
1109                                                                          "ArabicPresentationForms-A"});
1110
1111        /**
1112         * Constant for the "Combining Half Marks" Unicode character block.
1113         * @since 1.2
1114         */
1115        public static final UnicodeBlock COMBINING_HALF_MARKS = 
1116            new UnicodeBlock("COMBINING_HALF_MARKS", new String  [] {"Combining Half Marks",
1117                                                                   "CombiningHalfMarks"});
1118
1119        /**
1120         * Constant for the "CJK Compatibility Forms" Unicode character block.
1121         * @since 1.2
1122         */
1123        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 
1124            new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String  [] {"CJK Compatibility Forms",
1125                                                                      "CJKCompatibilityForms"});
1126
1127        /**
1128         * Constant for the "Small Form Variants" Unicode character block.
1129         * @since 1.2
1130         */
1131        public static final UnicodeBlock SMALL_FORM_VARIANTS = 
1132            new UnicodeBlock("SMALL_FORM_VARIANTS", new String  [] {"Small Form Variants", 
1133                                                                  "SmallFormVariants"});
1134
1135        /**
1136         * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1137         * @since 1.2
1138         */
1139        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 
1140            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String  [] {"Arabic Presentation Forms-B",
1141                                                                          "ArabicPresentationForms-B"});
1142
1143        /**
1144         * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
1145         * @since 1.2
1146         */
1147        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 
1148            new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 
1149                             new String  [] {"Halfwidth and Fullwidth Forms",
1150                                           "HalfwidthandFullwidthForms"});
1151
1152        /**
1153         * Constant for the "Specials" Unicode character block.
1154         * @since 1.2
1155         */
1156        public static final UnicodeBlock SPECIALS = 
1157            new UnicodeBlock("SPECIALS");
1158
1159        /**
1160         * Constant for the legacy "Surrogates Area" character block.
1161         *
1162         * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1163         *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1164         *             These new constants match the block definitions of the Unicode Standard.
1165         *             The {@link #of(char)} and {@link #of(int)} methods return the new constants, not SURROGATES_AREA.
1166         */
1167        @Deprecated  
1168        public static final UnicodeBlock SURROGATES_AREA = 
1169            new UnicodeBlock("SURROGATES_AREA");
1170
1171        /**
1172         * Constant for the "Syriac" Unicode character block.
1173         * @since 1.4
1174         */
1175        public static final UnicodeBlock SYRIAC = 
1176            new UnicodeBlock("SYRIAC");
1177
1178        /**
1179         * Constant for the "Thaana" Unicode character block.
1180         * @since 1.4
1181         */
1182        public static final UnicodeBlock THAANA = 
1183            new UnicodeBlock("THAANA");
1184
1185        /** 
1186         * Constant for the "Sinhala" Unicode character block.
1187         * @since 1.4
1188         */
1189        public static final UnicodeBlock SINHALA = 
1190            new UnicodeBlock("SINHALA");
1191
1192        /**
1193         * Constant for the "Myanmar" Unicode character block.
1194         * @since 1.4
1195         */
1196        public static final UnicodeBlock MYANMAR = 
1197            new UnicodeBlock("MYANMAR");
1198
1199        /**
1200         * Constant for the "Ethiopic" Unicode character block.
1201         * @since 1.4
1202         */
1203        public static final UnicodeBlock ETHIOPIC = 
1204            new UnicodeBlock("ETHIOPIC");
1205
1206        /**
1207         * Constant for the "Cherokee" Unicode character block.
1208         * @since 1.4
1209         */
1210        public static final UnicodeBlock CHEROKEE = 
1211            new UnicodeBlock("CHEROKEE");
1212
1213        /**
1214         * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1215         * @since 1.4
1216         */
1217        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 
1218            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1219                             new String  [] {"Unified Canadian Aboriginal Syllabics",
1220                                           "UnifiedCanadianAboriginalSyllabics"});
1221
1222        /**
1223         * Constant for the "Ogham" Unicode character block.
1224         * @since 1.4
1225         */
1226        public static final UnicodeBlock OGHAM = 
1227                             new UnicodeBlock("OGHAM");
1228
1229        /**
1230         * Constant for the "Runic" Unicode character block.
1231         * @since 1.4
1232         */
1233        public static final UnicodeBlock RUNIC = 
1234                             new UnicodeBlock("RUNIC");
1235
1236        /**
1237         * Constant for the "Khmer" Unicode character block.
1238         * @since 1.4
1239         */
1240        public static final UnicodeBlock KHMER = 
1241                             new UnicodeBlock("KHMER");
1242
1243        /**
1244         * Constant for the "Mongolian" Unicode character block.
1245         * @since 1.4
1246         */
1247        public static final UnicodeBlock MONGOLIAN = 
1248                             new UnicodeBlock("MONGOLIAN");
1249
1250        /**
1251         * Constant for the "Braille Patterns" Unicode character block.
1252         * @since 1.4
1253         */
1254        public static final UnicodeBlock BRAILLE_PATTERNS = 
1255            new UnicodeBlock("BRAILLE_PATTERNS", new String  [] {"Braille Patterns",
1256                                                               "BraillePatterns"});
1257
1258        /**
1259         * Constant for the "CJK Radicals Supplement" Unicode character block.
1260         * @since 1.4
1261         */
1262        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 
1263             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String  [] {"CJK Radicals Supplement",
1264                                                                       "CJKRadicalsSupplement"});
1265
1266        /**
1267         * Constant for the "Kangxi Radicals" Unicode character block.
1268         * @since 1.4
1269         */
1270        public static final UnicodeBlock KANGXI_RADICALS = 
1271            new UnicodeBlock("KANGXI_RADICALS", new String  [] {"Kangxi Radicals", "KangxiRadicals"});
1272
1273        /**
1274         * Constant for the "Ideographic Description Characters" Unicode character block.
1275         * @since 1.4
1276         */
1277        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1278            new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String  [] {"Ideographic Description Characters",
1279                                                                                 "IdeographicDescriptionCharacters"});
1280
1281        /**
1282         * Constant for the "Bopomofo Extended" Unicode character block.
1283         * @since 1.4
1284         */
1285        public static final UnicodeBlock BOPOMOFO_EXTENDED = 
1286            new UnicodeBlock("BOPOMOFO_EXTENDED", new String  [] {"Bopomofo Extended", 
1287                                                                "BopomofoExtended"});
1288
1289        /**
1290         * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1291         * @since 1.4
1292         */
1293        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 
1294            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String  [] {"CJK Unified Ideographs Extension A",
1295                                                                                 "CJKUnifiedIdeographsExtensionA"});
1296
1297        /**
1298         * Constant for the "Yi Syllables" Unicode character block.
1299         * @since 1.4
1300         */
1301        public static final UnicodeBlock YI_SYLLABLES = 
1302            new UnicodeBlock("YI_SYLLABLES", new String  [] {"Yi Syllables", "YiSyllables"});
1303
1304        /**
1305         * Constant for the "Yi Radicals" Unicode character block.
1306         * @since 1.4
1307         */
1308        public static final UnicodeBlock YI_RADICALS =
1309            new UnicodeBlock("YI_RADICALS", new String  [] {"Yi Radicals", "YiRadicals"});
1310        
1311
1312        /**
1313         * Constant for the "Cyrillic Supplementary" Unicode character block.
1314         * @since 1.5
1315         */
1316        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 
1317            new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", new String  [] {"Cyrillic Supplementary",
1318                                                                     "CyrillicSupplementary"});
1319
1320        /**
1321         * Constant for the "Tagalog" Unicode character block.
1322         * @since 1.5
1323         */
1324        public static final UnicodeBlock TAGALOG = 
1325            new UnicodeBlock("TAGALOG");
1326
1327        /**
1328         * Constant for the "Hanunoo" Unicode character block.
1329         * @since 1.5
1330         */
1331        public static final UnicodeBlock HANUNOO = 
1332            new UnicodeBlock("HANUNOO");
1333
1334        /**
1335         * Constant for the "Buhid" Unicode character block.
1336         * @since 1.5
1337         */
1338        public static final UnicodeBlock BUHID = 
1339            new UnicodeBlock("BUHID");
1340
1341        /**
1342         * Constant for the "Tagbanwa" Unicode character block.
1343         * @since 1.5
1344         */
1345        public static final UnicodeBlock TAGBANWA = 
1346            new UnicodeBlock("TAGBANWA");
1347
1348        /**
1349         * Constant for the "Limbu" Unicode character block.
1350         * @since 1.5
1351         */
1352        public static final UnicodeBlock LIMBU = 
1353            new UnicodeBlock("LIMBU");
1354
1355        /**
1356         * Constant for the "Tai Le" Unicode character block.
1357         * @since 1.5
1358         */
1359        public static final UnicodeBlock TAI_LE = 
1360            new UnicodeBlock("TAI_LE", new String  [] {"Tai Le", "TaiLe"});
1361
1362        /**
1363         * Constant for the "Khmer Symbols" Unicode character block.
1364         * @since 1.5
1365         */
1366        public static final UnicodeBlock KHMER_SYMBOLS = 
1367            new UnicodeBlock("KHMER_SYMBOLS", new String  [] {"Khmer Symbols", "KhmerSymbols"});
1368
1369        /**
1370         * Constant for the "Phonetic Extensions" Unicode character block.
1371         * @since 1.5
1372         */
1373        public static final UnicodeBlock PHONETIC_EXTENSIONS = 
1374            new UnicodeBlock("PHONETIC_EXTENSIONS", new String  [] {"Phonetic Extensions", "PhoneticExtensions"});
1375
1376        /**
1377         * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1378         * @since 1.5
1379         */
1380        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 
1381            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 
1382                             new String  []{"Miscellaneous Mathematical Symbols-A",
1383                                          "MiscellaneousMathematicalSymbols-A"});
1384
1385        /**
1386         * Constant for the "Supplemental Arrows-A" Unicode character block.
1387         * @since 1.5
1388         */
1389        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 
1390            new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String  [] {"Supplemental Arrows-A",
1391                                                                    "SupplementalArrows-A"});
1392
1393        /**
1394         * Constant for the "Supplemental Arrows-B" Unicode character block.
1395         * @since 1.5
1396         */
1397        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 
1398            new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String  [] {"Supplemental Arrows-B",
1399                                                                    "SupplementalArrows-B"});
1400
1401        /**
1402         * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
1403         * @since 1.5
1404         */
1405        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1406                = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 
1407                                   new String  [] {"Miscellaneous Mathematical Symbols-B",
1408                                                 "MiscellaneousMathematicalSymbols-B"});
1409
1410        /**
1411         * Constant for the "Supplemental Mathematical Operators" Unicode character block.
1412         * @since 1.5
1413         */
1414        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 
1415            new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 
1416                             new String  []{"Supplemental Mathematical Operators",
1417                                          "SupplementalMathematicalOperators"} );
1418
1419        /**
1420         * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block.
1421         * @since 1.5
1422         */
1423        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 
1424            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String  [] {"Miscellaneous Symbols and Arrows",
1425                                                                               "MiscellaneousSymbolsandArrows"});
1426
1427        /**
1428         * Constant for the "Katakana Phonetic Extensions" Unicode character block.
1429         * @since 1.5
1430         */
1431        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 
1432            new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String  [] {"Katakana Phonetic Extensions",
1433                                                                           "KatakanaPhoneticExtensions"});
1434
1435        /**
1436         * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1437         * @since 1.5
1438         */
1439        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 
1440            new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String  [] {"Yijing Hexagram Symbols",
1441                                                                      "YijingHexagramSymbols"});
1442
1443        /**
1444         * Constant for the "Variation Selectors" Unicode character block.
1445         * @since 1.5
1446         */
1447        public static final UnicodeBlock VARIATION_SELECTORS = 
1448            new UnicodeBlock("VARIATION_SELECTORS", new String  [] {"Variation Selectors", "VariationSelectors"});
1449
1450        /**
1451         * Constant for the "Linear B Syllabary" Unicode character block.
1452         * @since 1.5
1453         */
1454        public static final UnicodeBlock LINEAR_B_SYLLABARY = 
1455            new UnicodeBlock("LINEAR_B_SYLLABARY", new String  [] {"Linear B Syllabary", "LinearBSyllabary"});
1456
1457        /**
1458         * Constant for the "Linear B Ideograms" Unicode character block.
1459         * @since 1.5
1460         */
1461        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 
1462            new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String  [] {"Linear B Ideograms", "LinearBIdeograms"});
1463
1464        /**
1465         * Constant for the "Aegean Numbers" Unicode character block.
1466         * @since 1.5
1467         */
1468        public static final UnicodeBlock AEGEAN_NUMBERS = 
1469            new UnicodeBlock("AEGEAN_NUMBERS", new String  [] {"Aegean Numbers", "AegeanNumbers"});
1470
1471        /**
1472         * Constant for the "Old Italic" Unicode character block.
1473         * @since 1.5
1474         */
1475        public static final UnicodeBlock OLD_ITALIC = 
1476            new UnicodeBlock("OLD_ITALIC", new String  [] {"Old Italic", "OldItalic"});
1477
1478        /**
1479         * Constant for the "Gothic" Unicode character block.
1480         * @since 1.5
1481         */
1482        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
1483
1484        /**
1485         * Constant for the "Ugaritic" Unicode character block.
1486         * @since 1.5
1487         */
1488        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
1489
1490        /**
1491         * Constant for the "Deseret" Unicode character block.
1492         * @since 1.5
1493         */
1494        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
1495
1496        /**
1497         * Constant for the "Shavian" Unicode character block.
1498         * @since 1.5
1499         */
1500        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
1501
1502        /**
1503         * Constant for the "Osmanya" Unicode character block.
1504         * @since 1.5
1505         */
1506        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
1507
1508        /**
1509         * Constant for the "Cypriot Syllabary" Unicode character block.
1510         * @since 1.5
1511         */
1512        public static final UnicodeBlock CYPRIOT_SYLLABARY = 
1513            new UnicodeBlock("CYPRIOT_SYLLABARY", new String  [] {"Cypriot Syllabary", "CypriotSyllabary"});
1514
1515        /**
1516         * Constant for the "Byzantine Musical Symbols" Unicode character block.
1517         * @since 1.5
1518         */
1519        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 
1520            new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String  [] {"Byzantine Musical Symbols",
1521                                                                        "ByzantineMusicalSymbols"});
1522
1523        /**
1524         * Constant for the "Musical Symbols" Unicode character block.
1525         * @since 1.5
1526         */
1527        public static final UnicodeBlock MUSICAL_SYMBOLS = 
1528            new UnicodeBlock("MUSICAL_SYMBOLS", new String  [] {"Musical Symbols", "MusicalSymbols"});
1529
1530        /**
1531         * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1532         * @since 1.5
1533         */
1534        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 
1535            new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String  [] {"Tai Xuan Jing Symbols",
1536                                                                     "TaiXuanJingSymbols"});
1537
1538        /**
1539         * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block.
1540         * @since 1.5
1541         */
1542        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 
1543            new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 
1544                             new String  [] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"});
1545
1546        /**
1547         * Constant for the "CJK Unified Ideographs Extension B" Unicode character block.
1548         * @since 1.5
1549         */
1550        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 
1551            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 
1552                             new String  [] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"});
1553
1554        /**
1555         * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1556         * @since 1.5
1557         */
1558        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 
1559            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1560                             new String  []{"CJK Compatibility Ideographs Supplement",
1561                                          "CJKCompatibilityIdeographsSupplement"});
1562
1563        /**
1564         * Constant for the "Tags" Unicode character block.
1565         * @since 1.5
1566         */
1567        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
1568
1569        /**
1570         * Constant for the "Variation Selectors Supplement" Unicode character block.
1571         * @since 1.5
1572         */
1573        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 
1574            new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String  [] {"Variation Selectors Supplement",
1575                                                                             "VariationSelectorsSupplement"});
1576
1577        /**
1578         * Constant for the "Supplementary Private Use Area-A" Unicode character block.
1579         * @since 1.5
1580         */
1581        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 
1582            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 
1583                             new String  [] {"Supplementary Private Use Area-A", 
1584                                           "SupplementaryPrivateUseArea-A"});
1585
1586        /**
1587         * Constant for the "Supplementary Private Use Area-B" Unicode character block.
1588         * @since 1.5
1589         */
1590        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 
1591            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1592                             new String  [] {"Supplementary Private Use Area-B",
1593                                           "SupplementaryPrivateUseArea-B"});
1594
1595        /**
1596         * Constant for the "High Surrogates" Unicode character block.
1597         * This block represents codepoint values in the high surrogate
1598         * range: 0xD800 through 0xDB7F
1599         *
1600         * @since 1.5
1601         */
1602        public static final UnicodeBlock HIGH_SURROGATES = 
1603            new UnicodeBlock("HIGH_SURROGATES", new String  [] {"High Surrogates", "HighSurrogates"});
1604
1605        /**
1606         * Constant for the "High Private Use Surrogates" Unicode character block.
1607         * This block represents codepoint values in the high surrogate
1608         * range: 0xDB80 through 0xDBFF
1609         *
1610         * @since 1.5
1611         */
1612        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 
1613            new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String  [] { "High Private Use Surrogates",
1614                                                                           "HighPrivateUseSurrogates"});
1615
1616        /**
1617         * Constant for the "Low Surrogates" Unicode character block.
1618         * This block represents codepoint values in the high surrogate
1619         * range: 0xDC00 through 0xDFFF
1620         *
1621         * @since 1.5
1622         */
1623        public static final UnicodeBlock LOW_SURROGATES = 
1624            new UnicodeBlock("LOW_SURROGATES", new String  [] {"Low Surrogates", "LowSurrogates"});
1625     
1626        private static final int blockStarts[] = {
1627            0x0000, // Basic Latin
1628            0x0080, // Latin-1 Supplement
1629            0x0100, // Latin Extended-A
1630            0x0180, // Latin Extended-B
1631            0x0250, // IPA Extensions
1632            0x02B0, // Spacing Modifier Letters
1633            0x0300, // Combining Diacritical Marks
1634            0x0370, // Greek and Coptic
1635            0x0400, // Cyrillic
1636            0x0500, // Cyrillic Supplementary
1637            0x0530, // Armenian
1638            0x0590, // Hebrew
1639            0x0600, // Arabic
1640            0x0700, // Syriac
1641            0x0750, // unassigned
1642            0x0780, // Thaana
1643            0x07C0, // unassigned
1644            0x0900, // Devanagari
1645            0x0980, // Bengali
1646            0x0A00, // Gurmukhi
1647            0x0A80, // Gujarati
1648            0x0B00, // Oriya
1649            0x0B80, // Tamil
1650            0x0C00, // Telugu
1651            0x0C80, // Kannada
1652            0x0D00, // Malayalam
1653            0x0D80, // Sinhala
1654            0x0E00, // Thai
1655            0x0E80, // Lao
1656            0x0F00, // Tibetan
1657            0x1000, // Myanmar
1658            0x10A0, // Georgian
1659            0x1100, // Hangul Jamo
1660            0x1200, // Ethiopic
1661            0x1380, // unassigned
1662            0x13A0, // Cherokee
1663            0x1400, // Unified Canadian Aboriginal Syllabics
1664            0x1680, // Ogham
1665            0x16A0, // Runic
1666            0x1700, // Tagalog
1667            0x1720, // Hanunoo
1668            0x1740, // Buhid
1669            0x1760, // Tagbanwa
1670            0x1780, // Khmer
1671            0x1800, // Mongolian
1672            0x18B0, // unassigned
1673            0x1900, // Limbu
1674            0x1950, // Tai Le
1675            0x1980, // unassigned
1676            0x19E0, // Khmer Symbols
1677            0x1A00, // unassigned
1678            0x1D00, // Phonetic Extensions
1679            0x1D80, // unassigned
1680            0x1E00, // Latin Extended Additional
1681            0x1F00, // Greek Extended
1682            0x2000, // General Punctuation
1683            0x2070, // Superscripts and Subscripts
1684            0x20A0, // Currency Symbols
1685            0x20D0, // Combining Diacritical Marks for Symbols
1686            0x2100, // Letterlike Symbols
1687            0x2150, // Number Forms
1688            0x2190, // Arrows
1689            0x2200, // Mathematical Operators
1690            0x2300, // Miscellaneous Technical
1691            0x2400, // Control Pictures
1692            0x2440, // Optical Character Recognition
1693            0x2460, // Enclosed Alphanumerics
1694            0x2500, // Box Drawing
1695            0x2580, // Block Elements
1696            0x25A0, // Geometric Shapes
1697            0x2600, // Miscellaneous Symbols
1698            0x2700, // Dingbats
1699            0x27C0, // Miscellaneous Mathematical Symbols-A
1700            0x27F0, // Supplemental Arrows-A
1701            0x2800, // Braille Patterns
1702            0x2900, // Supplemental Arrows-B
1703            0x2980, // Miscellaneous Mathematical Symbols-B
1704            0x2A00, // Supplemental Mathematical Operators
1705            0x2B00, // Miscellaneous Symbols and Arrows
1706            0x2C00, // unassigned
1707            0x2E80, // CJK Radicals Supplement
1708            0x2F00, // Kangxi Radicals
1709            0x2FE0, // unassigned
1710            0x2FF0, // Ideographic Description Characters
1711            0x3000, // CJK Symbols and Punctuation
1712            0x3040, // Hiragana
1713            0x30A0, // Katakana
1714            0x3100, // Bopomofo
1715            0x3130, // Hangul Compatibility Jamo
1716            0x3190, // Kanbun
1717            0x31A0, // Bopomofo Extended
1718            0x31C0, // unassigned
1719            0x31F0, // Katakana Phonetic Extensions
1720            0x3200, // Enclosed CJK Letters and Months
1721            0x3300, // CJK Compatibility
1722            0x3400, // CJK Unified Ideographs Extension A
1723            0x4DC0, // Yijing Hexagram Symbols
1724            0x4E00, // CJK Unified Ideographs
1725            0xA000, // Yi Syllables
1726            0xA490, // Yi Radicals
1727            0xA4D0, // unassigned
1728            0xAC00, // Hangul Syllables
1729            0xD7B0, // unassigned
1730            0xD800, // High Surrogates
1731            0xDB80, // High Private Use Surrogates
1732            0xDC00, // Low Surrogates
1733            0xE000, // Private Use
1734            0xF900, // CJK Compatibility Ideographs
1735            0xFB00, // Alphabetic Presentation Forms
1736            0xFB50, // Arabic Presentation Forms-A
1737            0xFE00, // Variation Selectors
1738            0xFE10, // unassigned
1739            0xFE20, // Combining Half Marks
1740            0xFE30, // CJK Compatibility Forms
1741            0xFE50, // Small Form Variants
1742            0xFE70, // Arabic Presentation Forms-B
1743            0xFF00, // Halfwidth and Fullwidth Forms
1744            0xFFF0, // Specials
1745            0x10000, // Linear B Syllabary
1746            0x10080, // Linear B Ideograms
1747            0x10100, // Aegean Numbers
1748            0x10140, // unassigned
1749            0x10300, // Old Italic
1750            0x10330, // Gothic
1751            0x10350, // unassigned
1752            0x10380, // Ugaritic
1753            0x103A0, // unassigned
1754            0x10400, // Deseret
1755            0x10450, // Shavian
1756            0x10480, // Osmanya
1757            0x104B0, // unassigned
1758            0x10800, // Cypriot Syllabary
1759            0x10840, // unassigned
1760            0x1D000, // Byzantine Musical Symbols
1761            0x1D100, // Musical Symbols
1762            0x1D200, // unassigned
1763            0x1D300, // Tai Xuan Jing Symbols
1764            0x1D360, // unassigned
1765            0x1D400, // Mathematical Alphanumeric Symbols
1766            0x1D800, // unassigned
1767            0x20000, // CJK Unified Ideographs Extension B
1768            0x2A6E0, // unassigned
1769            0x2F800, // CJK Compatibility Ideographs Supplement
1770            0x2FA20, // unassigned
1771            0xE0000, // Tags
1772            0xE0080, // unassigned
1773            0xE0100, // Variation Selectors Supplement
1774            0xE01F0, // unassigned
1775            0xF0000, // Supplementary Private Use Area-A
1776            0x100000, // Supplementary Private Use Area-B
1777        };
1778
1779        private static final UnicodeBlock[] blocks = {
1780            BASIC_LATIN,
1781            LATIN_1_SUPPLEMENT,
1782            LATIN_EXTENDED_A,
1783            LATIN_EXTENDED_B,
1784            IPA_EXTENSIONS,
1785            SPACING_MODIFIER_LETTERS,
1786            COMBINING_DIACRITICAL_MARKS,
1787            GREEK,
1788            CYRILLIC,
1789            CYRILLIC_SUPPLEMENTARY,
1790            ARMENIAN,
1791            HEBREW,
1792            ARABIC,
1793            SYRIAC,
1794            null,
1795            THAANA,
1796            null,
1797            DEVANAGARI,
1798            BENGALI,
1799            GURMUKHI,
1800            GUJARATI,
1801            ORIYA,
1802            TAMIL,
1803            TELUGU,
1804            KANNADA,
1805            MALAYALAM,
1806            SINHALA,
1807            THAI,
1808            LAO,
1809            TIBETAN,
1810            MYANMAR,
1811            GEORGIAN,
1812            HANGUL_JAMO,
1813            ETHIOPIC,
1814            null,
1815            CHEROKEE,
1816            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1817            OGHAM,
1818            RUNIC,
1819            TAGALOG,
1820            HANUNOO,
1821            BUHID,
1822            TAGBANWA,
1823            KHMER,
1824            MONGOLIAN,
1825            null,
1826            LIMBU,
1827            TAI_LE,
1828            null,
1829            KHMER_SYMBOLS,
1830            null,
1831            PHONETIC_EXTENSIONS,
1832            null,
1833            LATIN_EXTENDED_ADDITIONAL,
1834            GREEK_EXTENDED,
1835            GENERAL_PUNCTUATION,
1836            SUPERSCRIPTS_AND_SUBSCRIPTS,
1837            CURRENCY_SYMBOLS,
1838            COMBINING_MARKS_FOR_SYMBOLS,
1839            LETTERLIKE_SYMBOLS,
1840            NUMBER_FORMS,
1841            ARROWS,
1842            MATHEMATICAL_OPERATORS,
1843            MISCELLANEOUS_TECHNICAL,
1844            CONTROL_PICTURES,
1845            OPTICAL_CHARACTER_RECOGNITION,
1846            ENCLOSED_ALPHANUMERICS,
1847            BOX_DRAWING,
1848            BLOCK_ELEMENTS,
1849            GEOMETRIC_SHAPES,
1850            MISCELLANEOUS_SYMBOLS,
1851            DINGBATS,
1852            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1853            SUPPLEMENTAL_ARROWS_A,
1854            BRAILLE_PATTERNS,
1855            SUPPLEMENTAL_ARROWS_B,
1856            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1857            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1858            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1859            null,
1860            CJK_RADICALS_SUPPLEMENT,
1861            KANGXI_RADICALS,
1862            null,
1863            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1864            CJK_SYMBOLS_AND_PUNCTUATION,
1865            HIRAGANA,
1866            KATAKANA,
1867            BOPOMOFO,
1868            HANGUL_COMPATIBILITY_JAMO,
1869            KANBUN,
1870            BOPOMOFO_EXTENDED,
1871            null,
1872            KATAKANA_PHONETIC_EXTENSIONS,
1873            ENCLOSED_CJK_LETTERS_AND_MONTHS,
1874            CJK_COMPATIBILITY,
1875            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1876            YIJING_HEXAGRAM_SYMBOLS,
1877            CJK_UNIFIED_IDEOGRAPHS,
1878            YI_SYLLABLES,
1879            YI_RADICALS,
1880            null,
1881            HANGUL_SYLLABLES,
1882            null,
1883            HIGH_SURROGATES,
1884            HIGH_PRIVATE_USE_SURROGATES,
1885            LOW_SURROGATES,
1886            PRIVATE_USE_AREA,
1887            CJK_COMPATIBILITY_IDEOGRAPHS,
1888            ALPHABETIC_PRESENTATION_FORMS,
1889            ARABIC_PRESENTATION_FORMS_A,
1890            VARIATION_SELECTORS,
1891            null,
1892            COMBINING_HALF_MARKS,
1893            CJK_COMPATIBILITY_FORMS,
1894            SMALL_FORM_VARIANTS,
1895            ARABIC_PRESENTATION_FORMS_B,
1896            HALFWIDTH_AND_FULLWIDTH_FORMS,
1897            SPECIALS,
1898            LINEAR_B_SYLLABARY,
1899            LINEAR_B_IDEOGRAMS,
1900            AEGEAN_NUMBERS,
1901            null,
1902            OLD_ITALIC,
1903            GOTHIC,
1904            null,
1905            UGARITIC,
1906            null,
1907            DESERET,
1908            SHAVIAN,
1909            OSMANYA,
1910            null,
1911            CYPRIOT_SYLLABARY,
1912            null,
1913            BYZANTINE_MUSICAL_SYMBOLS,
1914            MUSICAL_SYMBOLS,
1915            null,
1916            TAI_XUAN_JING_SYMBOLS,
1917            null,
1918            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1919            null,
1920            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1921            null,
1922            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1923            null,
1924            TAGS,
1925            null,
1926            VARIATION_SELECTORS_SUPPLEMENT,
1927            null,
1928            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1929            SUPPLEMENTARY_PRIVATE_USE_AREA_B
1930        };
1931
1932
1933        /**
1934         * Returns the object representing the Unicode block containing the
1935         * given character, or <code>null</code> if the character is not a
1936         * member of a defined block.
1937         *
1938                 * <p><b>Note:</b> This method cannot handle <a
1939                 * HREF="Character.html#supplementary"> supplementary
1940                 * characters</a>. To support all Unicode characters,
1941                 * including supplementary characters, use the {@link
1942                 * #of(int)} method.
1943         *
1944         * @param   c  The character in question
1945         * @return  The <code>UnicodeBlock</code> instance representing the
1946         *          Unicode block of which this character is a member, or
1947         *          <code>null</code> if the character is not a member of any
1948         *          Unicode block
1949         */
1950        public static UnicodeBlock of(char c) {
1951            return of((int)c);
1952        }
1953
1954
1955        /**
1956         * Returns the object representing the Unicode block
1957         * containing the given character (Unicode code point), or
1958         * <code>null</code> if the character is not a member of a
1959         * defined block.
1960         *
1961                 * @param   codePoint the character (Unicode code point) in question.
1962         * @return  The <code>UnicodeBlock</code> instance representing the
1963         *          Unicode block of which this character is a member, or
1964         *          <code>null</code> if the character is not a member of any
1965         *          Unicode block
1966                 * @exception IllegalArgumentException if the specified
1967                 * <code>codePoint</code> is an invalid Unicode code point.
1968                 * @see Character#isValidCodePoint(int)
1969                 * @since   1.5
1970         */
1971        public static UnicodeBlock of(int codePoint) {
1972            if (!isValidCodePoint(codePoint)) {
1973                throw new IllegalArgumentException  ();
1974            }
1975
1976            int top, bottom, current;
1977            bottom = 0;
1978            top = blockStarts.length;
1979            current = top/2;
1980
1981            // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
1982            while (top - bottom > 1) {
1983                if (codePoint >= blockStarts[current]) {
1984                    bottom = current;
1985                } else {
1986                    top = current;
1987                }
1988                current = (top + bottom) / 2;
1989            }
1990            return blocks[current];
1991        }
1992
1993        /**
1994         * Returns the UnicodeBlock with the given name. Block
1995         * names are determined by The Unicode Standard. The file
1996         * Blocks-&lt;version&gt;.txt defines blocks for a particular
1997         * version of the standard. The {@link Character} class specifies
1998         * the version of the standard that it supports.
1999         * <p>
2000         * This method accepts block names in the following forms:
2001         * <ol>
2002         * <li> Canonical block names as defined by the Unicode Standard.
2003         * For example, the standard defines a "Basic Latin" block. Therefore, this
2004         * method accepts "Basic Latin" as a valid block name. The documentation of 
2005         * each UnicodeBlock provides the canonical name.
2006         * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
2007         * is a valid block name for the "Basic Latin" block.
2008         * <li>The text representation of each constant UnicodeBlock identifier.
2009         * For example, this method will return the {@link #BASIC_LATIN} block if
2010         * provided with the "BASIC_LATIN" name. This form replaces all spaces and
2011         *  hyphens in the canonical name with underscores.
2012         * </ol>
2013         * Finally, character case is ignored for all of the valid block name forms.
2014         * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
2015         * The en_US locale's case mapping rules are used to provide case-insensitive
2016         * string comparisons for block name validation.
2017         * <p>
2018         * If the Unicode Standard changes block names, both the previous and
2019         * current names will be accepted.
2020         *
2021         * @param blockName A <code>UnicodeBlock</code> name.
2022         * @return The <code>UnicodeBlock</code> instance identified
2023         *         by <code>blockName</code>
2024         * @throws IllegalArgumentException if <code>blockName</code> is an
2025         *         invalid name
2026         * @throws NullPointerException if <code>blockName</code> is null
2027         * @since 1.5
2028         */
2029        public static final UnicodeBlock forName(String   blockName) {
2030            UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US));
2031            if (block == null) {
2032                throw new IllegalArgumentException  ();
2033            }
2034            return block;
2035        }
2036    }
2037
2038
2039    /**
2040     * The value of the <code>Character</code>.
2041     *
2042     * @serial
2043     */
2044    private final char value;
2045
2046    /** use serialVersionUID from JDK 1.0.2 for interoperability */
2047    private static final long serialVersionUID = 3786198910865385080L;
2048
2049    /**
2050     * Constructs a newly allocated <code>Character</code> object that
2051     * represents the specified <code>char</code> value.
2052     *
2053     * @param  value   the value to be represented by the 
2054     *                  <code>Character</code> object.
2055     */
2056    public Character(char value) {
2057        this.value = value;
2058    }
2059
2060    private static class CharacterCache {
2061    private CharacterCache(){}
2062
2063    static final Character   cache[] = new Character  [127 + 1];
2064
2065    static {
2066        for(int i = 0; i < cache.length; i++)
2067        cache[i] = new Character  ((char)i);
2068    }
2069    }
2070
2071    /**
2072     * Returns a <tt>Character</tt> instance representing the specified
2073     * <tt>char</tt> value.
2074     * If a new <tt>Character</tt> instance is not required, this method
2075     * should generally be used in preference to the constructor
2076     * {@link #Character(char)}, as this method is likely to yield
2077     * significantly better space and time performance by caching
2078     * frequently requested values.
2079     *
2080     * @param  c a char value.
2081     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
2082     * @since  1.5
2083     */
2084    public static Character   valueOf(char c) {
2085    if(c <= 127) { // must cache
2086        return CharacterCache.cache[(int)c];
2087    }
2088        return new Character  (c);
2089    }
2090
2091    /**
2092     * Returns the value of this <code>Character</code> object.
2093     * @return  the primitive <code>char</code> value represented by
2094     *          this object.
2095     */
2096    public char charValue() {
2097        return value;
2098    }
2099
2100    /**
2101     * Returns a hash code for this <code>Character</code>.
2102     * @return  a hash code value for this object.
2103     */
2104    public int hashCode() {
2105        return (int)value;
2106    }
2107
2108    /**
2109     * Compares this object against the specified object.
2110     * The result is <code>true</code> if and only if the argument is not
2111     * <code>null</code> and is a <code>Character</code> object that
2112     * represents the same <code>char</code> value as this object.
2113     *
2114     * @param   obj   the object to compare with.
2115     * @return  <code>true</code> if the objects are the same;
2116     *          <code>false</code> otherwise.
2117     */
2118    public boolean equals(Object   obj) {
2119        if (obj instanceof Character  ) {
2120            return value == ((Character  )obj).charValue();
2121        }
2122        return false;
2123    }
2124
2125    /**
2126     * Returns a <code>String</code> object representing this
2127     * <code>Character</code>'s value.  The result is a string of
2128     * length 1 whose sole component is the primitive
2129     * <code>char</code> value represented by this
2130     * <code>Character</code> object.
2131     *
2132     * @return  a string representation of this object.
2133     */
2134    public String   toString() {
2135        char buf[] = {value};
2136        return String.valueOf(buf);
2137    }
2138
2139    /**
2140     * Returns a <code>String</code> object representing the
2141     * specified <code>char</code>.  The result is a string of length
2142     * 1 consisting solely of the specified <code>char</code>.
2143     *
2144     * @param c the <code>char</code> to be converted
2145     * @return the string representation of the specified <code>char</code>
2146     * @since 1.4
2147     */
2148    public static String   toString(char c) {
2149        return String.valueOf(c);
2150    }
2151
2152    // Maximum character handled by internal fast-path code which
2153    // avoids initializing large tables.
2154    // Note: performance of this "fast-path" code may be sub-optimal
2155    // in negative cases for some accessors due to complicated ranges.
2156    // Should revisit after optimization of table initialization.
2157
2158    private static final int FAST_PATH_MAX = 255;
2159
2160    /**
2161     * Provide the character plane to which this codepoint belongs.
2162     * 
2163     * @param ch the codepoint
2164     * @return the plane of the codepoint argument
2165     * @since 1.5
2166     */
2167    private static int getPlane(int ch) {
2168        return (ch >>> 16);
2169    }
2170
2171    /**
2172     * Determines whether the specified code point is a valid Unicode
2173     * code point value in the range of <code>0x0000</code> to
2174     * <code>0x10FFFF</code> inclusive. This method is equivalent to
2175     * the expression:
2176     *
2177     * <blockquote><pre>
2178     * codePoint >= 0x0000 && codePoint <= 0x10FFFF
2179     * </pre></blockquote>
2180     *
2181     * @param  codePoint the Unicode code point to be tested
2182     * @return <code>true</code> if the specified code point value
2183     * is a valid code point value;
2184     * <code>false</code> otherwise.
2185     * @since  1.5
2186     */
2187    public static boolean isValidCodePoint(int codePoint) {
2188        return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2189    }
2190
2191    /**
2192     * Determines whether the specified character (Unicode code point)
2193     * is in the supplementary character range. The method call is
2194     * equivalent to the expression:
2195     * <blockquote><pre>
2196     * codePoint >= 0x10000 && codePoint <= 0x10ffff
2197     * </pre></blockquote>
2198     *
2199     * @param  codePoint the character (Unicode code point) to be tested
2200     * @return <code>true</code> if the specified character is in the Unicode
2201     *         supplementary character range; <code>false</code> otherwise.
2202     * @since  1.5
2203     */
2204    public static boolean isSupplementaryCodePoint(int codePoint) {
2205        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2206            && codePoint <= MAX_CODE_POINT;
2207    }
2208
2209    /**
2210     * Determines if the given <code>char</code> value is a
2211     * high-surrogate code unit (also known as <i>leading-surrogate
2212     * code unit</i>). Such values do not represent characters by
2213     * themselves, but are used in the representation of <a
2214     * HREF="#supplementary">supplementary characters</a> in the
2215     * UTF-16 encoding.
2216     *
2217     * <p>This method returns <code>true</code> if and only if
2218     * <blockquote><pre>ch >= '&#92;uD800' && ch <= '&#92;uDBFF'
2219     * </pre></blockquote>
2220     * is <code>true</code>.
2221     *
2222     * @param   ch   the <code>char</code> value to be tested.
2223     * @return  <code>true</code> if the <code>char</code> value
2224     *          is between '&#92;uD800' and '&#92;uDBFF' inclusive;
2225     *          <code>false</code> otherwise.
2226     * @see     java.lang.Character#isLowSurrogate(char)
2227     * @see     Character.UnicodeBlock#of(int)
2228     * @since   1.5
2229     */
2230    public static boolean isHighSurrogate(char ch) {
2231        return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2232    }
2233    
2234    /**
2235     * Determines if the given <code>char</code> value is a
2236     * low-surrogate code unit (also known as <i>trailing-surrogate code
2237     * unit</i>). Such values do not represent characters by themselves,
2238     * but are used in the representation of <a
2239     * HREF="#supplementary">supplementary characters</a> in the UTF-16 encoding.
2240     *
2241     * <p> This method returns <code>true</code> if and only if
2242     * <blockquote><pre>ch >= '&#92;uDC00' && ch <= '&#92;uDFFF'
2243     * </pre></blockquote> is <code>true</code>.
2244     *
2245     * @param   ch   the <code>char</code> value to be tested.
2246     * @return  <code>true</code> if the <code>char</code> value
2247     *          is between '&#92;uDC00' and '&#92;uDFFF' inclusive;
2248     *          <code>false</code> otherwise.
2249     * @see java.lang.Character#isHighSurrogate(char)
2250     * @since   1.5
2251     */
2252    public static boolean isLowSurrogate(char ch) {
2253        return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2254    }
2255
2256    /**
2257     * Determines whether the specified pair of <code>char</code>
2258     * values is a valid surrogate pair. This method is equivalent to
2259     * the expression:
2260     * <blockquote><pre>
2261     * isHighSurrogate(high) && isLowSurrogate(low)
2262     * </pre></blockquote>
2263     *
2264     * @param  high the high-surrogate code value to be tested
2265     * @param  low the low-surrogate code value to be tested
2266     * @return <code>true</code> if the specified high and
2267     * low-surrogate code values represent a valid surrogate pair;
2268     * <code>false</code> otherwise.
2269     * @since  1.5
2270     */
2271    public static boolean isSurrogatePair(char high, char low) {
2272        return isHighSurrogate(high) && isLowSurrogate(low);
2273    }
2274
2275    /**
2276     * Determines the number of <code>char</code> values needed to
2277     * represent the specified character (Unicode code point). If the
2278     * specified character is equal to or greater than 0x10000, then
2279     * the method returns 2. Otherwise, the method returns 1.
2280     *
2281     * <p>This method doesn't validate the specified character to be a
2282     * valid Unicode code point. The caller must validate the
2283     * character value using {@link #isValidCodePoint(int) isValidCodePoint}
2284     * if necessary.
2285     *
2286     * @param   codePoint the character (Unicode code point) to be tested.
2287     * @return  2 if the character is a valid supplementary character; 1 otherwise.
2288     * @see     #isSupplementaryCodePoint(int)
2289     * @since   1.5
2290     */
2291    public static int charCount(int codePoint) {
2292        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
2293    }
2294
2295    /**
2296     * Converts the specified surrogate pair to its supplementary code
2297     * point value. This method does not validate the specified
2298     * surrogate pair. The caller must validate it using {@link
2299     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
2300     *
2301     * @param  high the high-surrogate code unit
2302     * @param  low the low-surrogate code unit
2303     * @return the supplementary code point composed from the
2304     *         specified surrogate pair.
2305     * @since  1.5
2306     */
2307    public static int toCodePoint(char high, char low) {
2308        return ((high - MIN_HIGH_SURROGATE) << 10)
2309            + (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY_CODE_POINT;
2310    }
2311
2312    /**
2313     * Returns the code point at the given index of the
2314     * <code>CharSequence</code>. If the <code>char</code> value at
2315     * the given index in the <code>CharSequence</code> is in the
2316     * high-surrogate range, the following index is less than the
2317     * length of the <code>CharSequence</code>, and the
2318     * <code>char</code> value at the following index is in the
2319     * low-surrogate range, then the supplementary code point
2320     * corresponding to this surrogate pair is returned. Otherwise,
2321     * the <code>char</code> value at the given index is returned.
2322     *
2323     * @param seq a sequence of <code>char</code> values (Unicode code
2324     * units)
2325     * @param index the index to the <code>char</code> values (Unicode
2326     * code units) in <code>seq</code> to be converted
2327     * @return the Unicode code point at the given index
2328     * @exception NullPointerException if <code>seq</code> is null.
2329     * @exception IndexOutOfBoundsException if the value
2330     * <code>index</code> is negative or not less than
2331     * {@link CharSequence#length() seq.length()}.
2332     * @since  1.5
2333     */
2334    public static int codePointAt(CharSequence   seq, int index) {
2335        char c1 = seq.charAt(index++);
2336        if (isHighSurrogate(c1)) {
2337            if (index < seq.length()) {
2338                char c2 = seq.charAt(index);
2339                if (isLowSurrogate(c2)) {
2340                    return toCodePoint(c1, c2);
2341                }
2342            }
2343        }
2344        return c1;
2345    }
2346
2347    /**
2348     * Returns the code point at the given index of the
2349     * <code>char</code> array. If the <code>char</code> value at
2350     * the given index in the <code>char</code> array is in the
2351     * high-surrogate range, the following index is less than the
2352     * length of the <code>char</code> array, and the
2353     * <code>char</code> value at the following index is in the
2354     * low-surrogate range, then the supplementary code point
2355     * corresponding to this surrogate pair is returned. Otherwise,
2356     * the <code>char</code> value at the given index is returned.
2357     *
2358     * @param a the <code>char</code> array
2359     * @param index the index to the <code>char</code> values (Unicode
2360     * code units) in the <code>char</code> array to be converted
2361     * @return the Unicode code point at the given index
2362     * @exception NullPointerException if <code>a</code> is null.
2363     * @exception IndexOutOfBoundsException if the value
2364     * <code>index</code> is negative or not less than
2365     * the length of the <code>char</code> array.
2366     * @since  1.5
2367     */
2368    public static int codePointAt(char[] a, int index) {
2369    return codePointAtImpl(a, index, a.length);
2370    }
2371
2372    /**
2373     * Returns the code point at the given index of the
2374     * <code>char</code> array, where only array elements with
2375     * <code>index</code> less than <code>limit</code> can be used. If
2376     * the <code>char</code> value at the given index in the
2377     * <code>char</code> array is in the high-surrogate range, the
2378     * following index is less than the <code>limit</code>, and the
2379     * <code>char</code> value at the following index is in the
2380     * low-surrogate range, then the supplementary code point
2381     * corresponding to this surrogate pair is returned. Otherwise,
2382     * the <code>char</code> value at the given index is returned.
2383     *
2384     * @param a the <code>char</code> array
2385     * @param index the index to the <code>char</code> values (Unicode
2386     * code units) in the <code>char</code> array to be converted
2387     * @param limit the index after the last array element that can be used in the
2388     * <code>char</code> array
2389     * @return the Unicode code point at the given index
2390     * @exception NullPointerException if <code>a</code> is null.
2391     * @exception IndexOutOfBoundsException if the <code>index</code>
2392     * argument is negative or not less than the <code>limit</code>
2393     * argument, or if the <code>limit</code> argument is negative or
2394     * greater than the length of the <code>char</code> array.
2395     * @since  1.5
2396     */
2397    public static int codePointAt(char[] a, int index, int limit) {
2398    if (index >= limit || limit < 0 || limit > a.length) {
2399        throw new IndexOutOfBoundsException  ();
2400    }
2401    return codePointAtImpl(a, index, limit);
2402    }
2403
2404    static int codePointAtImpl(char[] a, int index, int limit) {
2405        char c1 = a[index++];
2406        if (isHighSurrogate(c1)) {
2407            if (index < limit) {
2408                char c2 = a[index];
2409                if (isLowSurrogate(c2)) {
2410                    return toCodePoint(c1, c2);
2411                }
2412            }
2413        }
2414        return c1;
2415    }
2416
2417    /**
2418     * Returns the code point preceding the given index of the
2419     * <code>CharSequence</code>. If the <code>char</code> value at
2420     * <code>(index - 1)</code> in the <code>CharSequence</code> is in
2421     * the low-surrogate range, <code>(index - 2)</code> is not
2422     * negative, and the <code>char</code> value at <code>(index -
2423     * 2)</code> in the <code>CharSequence</code> is in the
2424     * high-surrogate range, then the supplementary code point
2425     * corresponding to this surrogate pair is returned. Otherwise,
2426     * the <code>char</code> value at <code>(index - 1)</code> is
2427     * returned.
2428     *
2429     * @param seq the <code>CharSequence</code> instance
2430     * @param index the index following the code point that should be returned
2431     * @return the Unicode code point value before the given index.
2432     * @exception NullPointerException if <code>seq</code> is null.
2433     * @exception IndexOutOfBoundsException if the <code>index</code>
2434     * argument is less than 1 or greater than {@link
2435     * CharSequence#length() seq.length()}.
2436     * @since  1.5
2437     */
2438    public static int codePointBefore(CharSequence   seq, int index) {
2439        char c2 = seq.charAt(--index);
2440        if (isLowSurrogate(c2)) {
2441            if (index > 0) {
2442                char c1 = seq.charAt(--index);
2443                if (isHighSurrogate(c1)) {
2444                    return toCodePoint(c1, c2);
2445                }
2446            }
2447        }
2448        return c2;
2449    }
2450
2451    /**
2452     * Returns the code point preceding the given index of the
2453     * <code>char</code> array. If the <code>char</code> value at
2454     * <code>(index - 1)</code> in the <code>char</code> array is in
2455     * the low-surrogate range, <code>(index - 2)</code> is not
2456     * negative, and the <code>char</code> value at <code>(index -
2457     * 2)</code> in the <code>char</code> array is in the
2458     * high-surrogate range, then the supplementary code point
2459     * corresponding to this surrogate pair is returned. Otherwise,
2460     * the <code>char</code> value at <code>(index - 1)</code> is
2461     * returned.
2462     *
2463     * @param a the <code>char</code> array
2464     * @param index the index following the code point that should be returned
2465     * @return the Unicode code point value before the given index.
2466     * @exception NullPointerException if <code>a</code> is null.
2467     * @exception IndexOutOfBoundsException if the <code>index</code>
2468     * argument is less than 1 or greater than the length of the
2469     * <code>char</code> array
2470     * @since  1.5
2471     */
2472    public static int codePointBefore(char[] a, int index) {
2473        return codePointBeforeImpl(a, index, 0);
2474    }
2475
2476    /**
2477     * Returns the code point preceding the given index of the
2478     * <code>char</code> array, where only array elements with
2479     * <code>index</code> greater than or equal to <code>start</code>
2480     * can be used. If the <code>char</code> value at <code>(index -
2481     * 1)</code> in the <code>char</code> array is in the
2482     * low-surrogate range, <code>(index - 2)</code> is not less than
2483     * <code>start</code>, and the <code>char</code> value at
2484     * <code>(index - 2)</code> in the <code>char</code> array is in
2485     * the high-surrogate range, then the supplementary code point
2486     * corresponding to this surrogate pair is returned. Otherwise,
2487     * the <code>char</code> value at <code>(index - 1)</code> is
2488     * returned.
2489     *
2490     * @param a the <code>char</code> array
2491     * @param index the index following the code point that should be returned
2492     * @param start the index of the first array element in the
2493     * <code>char</code> array
2494     * @return the Unicode code point value before the given index.
2495     * @exception NullPointerException if <code>a</code> is null.
2496     * @exception IndexOutOfBoundsException if the <code>index</code>
2497     * argument is not greater than the <code>start</code> argument or
2498     * is greater than the length of the <code>char</code> array, or
2499     * if the <code>start</code> argument is negative or not less than
2500     * the length of the <code>char</code> array.
2501     * @since  1.5
2502     */
2503    public static int codePointBefore(char[] a, int index, int start) {
2504    if (index <= start || start < 0 || start >= a.length) {
2505        throw new IndexOutOfBoundsException  ();
2506    }
2507    return codePointBeforeImpl(a, index, start);
2508    }
2509
2510    static int codePointBeforeImpl(char[] a, int index, int start) {
2511        char c2 = a[--index];
2512        if (isLowSurrogate(c2)) {
2513            if (index > start) {
2514                char c1 = a[--index];
2515                if (isHighSurrogate(c1)) {
2516                    return toCodePoint(c1, c2);
2517                }
2518            }
2519        }
2520        return c2;
2521    }
2522
2523    /**
2524     * Converts the specified character (Unicode code point) to its
2525     * UTF-16 representation. If the specified code point is a BMP
2526     * (Basic Multilingual Plane or Plane 0) value, the same value is
2527     * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
2528     * specified code point is a supplementary character, its
2529     * surrogate values are stored in <code>dst[dstIndex]</code>
2530     * (high-surrogate) and <code>dst[dstIndex+1]</code>
2531     * (low-surrogate), and 2 is returned.
2532     *
2533     * @param  codePoint the character (Unicode code point) to be converted.
2534     * @param  dst an array of <code>char</code> in which the
2535     * <code>codePoint</code>'s UTF-16 value is stored.
2536     * @param dstIndex the start index into the <code>dst</code>
2537     * array where the converted value is stored.
2538     * @return 1 if the code point is a BMP code point, 2 if the
2539     * code point is a supplementary code point.
2540     * @exception IllegalArgumentException if the specified
2541     * <code>codePoint</code> is not a valid Unicode code point.
2542     * @exception NullPointerException if the specified <code>dst</code> is null.
2543     * @exception IndexOutOfBoundsException if <code>dstIndex</code>
2544     * is negative or not less than <code>dst.length</code>, or if
2545     * <code>dst</code> at <code>dstIndex</code> doesn't have enough
2546     * array element(s) to store the resulting <code>char</code>
2547     * value(s). (If <code>dstIndex</code> is equal to
2548     * <code>dst.length-1</code> and the specified
2549     * <code>codePoint</code> is a supplementary character, the
2550     * high-surrogate value is not stored in
2551     * <code>dst[dstIndex]</code>.)
2552     * @since  1.5
2553     */
2554    public static int toChars(int codePoint, char[] dst, int dstIndex) {
2555        if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
2556            throw new IllegalArgumentException  ();
2557        }
2558        if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
2559            dst[dstIndex] = (char) codePoint;
2560            return 1;
2561        }
2562        toSurrogates(codePoint, dst, dstIndex);
2563        return 2;
2564    }
2565
2566    /**
2567     * Converts the specified character (Unicode code point) to its
2568     * UTF-16 representation stored in a <code>char</code> array. If
2569     * the specified code point is a BMP (Basic Multilingual Plane or
2570     * Plane 0) value, the resulting <code>char</code> array has
2571     * the same value as <code>codePoint</code>. If the specified code
2572     * point is a supplementary code point, the resulting
2573     * <code>char</code> array has the corresponding surrogate pair.
2574     *
2575     * @param  codePoint a Unicode code point
2576     * @return a <code>char</code> array having
2577     *         <code>codePoint</code>'s UTF-16 representation.
2578     * @exception IllegalArgumentException if the specified
2579     * <code>codePoint</code> is not a valid Unicode code point.
2580     * @since  1.5
2581     */
2582    public static char[] toChars(int codePoint) {
2583        if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
2584            throw new IllegalArgumentException  ();
2585        }
2586        if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
2587                return new char[] { (char) codePoint };
2588        }
2589        char[] result = new char[2];
2590        toSurrogates(codePoint, result, 0);
2591        return result;
2592    }
2593
2594    static void toSurrogates(int codePoint, char[] dst, int index) {
2595        int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
2596        dst[index+1] = (char)((offset & 0x3ff) + MIN_LOW_SURROGATE);
2597        dst[index] = (char)((offset >>> 10) + MIN_HIGH_SURROGATE);
2598    }
2599
2600    /**
2601     * Returns the number of Unicode code points in the text range of
2602     * the specified char sequence. The text range begins at the
2603     * specified <code>beginIndex</code> and extends to the
2604     * <code>char</code> at index <code>endIndex - 1</code>. Thus the
2605     * length (in <code>char</code>s) of the text range is
2606     * <code>endIndex-beginIndex</code>. Unpaired surrogates within
2607     * the text range count as one code point each.
2608     *
2609     * @param seq the char sequence
2610     * @param beginIndex the index to the first <code>char</code> of
2611     * the text range.
2612     * @param endIndex the index after the last <code>char</code> of
2613     * the text range.
2614     * @return the number of Unicode code points in the specified text
2615     * range
2616     * @exception NullPointerException if <code>seq</code> is null.
2617     * @exception IndexOutOfBoundsException if the
2618     * <code>beginIndex</code> is negative, or <code>endIndex</code>
2619     * is larger than the length of the given sequence, or
2620     * <code>beginIndex</code> is larger than <code>endIndex</code>.
2621     * @since  1.5
2622     */
2623    public static int codePointCount(CharSequence   seq, int beginIndex, int endIndex) {
2624    int length = seq.length();
2625    if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
2626        throw new IndexOutOfBoundsException  ();
2627    }
2628    int n = 0;
2629    for (int i = beginIndex; i < endIndex; ) {
2630        n++;
2631        if (isHighSurrogate(seq.charAt(i++))) {
2632        if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
2633            i++;
2634        }
2635        }
2636    }
2637    return n;
2638    }
2639
2640    /**
2641     * Returns the number of Unicode code points in a subarray of the
2642     * <code>char</code> array argument. The <code>offset</code>
2643     * argument is the index of the first <code>char</code> of the
2644     * subarray and the <code>count</code> argument specifies the
2645     * length of the subarray in <code>char</code>s. Unpaired
2646     * surrogates within the subarray count as one code point each.
2647     *
2648     * @param a the <code>char</code> array
2649     * @param offset the index of the first <code>char</code> in the
2650     * given <code>char</code> array
2651     * @param count the length of the subarray in <code>char</code>s
2652     * @return the number of Unicode code points in the specified subarray
2653     * @exception NullPointerException if <code>a</code> is null.
2654     * @exception IndexOutOfBoundsException if <code>offset</code> or
2655     * <code>count</code> is negative, or if <code>offset +
2656     * count</code> is larger than the length of the given array.
2657     * @since  1.5
2658     */
2659    public static int codePointCount(char[] a, int offset, int count) {
2660    if (count > a.length - offset || offset < 0 || count < 0) {
2661        throw new IndexOutOfBoundsException  ();
2662    }
2663    return codePointCountImpl(a, offset, count);
2664    }
2665
2666    static int codePointCountImpl(char[] a, int offset, int count) {
2667    int endIndex = offset + count;
2668    int n = 0;
2669    for (int i = offset; i < endIndex; ) {
2670        n++;
2671        if (isHighSurrogate(a[i++])) {
2672        if (i < endIndex && isLowSurrogate(a[i])) {
2673            i++;
2674        }
2675        }
2676    }
2677    return n;
2678    }
2679
2680    /**
2681     * Returns the index within the given char sequence that is offset
2682     * from the given <code>index</code> by <code>codePointOffset</code>
2683     * code points. Unpaired surrogates within the text range given by
2684     * <code>index</code> and <code>codePointOffset</code> count as
2685     * one code point each.
2686     *
2687     * @param seq the char sequence
2688     * @param index the index to be offset
2689     * @param codePointOffset the offset in code points
2690     * @return the index within the char sequence
2691     * @exception NullPointerException if <code>seq</code> is null.
2692     * @exception IndexOutOfBoundsException if <code>index</code>
     *   is negative or larger than the length of the char sequence,
2694     *   or if <code>codePointOffset</code> is positive and the
2695     *   subsequence starting with <code>index</code> has fewer than
2696     *   <code>codePointOffset</code> code points, or if
2697     *   <code>codePointOffset</code> is negative and the subsequence
2698     *   before <code>index</code> has fewer than the absolute value
2699     *   of <code>codePointOffset</code> code points.
2700     * @since 1.5
2701     */
2702    public static int offsetByCodePoints(CharSequence   seq, int index,
2703                     int codePointOffset) {
2704    int length = seq.length();
2705    if (index < 0 || index > length) {
2706        throw new IndexOutOfBoundsException  ();
2707    }
2708
2709    int x = index;
2710    if (codePointOffset >= 0) {
2711        int i;
2712        for (i = 0; x < length && i < codePointOffset; i++) {
2713        if (isHighSurrogate(seq.charAt(x++))) {
2714            if (x < length && isLowSurrogate(seq.charAt(x))) {
2715            x++;
2716            }
2717        }
2718        }
2719        if (i < codePointOffset) {
2720        throw new IndexOutOfBoundsException  ();
2721        }
2722    } else {
2723        int i;
2724        for (i = codePointOffset; x > 0 && i < 0; i++) {
2725        if (isLowSurrogate(seq.charAt(--x))) {
2726            if (x > 0 && isHighSurrogate(seq.charAt(x-1))) {
2727            x--;
2728            }
2729        }
2730        }
2731        if (i < 0) {
2732        throw new IndexOutOfBoundsException  ();
2733        }
2734    }
2735    return x;
2736    }
2737
2738    /**
2739     * Returns the index within the given <code>char</code> subarray
2740     * that is offset from the given <code>index</code> by
2741     * <code>codePointOffset</code> code points. The
2742     * <code>start</code> and <code>count</code> arguments specify a
2743     * subarray of the <code>char</code> array. Unpaired surrogates
2744     * within the text range given by <code>index</code> and
2745     * <code>codePointOffset</code> count as one code point each.
2746     *
2747     * @param a the <code>char</code> array
2748     * @param start the index of the first <code>char</code> of the
2749     * subarray
2750     * @param count the length of the subarray in <code>char</code>s
2751     * @param index the index to be offset
2752     * @param codePointOffset the offset in code points
2753     * @return the index within the subarray
2754     * @exception NullPointerException if <code>a</code> is null.
2755     * @exception IndexOutOfBoundsException 
2756     *   if <code>start</code> or <code>count</code> is negative,
2757     *   or if <code>start + count</code> is larger than the length of
2758     *   the given array,
2759     *   or if <code>index</code> is less than <code>start</code> or
     *   larger than <code>start + count</code>,
2761     *   or if <code>codePointOffset</code> is positive and the text range
2762     *   starting with <code>index</code> and ending with <code>start
2763     *   + count - 1</code> has fewer than <code>codePointOffset</code> code
2764     *   points,
2765     *   or if <code>codePointOffset</code> is negative and the text range
2766     *   starting with <code>start</code> and ending with <code>index
2767     *   - 1</code> has fewer than the absolute value of
2768     *   <code>codePointOffset</code> code points.
2769     * @since 1.5
2770     */
2771    public static int offsetByCodePoints(char[] a, int start, int count,
2772                     int index, int codePointOffset) {
2773    if (count > a.length-start || start < 0 || count < 0
2774        || index < start || index > start+count) {
2775        throw new IndexOutOfBoundsException  ();
2776    }
2777    return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
2778    }
2779
2780    static int offsetByCodePointsImpl(char[]a, int start, int count,
2781                      int index, int codePointOffset) {
2782    int x = index;
2783    if (codePointOffset >= 0) {
2784        int limit = start + count;
2785        int i;
2786        for (i = 0; x < limit && i < codePointOffset; i++) {
2787        if (isHighSurrogate(a[x++])) {
2788            if (x < limit && isLowSurrogate(a[x])) {
2789            x++;
2790            }
2791        }
2792        }
2793        if (i < codePointOffset) {
2794        throw new IndexOutOfBoundsException  ();
2795        }
2796    } else {
2797        int i;
2798        for (i = codePointOffset; x > start && i < 0; i++) {
2799        if (isLowSurrogate(a[--x])) {
2800            if (x > start && isHighSurrogate(a[x-1])) {
2801            x--;
2802            }
2803        }
2804        } 
2805        if (i < 0) {
2806        throw new IndexOutOfBoundsException  ();
2807        }
2808    }
2809    return x;
2810    }
2811
2812   /**
2813     * Determines if the specified character is a lowercase character.
2814     * <p>
2815     * A character is lowercase if its general category type, provided
2816     * by <code>Character.getType(ch)</code>, is
2817     * <code>LOWERCASE_LETTER</code>.
2818     * <p>
2819     * The following are examples of lowercase characters:
2820     * <p><blockquote><pre>
2821     * a b c d e f g h i j k l m n o p q r s t u v w x y z
2822     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6' 
2823     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
2824     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
2825     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
2826     * </pre></blockquote>
2827     * <p> Many other Unicode characters are lowercase too.
2828     *
2829     * <p><b>Note:</b> This method cannot handle <a
2830     * HREF="#supplementary"> supplementary characters</a>. To support
2831     * all Unicode characters, including supplementary characters, use
2832     * the {@link #isLowerCase(int)} method.
2833     *
2834     * @param   ch   the character to be tested.
2835     * @return  <code>true</code> if the character is lowercase;
2836     *          <code>false</code> otherwise.
     * @see     java.lang.Character#isUpperCase(char)
2838     * @see     java.lang.Character#isTitleCase(char)
2839     * @see     java.lang.Character#toLowerCase(char)
2840     * @see     java.lang.Character#getType(char)
2841     */
2842    public static boolean isLowerCase(char ch) {
2843        return isLowerCase((int)ch);
2844    }
2845
2846    /**
2847     * Determines if the specified character (Unicode code point) is a
2848     * lowercase character.
2849     * <p>
2850     * A character is lowercase if its general category type, provided
2851     * by {@link Character#getType getType(codePoint)}, is
2852     * <code>LOWERCASE_LETTER</code>.
2853     * <p>
2854     * The following are examples of lowercase characters:
2855     * <p><blockquote><pre>
2856     * a b c d e f g h i j k l m n o p q r s t u v w x y z
2857     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6' 
2858     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
2859     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
2860     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
2861     * </pre></blockquote>
2862     * <p> Many other Unicode characters are lowercase too.
2863     *
2864     * @param   codePoint the character (Unicode code point) to be tested.
2865     * @return  <code>true</code> if the character is lowercase;
2866     *          <code>false</code> otherwise.
     * @see     java.lang.Character#isUpperCase(int)
2868     * @see     java.lang.Character#isTitleCase(int)
2869     * @see     java.lang.Character#toLowerCase(int)
2870     * @see     java.lang.Character#getType(int)
2871     * @since   1.5
2872     */
2873    public static boolean isLowerCase(int codePoint) {
2874        boolean bLowerCase = false;
2875
2876        // codePoint must be in the valid range of codepoints
2877        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
2878            bLowerCase = CharacterDataLatin1.isLowerCase(codePoint);
2879        } else {
2880            int plane = getPlane(codePoint);
2881            switch(plane) {
2882            case(0):
2883                bLowerCase = CharacterData00.isLowerCase(codePoint);
2884                break;
2885            case(1):
2886                bLowerCase = CharacterData01.isLowerCase(codePoint);
2887                break;
2888            case(2):
2889                bLowerCase = CharacterData02.isLowerCase(codePoint);
2890                break;
2891            case(3): // Undefined
2892            case(4): // Undefined
2893            case(5): // Undefined
2894            case(6): // Undefined
2895            case(7): // Undefined
2896            case(8): // Undefined
2897            case(9): // Undefined
2898            case(10): // Undefined
2899            case(11): // Undefined
2900            case(12): // Undefined
2901            case(13): // Undefined
2902                bLowerCase = CharacterDataUndefined.isLowerCase(codePoint);
2903                break;
2904            case(14): 
2905                bLowerCase = CharacterData0E.isLowerCase(codePoint);
2906                break;
2907            case(15): // Private Use
2908            case(16): // Private Use
2909                bLowerCase = CharacterDataPrivateUse.isLowerCase(codePoint);
2910                break;
2911            default:
2912                // the argument's plane is invalid, and thus is an invalid codepoint
2913                // bLowerCase remains false
2914                break;
2915            }
2916        }
2917        return bLowerCase;
2918    }
2919
2920   /**
2921     * Determines if the specified character is an uppercase character.
2922     * <p>
2923     * A character is uppercase if its general category type, provided by
2924     * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
2925     * <p>
2926     * The following are examples of uppercase characters:
2927     * <p><blockquote><pre>
2928     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2929     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
2930     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
2931     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
2932     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
2933     * </pre></blockquote>
2934     * <p> Many other Unicode characters are uppercase too.<p>
2935     *
2936     * <p><b>Note:</b> This method cannot handle <a
2937     * HREF="#supplementary"> supplementary characters</a>. To support
2938     * all Unicode characters, including supplementary characters, use
2939     * the {@link #isUpperCase(int)} method.
2940     *
2941     * @param   ch   the character to be tested.
2942     * @return  <code>true</code> if the character is uppercase;
2943     *          <code>false</code> otherwise.
2944     * @see     java.lang.Character#isLowerCase(char)
2945     * @see     java.lang.Character#isTitleCase(char)
2946     * @see     java.lang.Character#toUpperCase(char)
2947     * @see     java.lang.Character#getType(char)
2948     * @since   1.0
2949     */
2950    public static boolean isUpperCase(char ch) {
2951        return isUpperCase((int)ch);
2952    }
2953
2954    /**
2955     * Determines if the specified character (Unicode code point) is an uppercase character.
2956     * <p>
2957     * A character is uppercase if its general category type, provided by
2958     * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
2959     * <p>
2960     * The following are examples of uppercase characters:
2961     * <p><blockquote><pre>
2962     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2963     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
2964     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
2965     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
2966     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
2967     * </pre></blockquote>
2968     * <p> Many other Unicode characters are uppercase too.<p>
2969     *
2970     * @param   codePoint the character (Unicode code point) to be tested.
2971     * @return  <code>true</code> if the character is uppercase;
2972     *          <code>false</code> otherwise.
2973     * @see     java.lang.Character#isLowerCase(int)
2974     * @see     java.lang.Character#isTitleCase(int)
2975     * @see     java.lang.Character#toUpperCase(int)
2976     * @see     java.lang.Character#getType(int)
2977     * @since   1.5
2978     */
2979    public static boolean isUpperCase(int codePoint) {
2980        boolean bUpperCase = false;
2981
2982        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
2983            bUpperCase = CharacterDataLatin1.isUpperCase(codePoint);
2984        } else {
2985            int plane = getPlane(codePoint);
2986            switch(plane) {
2987            case(0):
2988                bUpperCase = CharacterData00.isUpperCase(codePoint);
2989                break;
2990            case(1):
2991                bUpperCase = CharacterData01.isUpperCase(codePoint);
2992                break;
2993            case(2):
2994                bUpperCase = CharacterData02.isUpperCase(codePoint);
2995                break;
2996            case(3): // Undefined
2997            case(4): // Undefined
2998            case(5): // Undefined
2999            case(6): // Undefined
3000            case(7): // Undefined
3001            case(8): // Undefined
3002            case(9): // Undefined
3003            case(10): // Undefined
3004            case(11): // Undefined
3005            case(12): // Undefined
3006            case(13): // Undefined
3007                bUpperCase = CharacterDataUndefined.isUpperCase(codePoint);
3008                break;
3009            case(14):
3010                bUpperCase = CharacterData0E.isUpperCase(codePoint);
3011                break;
3012            case(15): // Private Use
3013            case(16): // Private Use
3014                bUpperCase = CharacterDataPrivateUse.isUpperCase(codePoint);
3015                break;
3016            default:
3017                // the argument's plane is invalid, and thus is an invalid codepoint
3018                // bUpperCase remains false;
3019                break;
3020            }
3021        }
3022        return bUpperCase;
3023    }
3024
3025    /**
3026     * Determines if the specified character is a titlecase character.
3027     * <p> 
3028     * A character is a titlecase character if its general
3029     * category type, provided by <code>Character.getType(ch)</code>,
3030     * is <code>TITLECASE_LETTER</code>.
3031     * <p>
3032     * Some characters look like pairs of Latin letters. For example, there
3033     * is an uppercase letter that looks like "LJ" and has a corresponding
3034     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
3035     * is the appropriate form to use when rendering a word in lowercase
3036     * with initial capitals, as for a book title.
3037     * <p>
3038     * These are some of the Unicode characters for which this method returns
3039     * <code>true</code>:
3040     * <ul>
3041     * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
3042     * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
3043     * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
3044     * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3045     * </ul>
3046     * <p> Many other Unicode characters are titlecase too.
3047     *
3048     * <p><b>Note:</b> This method cannot handle <a
3049     * HREF="#supplementary"> supplementary characters</a>. To support
3050     * all Unicode characters, including supplementary characters, use
3051     * the {@link #isTitleCase(int)} method.
3052     *
3053     * @param   ch   the character to be tested.
3054     * @return  <code>true</code> if the character is titlecase;
3055     *          <code>false</code> otherwise.
3056     * @see     java.lang.Character#isLowerCase(char)
3057     * @see     java.lang.Character#isUpperCase(char)
3058     * @see     java.lang.Character#toTitleCase(char)
3059     * @see     java.lang.Character#getType(char)
3060     * @since   1.0.2
3061     */
3062    public static boolean isTitleCase(char ch) {
3063        return isTitleCase((int)ch);
3064    }
3065
3066    /**
3067     * Determines if the specified character (Unicode code point) is a titlecase character.
3068     * <p> 
3069     * A character is a titlecase character if its general
3070     * category type, provided by {@link Character#getType(int) getType(codePoint)},
3071     * is <code>TITLECASE_LETTER</code>.
3072     * <p>
3073     * Some characters look like pairs of Latin letters. For example, there
3074     * is an uppercase letter that looks like "LJ" and has a corresponding
3075     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
3076     * is the appropriate form to use when rendering a word in lowercase
3077     * with initial capitals, as for a book title.
3078     * <p>
3079     * These are some of the Unicode characters for which this method returns
3080     * <code>true</code>:
3081     * <ul>
3082     * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
3083     * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
3084     * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
3085     * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3086     * </ul>
3087     * <p> Many other Unicode characters are titlecase too.
3088     *
3089     * @param   codePoint the character (Unicode code point) to be tested.
3090     * @return  <code>true</code> if the character is titlecase;
3091     *          <code>false</code> otherwise.
3092     * @see     java.lang.Character#isLowerCase(int)
3093     * @see     java.lang.Character#isUpperCase(int)
3094     * @see     java.lang.Character#toTitleCase(int)
3095     * @see     java.lang.Character#getType(int)
3096     * @since   1.5
3097     */
3098    public static boolean isTitleCase(int codePoint) {
3099        boolean bTitleCase = false;
3100
3101        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3102            bTitleCase = CharacterDataLatin1.isTitleCase(codePoint);
3103        } else {
3104            int plane = getPlane(codePoint);
3105            switch(plane) {
3106            case(0):
3107                bTitleCase = CharacterData00.isTitleCase(codePoint);
3108                break;
3109            case(1):
3110                bTitleCase = CharacterData01.isTitleCase(codePoint);
3111                break;
3112            case(2):
3113                bTitleCase = CharacterData02.isTitleCase(codePoint);
3114                break;
3115            case(3): // Undefined
3116            case(4): // Undefined
3117            case(5): // Undefined
3118            case(6): // Undefined
3119            case(7): // Undefined
3120            case(8): // Undefined
3121            case(9): // Undefined
3122            case(10): // Undefined
3123            case(11): // Undefined
3124            case(12): // Undefined
3125            case(13): // Undefined
3126                bTitleCase = CharacterDataUndefined.isTitleCase(codePoint);
3127                break;
3128            case(14): 
3129                bTitleCase = CharacterData0E.isTitleCase(codePoint);
3130                break;
3131            case(15): // Private Use
3132            case(16): // Private Use
3133                bTitleCase = CharacterDataPrivateUse.isTitleCase(codePoint);
3134                break;
3135            default:
3136                // the argument's plane is invalid, and thus is an invalid codepoint
3137                // bTitleCase remains false;
3138                break;
3139            }
3140        }
3141        return bTitleCase;
3142    }
3143
3144    /**
3145     * Determines if the specified character is a digit.
3146     * <p>
3147     * A character is a digit if its general category type, provided
3148     * by <code>Character.getType(ch)</code>, is
3149     * <code>DECIMAL_DIGIT_NUMBER</code>.
3150     * <p>
3151     * Some Unicode character ranges that contain digits:
3152     * <ul>
3153     * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>, 
3154     *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3155     * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
3156     *     Arabic-Indic digits
3157     * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
3158     *     Extended Arabic-Indic digits
3159     * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
3160     *     Devanagari digits
3161     * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
3162     *     Fullwidth digits
3163     * </ul>
3164     *
3165     * Many other character ranges contain digits as well.
3166     *
3167     * <p><b>Note:</b> This method cannot handle <a
3168     * HREF="#supplementary"> supplementary characters</a>. To support
3169     * all Unicode characters, including supplementary characters, use
3170     * the {@link #isDigit(int)} method.
3171     *
3172     * @param   ch   the character to be tested.
3173     * @return  <code>true</code> if the character is a digit;
3174     *          <code>false</code> otherwise.
3175     * @see     java.lang.Character#digit(char, int)
3176     * @see     java.lang.Character#forDigit(int, int)
3177     * @see     java.lang.Character#getType(char)
3178     */
3179    public static boolean isDigit(char ch) {
3180        return isDigit((int)ch);
3181    }
3182
3183    /**
3184     * Determines if the specified character (Unicode code point) is a digit.
3185     * <p>
3186     * A character is a digit if its general category type, provided
3187     * by {@link Character#getType(int) getType(codePoint)}, is
3188     * <code>DECIMAL_DIGIT_NUMBER</code>.
3189     * <p>
3190     * Some Unicode character ranges that contain digits:
3191     * <ul>
3192     * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>, 
3193     *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3194     * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
3195     *     Arabic-Indic digits
3196     * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
3197     *     Extended Arabic-Indic digits
3198     * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
3199     *     Devanagari digits
3200     * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
3201     *     Fullwidth digits
3202     * </ul>
3203     *
3204     * Many other character ranges contain digits as well.
3205     *
3206     * @param   codePoint the character (Unicode code point) to be tested.
3207     * @return  <code>true</code> if the character is a digit;
3208     *          <code>false</code> otherwise.
3209     * @see     java.lang.Character#forDigit(int, int)
3210     * @see     java.lang.Character#getType(int)
3211     * @since   1.5
3212     */
3213    public static boolean isDigit(int codePoint) {
3214        boolean bDigit = false;
3215
3216        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3217            bDigit = CharacterDataLatin1.isDigit(codePoint);
3218        } else {
3219            int plane = getPlane(codePoint);
3220            switch(plane) {
3221            case(0):
3222                bDigit = CharacterData00.isDigit(codePoint);
3223                break;
3224            case(1):
3225                bDigit = CharacterData01.isDigit(codePoint);
3226                break;
3227            case(2):
3228                bDigit = CharacterData02.isDigit(codePoint);
3229                break;
3230            case(3): // Undefined
3231            case(4): // Undefined
3232            case(5): // Undefined
3233            case(6): // Undefined
3234            case(7): // Undefined
3235            case(8): // Undefined
3236            case(9): // Undefined
3237            case(10): // Undefined
3238            case(11): // Undefined
3239            case(12): // Undefined
3240            case(13): // Undefined
3241                bDigit = CharacterDataUndefined.isDigit(codePoint);
3242                break;
3243            case(14):
3244                bDigit = CharacterData0E.isDigit(codePoint);
3245                break;
3246            case(15): // Private Use
3247            case(16): // Private Use
3248                bDigit = CharacterDataPrivateUse.isDigit(codePoint);
3249                break;
3250            default:
3251                // the argument's plane is invalid, and thus is an invalid codepoint
3252                // bDigit remains false;
3253                break;                          
3254            }
3255        }
3256        return bDigit;
3257    }
3258
3259    /**
3260     * Determines if a character is defined in Unicode.
3261     * <p>
3262     * A character is defined if at least one of the following is true:
3263     * <ul>
3264     * <li>It has an entry in the UnicodeData file.
3265     * <li>It has a value in a range defined by the UnicodeData file.
3266     * </ul>
3267     *
3268     * <p><b>Note:</b> This method cannot handle <a
3269     * HREF="#supplementary"> supplementary characters</a>. To support
3270     * all Unicode characters, including supplementary characters, use
3271     * the {@link #isDefined(int)} method.
3272     *
3273     * @param   ch   the character to be tested
3274     * @return  <code>true</code> if the character has a defined meaning
3275     *          in Unicode; <code>false</code> otherwise.
3276     * @see     java.lang.Character#isDigit(char)
3277     * @see     java.lang.Character#isLetter(char)
3278     * @see     java.lang.Character#isLetterOrDigit(char)
3279     * @see     java.lang.Character#isLowerCase(char)
3280     * @see     java.lang.Character#isTitleCase(char)
3281     * @see     java.lang.Character#isUpperCase(char)
3282     * @since   1.0.2
3283     */
3284    public static boolean isDefined(char ch) {
3285        return isDefined((int)ch);
3286    }
3287
3288    /**
3289     * Determines if a character (Unicode code point) is defined in Unicode.
3290     * <p>
3291     * A character is defined if at least one of the following is true:
3292     * <ul>
3293     * <li>It has an entry in the UnicodeData file.
3294     * <li>It has a value in a range defined by the UnicodeData file.
3295     * </ul>
3296     *
3297     * @param   codePoint the character (Unicode code point) to be tested.
3298     * @return  <code>true</code> if the character has a defined meaning
3299     *          in Unicode; <code>false</code> otherwise.
3300     * @see     java.lang.Character#isDigit(int)
3301     * @see     java.lang.Character#isLetter(int)
3302     * @see     java.lang.Character#isLetterOrDigit(int)
3303     * @see     java.lang.Character#isLowerCase(int)
3304     * @see     java.lang.Character#isTitleCase(int)
3305     * @see     java.lang.Character#isUpperCase(int)
3306     * @since   1.5
3307     */
3308    public static boolean isDefined(int codePoint) {
3309        boolean bDefined = false;
3310
3311        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3312            bDefined = CharacterDataLatin1.isDefined(codePoint);
3313        } else {
3314            int plane = getPlane(codePoint);
3315            switch(plane) {
3316            case(0):
3317                bDefined = CharacterData00.isDefined(codePoint);
3318                break;
3319            case(1):
3320                bDefined = CharacterData01.isDefined(codePoint);
3321                break;
3322            case(2):
3323                bDefined = CharacterData02.isDefined(codePoint);
3324                break;
3325            case(3): // Undefined
3326            case(4): // Undefined
3327            case(5): // Undefined
3328            case(6): // Undefined
3329            case(7): // Undefined
3330            case(8): // Undefined
3331            case(9): // Undefined
3332            case(10): // Undefined
3333            case(11): // Undefined
3334            case(12): // Undefined
3335            case(13): // Undefined
3336                bDefined = CharacterDataUndefined.isDefined(codePoint);
3337                break;
3338            case(14): 
3339                bDefined = CharacterData0E.isDefined(codePoint);
3340                break;
3341            case(15): // Private Use
3342            case(16): // Private Use
3343                bDefined = CharacterDataPrivateUse.isDefined(codePoint);
3344                break;
3345            default:
3346                // the argument's plane is invalid, and thus is an invalid codepoint
3347                // bDefined remains false;
3348                break;
3349            }
3350        }
3351        return bDefined;
3352    }
3353
3354    /**
3355     * Determines if the specified character is a letter.
3356     * <p>
3357     * A character is considered to be a letter if its general
3358     * category type, provided by <code>Character.getType(ch)</code>,
3359     * is any of the following:
3360     * <ul>
3361     * <li> <code>UPPERCASE_LETTER</code>
3362     * <li> <code>LOWERCASE_LETTER</code>
3363     * <li> <code>TITLECASE_LETTER</code>
3364     * <li> <code>MODIFIER_LETTER</code>
3365     * <li> <code>OTHER_LETTER</code>
3366     * </ul>
3367     *
3368     * Not all letters have case. Many characters are
3369     * letters but are neither uppercase nor lowercase nor titlecase.
3370     *
3371     * <p><b>Note:</b> This method cannot handle <a
3372     * HREF="#supplementary"> supplementary characters</a>. To support
3373     * all Unicode characters, including supplementary characters, use
3374     * the {@link #isLetter(int)} method.
3375     *
3376     * @param   ch   the character to be tested.
3377     * @return  <code>true</code> if the character is a letter;
3378     *          <code>false</code> otherwise.
3379     * @see     java.lang.Character#isDigit(char)
3380     * @see     java.lang.Character#isJavaIdentifierStart(char)
3381     * @see     java.lang.Character#isJavaLetter(char)
3382     * @see     java.lang.Character#isJavaLetterOrDigit(char)
3383     * @see     java.lang.Character#isLetterOrDigit(char)
3384     * @see     java.lang.Character#isLowerCase(char)
3385     * @see     java.lang.Character#isTitleCase(char)
3386     * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3387     * @see     java.lang.Character#isUpperCase(char)
3388     */
3389    public static boolean isLetter(char ch) {
3390        return isLetter((int)ch);
3391    }
3392
3393    /**
3394     * Determines if the specified character (Unicode code point) is a letter.
3395     * <p>
3396     * A character is considered to be a letter if its general
3397     * category type, provided by {@link Character#getType(int) getType(codePoint)},
3398     * is any of the following:
3399     * <ul>
3400     * <li> <code>UPPERCASE_LETTER</code>
3401     * <li> <code>LOWERCASE_LETTER</code>
3402     * <li> <code>TITLECASE_LETTER</code>
3403     * <li> <code>MODIFIER_LETTER</code>
3404     * <li> <code>OTHER_LETTER</code>
3405     * </ul>
3406     *
3407     * Not all letters have case. Many characters are
3408     * letters but are neither uppercase nor lowercase nor titlecase.
3409     *
3410     * @param   codePoint the character (Unicode code point) to be tested.
3411     * @return  <code>true</code> if the character is a letter;
3412     *          <code>false</code> otherwise.
3413     * @see     java.lang.Character#isDigit(int)
3414     * @see     java.lang.Character#isJavaIdentifierStart(int)
3415     * @see     java.lang.Character#isLetterOrDigit(int)
3416     * @see     java.lang.Character#isLowerCase(int)
3417     * @see     java.lang.Character#isTitleCase(int)
3418     * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3419     * @see     java.lang.Character#isUpperCase(int)
3420     * @since   1.5
3421     */
3422    public static boolean isLetter(int codePoint) {
3423        boolean bLetter = false;
3424
3425        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3426            bLetter = CharacterDataLatin1.isLetter(codePoint);
3427        } else {
3428            int plane = getPlane(codePoint);
3429            switch(plane) {
3430            case(0):
3431                bLetter = CharacterData00.isLetter(codePoint);
3432                break;
3433            case(1):
3434                bLetter = CharacterData01.isLetter(codePoint);
3435                break;
3436            case(2):
3437                bLetter = CharacterData02.isLetter(codePoint);
3438                break;
3439            case(3): // Undefined
3440            case(4): // Undefined
3441            case(5): // Undefined
3442            case(6): // Undefined
3443            case(7): // Undefined
3444            case(8): // Undefined
3445            case(9): // Undefined
3446            case(10): // Undefined
3447            case(11): // Undefined
3448            case(12): // Undefined
3449            case(13): // Undefined
3450                bLetter = CharacterDataUndefined.isLetter(codePoint);
3451                break;
3452            case(14):
3453                bLetter = CharacterData0E.isLetter(codePoint);
3454                break;
3455            case(15): // Private Use
3456            case(16): // Private Use
3457                bLetter = CharacterDataPrivateUse.isLetter(codePoint);
3458                break;
3459            default:
3460                // the argument's plane is invalid, and thus is an invalid codepoint
3461                // bLetter remains false;
3462                break;
3463            }
3464        }
3465        return bLetter;
3466    }
3467
3468    /**
3469     * Determines if the specified character is a letter or digit.
3470     * <p>
3471     * A character is considered to be a letter or digit if either
3472     * <code>Character.isLetter(char ch)</code> or
3473     * <code>Character.isDigit(char ch)</code> returns
3474     * <code>true</code> for the character.
3475     *
3476     * <p><b>Note:</b> This method cannot handle <a
3477     * HREF="#supplementary"> supplementary characters</a>. To support
3478     * all Unicode characters, including supplementary characters, use
3479     * the {@link #isLetterOrDigit(int)} method.
3480     *
3481     * @param   ch   the character to be tested.
3482     * @return  <code>true</code> if the character is a letter or digit;
3483     *          <code>false</code> otherwise.
3484     * @see     java.lang.Character#isDigit(char)
3485     * @see     java.lang.Character#isJavaIdentifierPart(char)
3486     * @see     java.lang.Character#isJavaLetter(char)
3487     * @see     java.lang.Character#isJavaLetterOrDigit(char)
3488     * @see     java.lang.Character#isLetter(char)
3489     * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3490     * @since   1.0.2
3491     */
3492    public static boolean isLetterOrDigit(char ch) {
3493        return isLetterOrDigit((int)ch);
3494    }
3495
3496    /**
3497     * Determines if the specified character (Unicode code point) is a letter or digit.
3498     * <p>
3499     * A character is considered to be a letter or digit if either
3500     * {@link #isLetter(int) isLetter(codePoint)} or
3501     * {@link #isDigit(int) isDigit(codePoint)} returns
3502     * <code>true</code> for the character.
3503     *
3504     * @param   codePoint the character (Unicode code point) to be tested.
3505     * @return  <code>true</code> if the character is a letter or digit;
3506     *          <code>false</code> otherwise.
3507     * @see     java.lang.Character#isDigit(int)
3508     * @see     java.lang.Character#isJavaIdentifierPart(int)
3509     * @see     java.lang.Character#isLetter(int)
3510     * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3511     * @since   1.5
3512     */
3513    public static boolean isLetterOrDigit(int codePoint) {
3514        boolean bLetterOrDigit = false;
3515
3516        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3517            bLetterOrDigit = CharacterDataLatin1.isLetterOrDigit(codePoint);
3518        } else {
3519            int plane = getPlane(codePoint);
3520            switch(plane) {
3521            case(0):
3522                bLetterOrDigit = CharacterData00.isLetterOrDigit(codePoint);
3523                break;
3524            case(1):
3525                bLetterOrDigit = CharacterData01.isLetterOrDigit(codePoint);
3526                break;
3527            case(2):
3528                bLetterOrDigit = CharacterData02.isLetterOrDigit(codePoint);
3529                break;
3530            case(3): // Undefined
3531            case(4): // Undefined
3532            case(5): // Undefined
3533            case(6): // Undefined
3534            case(7): // Undefined
3535            case(8): // Undefined
3536            case(9): // Undefined
3537            case(10): // Undefined
3538            case(11): // Undefined
3539            case(12): // Undefined
3540            case(13): // Undefined
3541                bLetterOrDigit = CharacterDataUndefined.isLetterOrDigit(codePoint);
3542                break;
3543            case(14): // Undefined
3544                bLetterOrDigit = CharacterData0E.isLetterOrDigit(codePoint);
3545                break;
3546            case(15): // Private Use
3547            case(16): // Private Use
3548                bLetterOrDigit = CharacterDataPrivateUse.isLetterOrDigit(codePoint);
3549                break;
3550            default:
3551                // the argument's plane is invalid, and thus is an invalid codepoint
3552                // bLetterOrDigit remains false;
3553                break;
3554            }
3555        }
3556        return bLetterOrDigit;
3557    }
3558
3559    /**
3560     * Determines if the specified character is permissible as the first
3561     * character in a Java identifier.
3562     * <p>
3563     * A character may start a Java identifier if and only if
3564     * one of the following is true:
3565     * <ul>
3566     * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3567     * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3568     * <li> ch is a currency symbol (such as "$")
3569     * <li> ch is a connecting punctuation character (such as "_").
3570     * </ul>
3571     *
3572     * @param   ch the character to be tested.
3573     * @return  <code>true</code> if the character may start a Java
3574     *          identifier; <code>false</code> otherwise.
3575     * @see     java.lang.Character#isJavaLetterOrDigit(char)
3576     * @see     java.lang.Character#isJavaIdentifierStart(char)
3577     * @see     java.lang.Character#isJavaIdentifierPart(char)
3578     * @see     java.lang.Character#isLetter(char)
3579     * @see     java.lang.Character#isLetterOrDigit(char)
3580     * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3581     * @since   1.0.2
3582     * @deprecated Replaced by isJavaIdentifierStart(char).
3583     */
3584    @Deprecated  
3585    public static boolean isJavaLetter(char ch) {
3586        return isJavaIdentifierStart(ch);
3587    }
3588
3589    /**
3590     * Determines if the specified character may be part of a Java
3591     * identifier as other than the first character.
3592     * <p>
3593     * A character may be part of a Java identifier if and only if any
3594     * of the following are true:
3595     * <ul>
3596     * <li>  it is a letter
3597     * <li>  it is a currency symbol (such as <code>'$'</code>)
3598     * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3599     * <li>  it is a digit
3600     * <li>  it is a numeric letter (such as a Roman numeral character)
3601     * <li>  it is a combining mark
3602     * <li>  it is a non-spacing mark
3603     * <li> <code>isIdentifierIgnorable</code> returns
3604     * <code>true</code> for the character.
3605     * </ul>
3606     *
3607     * @param   ch the character to be tested.
3608     * @return  <code>true</code> if the character may be part of a
3609     *          Java identifier; <code>false</code> otherwise.
3610     * @see     java.lang.Character#isJavaLetter(char)
3611     * @see     java.lang.Character#isJavaIdentifierStart(char)
3612     * @see     java.lang.Character#isJavaIdentifierPart(char)
3613     * @see     java.lang.Character#isLetter(char)
3614     * @see     java.lang.Character#isLetterOrDigit(char)
3615     * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3616     * @see     java.lang.Character#isIdentifierIgnorable(char)
3617     * @since   1.0.2
3618     * @deprecated Replaced by isJavaIdentifierPart(char).
3619     */
3620    @Deprecated  
3621    public static boolean isJavaLetterOrDigit(char ch) {
3622        return isJavaIdentifierPart(ch);
3623    }
3624
3625    /**
3626     * Determines if the specified character is
3627     * permissible as the first character in a Java identifier.
3628     * <p>
3629     * A character may start a Java identifier if and only if
3630     * one of the following conditions is true:
3631     * <ul>
3632     * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3633     * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3634     * <li> ch is a currency symbol (such as "$")
3635     * <li> ch is a connecting punctuation character (such as "_").
3636     * </ul>
3637     *
3638     * <p><b>Note:</b> This method cannot handle <a
3639     * HREF="#supplementary"> supplementary characters</a>. To support
3640     * all Unicode characters, including supplementary characters, use
3641     * the {@link #isJavaIdentifierStart(int)} method.
3642     *
3643     * @param   ch the character to be tested.
3644     * @return  <code>true</code> if the character may start a Java identifier;
3645     *          <code>false</code> otherwise.
3646     * @see     java.lang.Character#isJavaIdentifierPart(char)
3647     * @see     java.lang.Character#isLetter(char)
3648     * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3649     * @since   1.1
3650     */
3651    public static boolean isJavaIdentifierStart(char ch) {
3652        return isJavaIdentifierStart((int)ch);
3653    }
3654
3655    /**
3656     * Determines if the character (Unicode code point) is
3657     * permissible as the first character in a Java identifier.
3658     * <p>
3659     * A character may start a Java identifier if and only if
3660     * one of the following conditions is true:
3661     * <ul>
3662     * <li> {@link #isLetter(int) isLetter(codePoint)}
3663     *      returns <code>true</code>
3664     * <li> {@link #getType(int) getType(codePoint)}
3665     *      returns <code>LETTER_NUMBER</code>
3666     * <li> the referenced character is a currency symbol (such as "$")
3667     * <li> the referenced character is a connecting punctuation character
3668     *      (such as "_").
3669     * </ul>
3670     *
3671     * @param   codePoint the character (Unicode code point) to be tested.
3672     * @return  <code>true</code> if the character may start a Java identifier;
3673     *          <code>false</code> otherwise.
3674     * @see     java.lang.Character#isJavaIdentifierPart(int)
3675     * @see     java.lang.Character#isLetter(int)
3676     * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3677     * @since   1.5
3678     */
3679    public static boolean isJavaIdentifierStart(int codePoint) {
3680        boolean bJavaStart = false;
3681
3682        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3683            bJavaStart = CharacterDataLatin1.isJavaIdentifierStart(codePoint);
3684        } else {
3685            int plane = getPlane(codePoint);
3686            switch(plane) {
3687            case(0):
3688                bJavaStart = CharacterData00.isJavaIdentifierStart(codePoint);
3689                break;
3690            case(1):
3691                bJavaStart = CharacterData01.isJavaIdentifierStart(codePoint);
3692                break;
3693            case(2):
3694                bJavaStart = CharacterData02.isJavaIdentifierStart(codePoint);
3695                break;
3696            case(3): // Undefined
3697            case(4): // Undefined
3698            case(5): // Undefined
3699            case(6): // Undefined
3700            case(7): // Undefined
3701            case(8): // Undefined
3702            case(9): // Undefined
3703            case(10): // Undefined
3704            case(11): // Undefined
3705            case(12): // Undefined
3706            case(13): // Undefined
3707                bJavaStart = CharacterDataUndefined.isJavaIdentifierStart(codePoint);
3708                break;
3709            case(14): 
3710                bJavaStart = CharacterData0E.isJavaIdentifierStart(codePoint);
3711                break;
3712            case(15): // Private Use
3713            case(16): // Private Use
3714                bJavaStart = CharacterDataPrivateUse.isJavaIdentifierStart(codePoint);
3715                break;
3716            default:
3717                // the argument's plane is invalid, and thus is an invalid codepoint
3718                // bJavaStart remains false;
3719                break;
3720            }   
3721        }
3722        return bJavaStart;
3723    }
3724
3725    /**
3726     * Determines if the specified character may be part of a Java
3727     * identifier as other than the first character.
3728     * <p>
3729     * A character may be part of a Java identifier if any of the following
3730     * are true:
3731     * <ul>
3732     * <li>  it is a letter
3733     * <li>  it is a currency symbol (such as <code>'$'</code>)
3734     * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3735     * <li>  it is a digit
3736     * <li>  it is a numeric letter (such as a Roman numeral character)
3737     * <li>  it is a combining mark
3738     * <li>  it is a non-spacing mark
3739     * <li> <code>isIdentifierIgnorable</code> returns
3740     * <code>true</code> for the character
3741     * </ul>
3742     *
3743     * <p><b>Note:</b> This method cannot handle <a
3744     * HREF="#supplementary"> supplementary characters</a>. To support
3745     * all Unicode characters, including supplementary characters, use
3746     * the {@link #isJavaIdentifierPart(int)} method.
3747     *
3748     * @param   ch      the character to be tested.
3749     * @return <code>true</code> if the character may be part of a
3750     *          Java identifier; <code>false</code> otherwise.
3751     * @see     java.lang.Character#isIdentifierIgnorable(char)
3752     * @see     java.lang.Character#isJavaIdentifierStart(char)
3753     * @see     java.lang.Character#isLetterOrDigit(char)
3754     * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3755     * @since   1.1
3756     */
3757    public static boolean isJavaIdentifierPart(char ch) {
3758        return isJavaIdentifierPart((int)ch);
3759    }
3760
3761    /**
3762     * Determines if the character (Unicode code point) may be part of a Java
3763     * identifier as other than the first character.
3764     * <p>
3765     * A character may be part of a Java identifier if any of the following
3766     * are true:
3767     * <ul>
3768     * <li>  it is a letter
3769     * <li>  it is a currency symbol (such as <code>'$'</code>)
3770     * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3771     * <li>  it is a digit
3772     * <li>  it is a numeric letter (such as a Roman numeral character)
3773     * <li>  it is a combining mark
3774     * <li>  it is a non-spacing mark
3775     * <li> {@link #isIdentifierIgnorable(int)
3776     * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
3777     * the character
3778     * </ul>
3779     *
3780     * @param   codePoint the character (Unicode code point) to be tested.
3781     * @return <code>true</code> if the character may be part of a
3782     *          Java identifier; <code>false</code> otherwise.
3783     * @see     java.lang.Character#isIdentifierIgnorable(int)
3784     * @see     java.lang.Character#isJavaIdentifierStart(int)
3785     * @see     java.lang.Character#isLetterOrDigit(int)
3786     * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3787     * @since   1.5
3788     */
3789    public static boolean isJavaIdentifierPart(int codePoint) {
3790        boolean bJavaPart = false;
3791
3792        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3793            bJavaPart = CharacterDataLatin1.isJavaIdentifierPart(codePoint);
3794        } else {
3795            int plane = getPlane(codePoint);
3796            switch(plane) {
3797            case(0):
3798                bJavaPart = CharacterData00.isJavaIdentifierPart(codePoint);
3799                break;
3800            case(1):
3801                bJavaPart = CharacterData01.isJavaIdentifierPart(codePoint);
3802                break;
3803            case(2):
3804                bJavaPart = CharacterData02.isJavaIdentifierPart(codePoint);
3805                break;
3806            case(3): // Undefined
3807            case(4): // Undefined
3808            case(5): // Undefined
3809            case(6): // Undefined
3810            case(7): // Undefined
3811            case(8): // Undefined
3812            case(9): // Undefined
3813            case(10): // Undefined
3814            case(11): // Undefined
3815            case(12): // Undefined
3816            case(13): // Undefined
3817                bJavaPart = CharacterDataUndefined.isJavaIdentifierPart(codePoint);
3818                break;
3819            case(14): 
3820                bJavaPart = CharacterData0E.isJavaIdentifierPart(codePoint);
3821                break;
3822            case(15): // Private Use
3823            case(16): // Private Use
3824                bJavaPart = CharacterDataPrivateUse.isJavaIdentifierPart(codePoint);
3825                break;
3826            default:
3827                // the argument's plane is invalid, and thus is an invalid codepoint
3828                // bJavaPart remains false;
3829                break;
3830            }
3831        }
3832        return bJavaPart;
3833    }
3834
3835    /**
3836     * Determines if the specified character is permissible as the
3837     * first character in a Unicode identifier.
3838     * <p>
3839     * A character may start a Unicode identifier if and only if
3840     * one of the following conditions is true:
3841     * <ul>
3842     * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3843     * <li> {@link #getType(char) getType(ch)} returns 
3844     *      <code>LETTER_NUMBER</code>.
3845     * </ul>
3846     *
3847     * <p><b>Note:</b> This method cannot handle <a
3848     * HREF="#supplementary"> supplementary characters</a>. To support
3849     * all Unicode characters, including supplementary characters, use
3850     * the {@link #isUnicodeIdentifierStart(int)} method.
3851     *
3852     * @param   ch      the character to be tested.
3853     * @return  <code>true</code> if the character may start a Unicode 
3854     *          identifier; <code>false</code> otherwise.
3855     * @see     java.lang.Character#isJavaIdentifierStart(char)
3856     * @see     java.lang.Character#isLetter(char)
3857     * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3858     * @since   1.1
3859     */
3860    public static boolean isUnicodeIdentifierStart(char ch) {
3861        return isUnicodeIdentifierStart((int)ch);
3862    }
3863
3864    /**
3865     * Determines if the specified character (Unicode code point) is permissible as the
3866     * first character in a Unicode identifier.
3867     * <p>
3868     * A character may start a Unicode identifier if and only if
3869     * one of the following conditions is true:
3870     * <ul>
3871     * <li> {@link #isLetter(int) isLetter(codePoint)}
3872     *      returns <code>true</code>
3873     * <li> {@link #getType(int) getType(codePoint)}
3874     *      returns <code>LETTER_NUMBER</code>.
3875     * </ul>
3876     * @param   codePoint the character (Unicode code point) to be tested.
3877     * @return  <code>true</code> if the character may start a Unicode 
3878     *          identifier; <code>false</code> otherwise.
3879     * @see     java.lang.Character#isJavaIdentifierStart(int)
3880     * @see     java.lang.Character#isLetter(int)
3881     * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3882     * @since   1.5
3883     */
3884    public static boolean isUnicodeIdentifierStart(int codePoint) {
3885        boolean bUnicodeStart = false;
3886
3887        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3888            bUnicodeStart = CharacterDataLatin1.isUnicodeIdentifierStart(codePoint);
3889        } else {
3890            int plane = getPlane(codePoint);
3891            switch(plane) {
3892            case(0):
3893                bUnicodeStart = CharacterData00.isUnicodeIdentifierStart(codePoint);
3894                break;
3895            case(1):
3896                bUnicodeStart = CharacterData01.isUnicodeIdentifierStart(codePoint);
3897                break;
3898            case(2):
3899                bUnicodeStart = CharacterData02.isUnicodeIdentifierStart(codePoint);
3900                break;
3901            case(3): // Undefined
3902            case(4): // Undefined
3903            case(5): // Undefined
3904            case(6): // Undefined
3905            case(7): // Undefined
3906            case(8): // Undefined
3907            case(9): // Undefined
3908            case(10): // Undefined
3909            case(11): // Undefined
3910            case(12): // Undefined
3911            case(13): // Undefined
3912                bUnicodeStart = CharacterDataUndefined.isUnicodeIdentifierStart(codePoint);
3913                break;
3914            case(14): 
3915                bUnicodeStart = CharacterData0E.isUnicodeIdentifierStart(codePoint);
3916                break;
3917            case(15): // Private Use
3918            case(16): // Private Use
3919                bUnicodeStart = CharacterDataPrivateUse.isUnicodeIdentifierStart(codePoint);
3920                break;
3921            default:
3922                // the argument's plane is invalid, and thus is an invalid codepoint
3923                // bUnicodeStart remains false;
3924                break;
3925            }
3926        }
3927        return bUnicodeStart;
3928    }
3929
3930    /**
3931     * Determines if the specified character may be part of a Unicode
3932     * identifier as other than the first character.
3933     * <p>
3934     * A character may be part of a Unicode identifier if and only if
3935     * one of the following statements is true:
3936     * <ul>
3937     * <li>  it is a letter
3938     * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3939     * <li>  it is a digit
3940     * <li>  it is a numeric letter (such as a Roman numeral character)
3941     * <li>  it is a combining mark
3942     * <li>  it is a non-spacing mark
3943     * <li> <code>isIdentifierIgnorable</code> returns
3944     * <code>true</code> for this character.
3945     * </ul>
3946     * 
3947     * <p><b>Note:</b> This method cannot handle <a
3948     * HREF="#supplementary"> supplementary characters</a>. To support
3949     * all Unicode characters, including supplementary characters, use
3950     * the {@link #isUnicodeIdentifierPart(int)} method.
3951     *
3952     * @param   ch      the character to be tested.
3953     * @return  <code>true</code> if the character may be part of a 
3954     *          Unicode identifier; <code>false</code> otherwise.
3955     * @see     java.lang.Character#isIdentifierIgnorable(char)
3956     * @see     java.lang.Character#isJavaIdentifierPart(char)
3957     * @see     java.lang.Character#isLetterOrDigit(char)
3958     * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3959     * @since   1.1
3960     */
3961    public static boolean isUnicodeIdentifierPart(char ch) {
3962        return isUnicodeIdentifierPart((int)ch);
3963    }
3964
3965    /**
3966     * Determines if the specified character (Unicode code point) may be part of a Unicode
3967     * identifier as other than the first character.
3968     * <p>
3969     * A character may be part of a Unicode identifier if and only if
3970     * one of the following statements is true:
3971     * <ul>
3972     * <li>  it is a letter
3973     * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3974     * <li>  it is a digit
3975     * <li>  it is a numeric letter (such as a Roman numeral character)
3976     * <li>  it is a combining mark
3977     * <li>  it is a non-spacing mark
3978     * <li> <code>isIdentifierIgnorable</code> returns
3979     * <code>true</code> for this character.
3980     * </ul>
3981     * @param   codePoint the character (Unicode code point) to be tested.
3982     * @return  <code>true</code> if the character may be part of a 
3983     *          Unicode identifier; <code>false</code> otherwise.
3984     * @see     java.lang.Character#isIdentifierIgnorable(int)
3985     * @see     java.lang.Character#isJavaIdentifierPart(int)
3986     * @see     java.lang.Character#isLetterOrDigit(int)
3987     * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3988     * @since   1.5
3989     */
3990    public static boolean isUnicodeIdentifierPart(int codePoint) {
3991        boolean bUnicodePart = false;
3992
3993        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
3994            bUnicodePart = CharacterDataLatin1.isUnicodeIdentifierPart(codePoint);
3995        } else {
3996            int plane = getPlane(codePoint);
3997            switch(plane) {
3998            case(0):
3999                bUnicodePart = CharacterData00.isUnicodeIdentifierPart(codePoint);
4000                break;
4001            case(1):
4002                bUnicodePart = CharacterData01.isUnicodeIdentifierPart(codePoint);
4003                break;
4004            case(2):
4005                bUnicodePart = CharacterData02.isUnicodeIdentifierPart(codePoint);
4006                break;
4007            case(3): // Undefined
4008            case(4): // Undefined
4009            case(5): // Undefined
4010            case(6): // Undefined
4011            case(7): // Undefined
4012            case(8): // Undefined
4013            case(9): // Undefined
4014            case(10): // Undefined
4015            case(11): // Undefined
4016            case(12): // Undefined
4017            case(13): // Undefined
4018                bUnicodePart = CharacterDataUndefined.isUnicodeIdentifierPart(codePoint);
4019                break;
4020            case(14): 
4021                bUnicodePart = CharacterData0E.isUnicodeIdentifierPart(codePoint);      
4022                break;
4023            case(15): // Private Use
4024            case(16): // Private Use
4025                bUnicodePart = CharacterDataPrivateUse.isUnicodeIdentifierPart(codePoint);
4026                break;
4027            default:
4028                // the argument's plane is invalid, and thus is an invalid codepoint
4029                //bUnicodePart remains false;
4030                break;
4031            }
4032        }
4033        return bUnicodePart;
4034    }
4035
4036    /**
4037     * Determines if the specified character should be regarded as
4038     * an ignorable character in a Java identifier or a Unicode identifier.
4039     * <p>
4040     * The following Unicode characters are ignorable in a Java identifier
4041     * or a Unicode identifier:
4042     * <ul>
4043     * <li>ISO control characters that are not whitespace
4044     * <ul>
4045     * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
4046     * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
4047     * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
4048     * </ul>
4049     *
4050     * <li>all characters that have the <code>FORMAT</code> general
4051     * category value
4052     * </ul>
4053     *
4054     * <p><b>Note:</b> This method cannot handle <a
4055     * HREF="#supplementary"> supplementary characters</a>. To support
4056     * all Unicode characters, including supplementary characters, use
4057     * the {@link #isIdentifierIgnorable(int)} method.
4058     *
4059     * @param   ch      the character to be tested.
4060     * @return  <code>true</code> if the character is an ignorable control 
4061     *          character that may be part of a Java or Unicode identifier;
4062     *           <code>false</code> otherwise.
4063     * @see     java.lang.Character#isJavaIdentifierPart(char)
4064     * @see     java.lang.Character#isUnicodeIdentifierPart(char)
4065     * @since   1.1
4066     */
4067    public static boolean isIdentifierIgnorable(char ch) {
4068        return isIdentifierIgnorable((int)ch);
4069    }
4070
4071    /**
4072     * Determines if the specified character (Unicode code point) should be regarded as
4073     * an ignorable character in a Java identifier or a Unicode identifier.
4074     * <p>
4075     * The following Unicode characters are ignorable in a Java identifier
4076     * or a Unicode identifier:
4077     * <ul>
4078     * <li>ISO control characters that are not whitespace
4079     * <ul>
4080     * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
4081     * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
4082     * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
4083     * </ul>
4084     *
4085     * <li>all characters that have the <code>FORMAT</code> general
4086     * category value
4087     * </ul>
4088     *
4089     * @param   codePoint the character (Unicode code point) to be tested.
4090     * @return  <code>true</code> if the character is an ignorable control 
4091     *          character that may be part of a Java or Unicode identifier;
4092     *          <code>false</code> otherwise.
4093     * @see     java.lang.Character#isJavaIdentifierPart(int)
4094     * @see     java.lang.Character#isUnicodeIdentifierPart(int)
4095     * @since   1.5
4096     */
4097    public static boolean isIdentifierIgnorable(int codePoint) {
4098        boolean bIdentifierIgnorable = false;
4099
4100        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
4101            bIdentifierIgnorable = CharacterDataLatin1.isIdentifierIgnorable(codePoint);
4102        } else {
4103            int plane = getPlane(codePoint);
4104            switch(plane) {
4105            case(0):
4106                bIdentifierIgnorable = CharacterData00.isIdentifierIgnorable(codePoint);
4107                break;
4108            case(1):
4109                bIdentifierIgnorable = CharacterData01.isIdentifierIgnorable(codePoint);
4110                break;
4111            case(2):
4112                bIdentifierIgnorable = CharacterData02.isIdentifierIgnorable(codePoint);
4113                break;
4114            case(3): // Undefined
4115            case(4): // Undefined
4116            case(5): // Undefined
4117            case(6): // Undefined
4118            case(7): // Undefined
4119            case(8): // Undefined
4120            case(9): // Undefined
4121            case(10): // Undefined
4122            case(11): // Undefined
4123            case(12): // Undefined
4124            case(13): // Undefined
4125                bIdentifierIgnorable = CharacterDataUndefined.isIdentifierIgnorable(codePoint);
4126                break;
4127            case(14): 
4128                bIdentifierIgnorable = CharacterData0E.isIdentifierIgnorable(codePoint);
4129                break;
4130            case(15): // Private Use
4131            case(16): // Private Use
4132                bIdentifierIgnorable = CharacterDataPrivateUse.isIdentifierIgnorable(codePoint);
4133                break;
4134            default:
4135                // the argument's plane is invalid, and thus is an invalid codepoint
4136                // bIdentifierIgnorable remains false;
4137                break;
4138            }
4139        }
4140        return bIdentifierIgnorable;
4141    }
4142
4143    /**
4144     * Converts the character argument to lowercase using case
4145     * mapping information from the UnicodeData file.
4146     * <p>
4147     * Note that
4148     * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
4149     * does not always return <code>true</code> for some ranges of
4150     * characters, particularly those that are symbols or ideographs.
4151     *
4152     * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
4153     * characters to lowercase. <code>String</code> case mapping methods
4154     * have several benefits over <code>Character</code> case mapping methods.
4155     * <code>String</code> case mapping methods can perform locale-sensitive
4156     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4157     * the <code>Character</code> case mapping methods cannot.
4158     *
4159     * <p><b>Note:</b> This method cannot handle <a
4160     * HREF="#supplementary"> supplementary characters</a>. To support
4161     * all Unicode characters, including supplementary characters, use
4162     * the {@link #toLowerCase(int)} method.
4163     *
4164     * @param   ch   the character to be converted.
4165     * @return  the lowercase equivalent of the character, if any;
4166     *          otherwise, the character itself.
4167     * @see     java.lang.Character#isLowerCase(char)
4168     * @see     java.lang.String#toLowerCase()
4169     */
4170    public static char toLowerCase(char ch) {
4171        return (char)toLowerCase((int)ch);
4172    }
4173
4174    /**
4175     * Converts the character (Unicode code point) argument to
4176     * lowercase using case mapping information from the UnicodeData
4177     * file.
4178     *
4179     * <p> Note that
4180     * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
4181     * does not always return <code>true</code> for some ranges of
4182     * characters, particularly those that are symbols or ideographs.
4183     *
4184     * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
4185     * characters to lowercase. <code>String</code> case mapping methods
4186     * have several benefits over <code>Character</code> case mapping methods.
4187     * <code>String</code> case mapping methods can perform locale-sensitive
4188     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4189     * the <code>Character</code> case mapping methods cannot.
4190     *
4191     * @param   codePoint   the character (Unicode code point) to be converted.
4192     * @return  the lowercase equivalent of the character (Unicode code
4193     *          point), if any; otherwise, the character itself.
4194     * @see     java.lang.Character#isLowerCase(int)
4195     * @see     java.lang.String#toLowerCase()
4196     *
4197     * @since   1.5
4198     */
4199    public static int toLowerCase(int codePoint) {
4200        int lowerCase = codePoint;
4201        int plane = 0;
4202
4203        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
4204            lowerCase = CharacterDataLatin1.toLowerCase(codePoint);
4205        } else {
4206            plane = getPlane(codePoint);
4207            switch(plane) {
4208            case(0):
4209                lowerCase = CharacterData00.toLowerCase(codePoint);
4210                break;
4211            case(1):
4212                lowerCase = CharacterData01.toLowerCase(codePoint);
4213                break;
4214            case(2):
4215                lowerCase = CharacterData02.toLowerCase(codePoint);
4216                break;
4217            case(3): // Undefined
4218            case(4): // Undefined
4219            case(5): // Undefined
4220            case(6): // Undefined
4221            case(7): // Undefined
4222            case(8): // Undefined
4223            case(9): // Undefined
4224            case(10): // Undefined
4225            case(11): // Undefined
4226            case(12): // Undefined
4227            case(13): // Undefined
4228                lowerCase = CharacterDataUndefined.toLowerCase(codePoint);
4229                break;
4230            case(14):
4231                lowerCase = CharacterData0E.toLowerCase(codePoint);
4232                break;
4233            case(15): // Private Use
4234            case(16): // Private Use
4235                lowerCase = CharacterDataPrivateUse.toLowerCase(codePoint);
4236                break;
4237            default:
4238                // the argument's plane is invalid, and thus is an invalid codepoint
4239                // lowerCase remains codePoint;
4240                break;
4241            }
4242        }
4243        return lowerCase;
4244    }
4245
4246    /**
4247     * Converts the character argument to uppercase using case mapping
4248     * information from the UnicodeData file.
4249     * <p>
4250     * Note that
4251     * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
4252     * does not always return <code>true</code> for some ranges of
4253     * characters, particularly those that are symbols or ideographs.
4254     *
4255     * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
4256     * characters to uppercase. <code>String</code> case mapping methods
4257     * have several benefits over <code>Character</code> case mapping methods.
4258     * <code>String</code> case mapping methods can perform locale-sensitive
4259     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4260     * the <code>Character</code> case mapping methods cannot.
4261     *
4262     * <p><b>Note:</b> This method cannot handle <a
4263     * HREF="#supplementary"> supplementary characters</a>. To support
4264     * all Unicode characters, including supplementary characters, use
4265     * the {@link #toUpperCase(int)} method.
4266     *
4267     * @param   ch   the character to be converted.
4268     * @return  the uppercase equivalent of the character, if any;
4269     *          otherwise, the character itself.
4270     * @see     java.lang.Character#isUpperCase(char)
4271     * @see     java.lang.String#toUpperCase()
4272     */
4273    public static char toUpperCase(char ch) {
4274        return (char)toUpperCase((int)ch);
4275    }
4276
4277    /**
4278     * Converts the character (Unicode code point) argument to
4279     * uppercase using case mapping information from the UnicodeData
4280     * file.
4281     * 
4282     * <p>Note that
4283     * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
4284     * does not always return <code>true</code> for some ranges of
4285     * characters, particularly those that are symbols or ideographs.
4286     *
4287     * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
4288     * characters to uppercase. <code>String</code> case mapping methods
4289     * have several benefits over <code>Character</code> case mapping methods.
4290     * <code>String</code> case mapping methods can perform locale-sensitive
4291     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4292     * the <code>Character</code> case mapping methods cannot.
4293     *
4294     * @param   codePoint   the character (Unicode code point) to be converted.
4295     * @return  the uppercase equivalent of the character, if any;
4296     *          otherwise, the character itself.
4297     * @see     java.lang.Character#isUpperCase(int)
4298     * @see     java.lang.String#toUpperCase()
4299     * 
4300     * @since   1.5
4301     */
4302    public static int toUpperCase(int codePoint) {
4303        int upperCase = codePoint;
4304        int plane = 0;
4305
4306        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
4307            upperCase = CharacterDataLatin1.toUpperCase(codePoint);
4308        } else {
4309            plane = getPlane(codePoint);
4310            switch(plane) {
4311            case(0):
4312                upperCase = CharacterData00.toUpperCase(codePoint);
4313                break;
4314            case(1):
4315                upperCase = CharacterData01.toUpperCase(codePoint);
4316                break;
4317            case(2):
4318                upperCase = CharacterData02.toUpperCase(codePoint);
4319                break;
4320            case(3): // Undefined
4321            case(4): // Undefined
4322            case(5): // Undefined
4323            case(6): // Undefined
4324            case(7): // Undefined
4325            case(8): // Undefined
4326            case(9): // Undefined
4327            case(10): // Undefined
4328            case(11): // Undefined
4329            case(12): // Undefined
4330            case(13): // Undefined
4331                upperCase = CharacterDataUndefined.toUpperCase(codePoint);
4332                break;
4333            case(14): 
4334                upperCase = CharacterData0E.toUpperCase(codePoint);
4335                break;
4336            case(15): // Private Use
4337            case(16): // Private Use
4338                upperCase = CharacterDataPrivateUse.toUpperCase(codePoint);
4339                break;
4340            default:
4341                // the argument's plane is invalid, and thus is an invalid codepoint
4342                // upperCase remains codePoint;
4343                break;
4344            }
4345        }
4346        return upperCase;
4347    }
4348
4349    /**
4350     * Converts the character argument to titlecase using case mapping
4351     * information from the UnicodeData file. If a character has no
4352     * explicit titlecase mapping and is not itself a titlecase char
4353     * according to UnicodeData, then the uppercase mapping is
4354     * returned as an equivalent titlecase mapping. If the
4355     * <code>char</code> argument is already a titlecase
4356     * <code>char</code>, the same <code>char</code> value will be
4357     * returned.
4358     * <p>
4359     * Note that
4360     * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
4361     * does not always return <code>true</code> for some ranges of
4362     * characters.
4363     *
4364     * <p><b>Note:</b> This method cannot handle <a
4365     * HREF="#supplementary"> supplementary characters</a>. To support
4366     * all Unicode characters, including supplementary characters, use
4367     * the {@link #toTitleCase(int)} method.
4368     *
4369     * @param   ch   the character to be converted.
4370     * @return  the titlecase equivalent of the character, if any;
4371     *          otherwise, the character itself.
4372     * @see     java.lang.Character#isTitleCase(char)
4373     * @see     java.lang.Character#toLowerCase(char)
4374     * @see     java.lang.Character#toUpperCase(char)
4375     * @since   1.0.2
4376     */
4377    public static char toTitleCase(char ch) {
4378        return (char)toTitleCase((int)ch);
4379    }
4380
4381    /**
4382     * Converts the character (Unicode code point) argument to titlecase using case mapping
4383     * information from the UnicodeData file. If a character has no
4384     * explicit titlecase mapping and is not itself a titlecase char
4385     * according to UnicodeData, then the uppercase mapping is
4386     * returned as an equivalent titlecase mapping. If the
4387     * character argument is already a titlecase
4388     * character, the same character value will be
4389     * returned.
4390     * 
4391     * <p>Note that
4392     * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
4393     * does not always return <code>true</code> for some ranges of
4394     * characters.
4395     *
4396     * @param   codePoint   the character (Unicode code point) to be converted.
4397     * @return  the titlecase equivalent of the character, if any;
4398     *          otherwise, the character itself.
4399     * @see     java.lang.Character#isTitleCase(int)
4400     * @see     java.lang.Character#toLowerCase(int)
4401     * @see     java.lang.Character#toUpperCase(int)
4402     * @since   1.5
4403     */
4404    public static int toTitleCase(int codePoint) {
4405        int titleCase = codePoint;
4406        int plane = 0;
4407        
4408        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
4409            titleCase = CharacterDataLatin1.toTitleCase(codePoint);
4410        } else {
4411            plane = getPlane(codePoint);
4412            switch(plane) {
4413            case(0):
4414                titleCase = CharacterData00.toTitleCase(codePoint);
4415                break;
4416            case(1):
4417                titleCase = CharacterData01.toTitleCase(codePoint);
4418                break;
4419            case(2):
4420                titleCase = CharacterData02.toTitleCase(codePoint);
4421                break;
4422            case(3): // Undefined
4423            case(4): // Undefined
4424            case(5): // Undefined
4425            case(6): // Undefined
4426            case(7): // Undefined
4427            case(8): // Undefined
4428            case(9): // Undefined
4429            case(10): // Undefined
4430            case(11): // Undefined
4431            case(12): // Undefined
4432            case(13): // Undefined
4433                titleCase = CharacterDataUndefined.toTitleCase(codePoint);
4434                break;
4435            case(14): 
4436                titleCase = CharacterData0E.toTitleCase(codePoint);
4437                break;
4438            case(15): // Private Use
4439            case(16): // Private Use
4440                titleCase = CharacterDataPrivateUse.toTitleCase(codePoint);
4441                break;
4442            default:
4443                // the argument's plane is invalid, and thus is an invalid codepoint
4444                // titleCase remains codePoint;
4445                break;
4446            }
4447        }
4448        return titleCase;
4449    }
4450
4451    /**
4452     * Returns the numeric value of the character <code>ch</code> in the
4453     * specified radix.
4454     * <p>
4455     * If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
4456     * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
4457     * value of <code>ch</code> is not a valid digit in the specified
4458     * radix, <code>-1</code> is returned. A character is a valid digit
4459     * if at least one of the following is true:
4460     * <ul>
4461     * <li>The method <code>isDigit</code> is <code>true</code> of the character
4462     *     and the Unicode decimal digit value of the character (or its
4463     *     single-character decomposition) is less than the specified radix.
4464     *     In this case the decimal digit value is returned.
4465     * <li>The character is one of the uppercase Latin letters
4466     *     <code>'A'</code> through <code>'Z'</code> and its code is less than
4467     *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
4468     *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
4469     *     is returned.
4470     * <li>The character is one of the lowercase Latin letters
4471     *     <code>'a'</code> through <code>'z'</code> and its code is less than
4472     *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
4473     *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
4474     *     is returned.
4475     * </ul>
4476     *
4477     * <p><b>Note:</b> This method cannot handle <a
4478     * HREF="#supplementary"> supplementary characters</a>. To support
4479     * all Unicode characters, including supplementary characters, use
4480     * the {@link #digit(int, int)} method.
4481     *
4482     * @param   ch      the character to be converted.
4483     * @param   radix   the radix.
4484     * @return  the numeric value represented by the character in the
4485     *          specified radix.
4486     * @see     java.lang.Character#forDigit(int, int)
4487     * @see     java.lang.Character#isDigit(char)
4488     */
4489    public static int digit(char ch, int radix) {
4490        return digit((int)ch, radix);
4491    }
4492
4493    /**
4494     * Returns the numeric value of the specified character (Unicode
4495     * code point) in the specified radix.
4496     * 
4497     * <p>If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
4498     * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
4499     * character is not a valid digit in the specified
4500     * radix, <code>-1</code> is returned. A character is a valid digit
4501     * if at least one of the following is true:
4502     * <ul>
4503     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
4504     *     and the Unicode decimal digit value of the character (or its
4505     *     single-character decomposition) is less than the specified radix.
4506     *     In this case the decimal digit value is returned.
4507     * <li>The character is one of the uppercase Latin letters
4508     *     <code>'A'</code> through <code>'Z'</code> and its code is less than
4509     *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
4510     *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
4511     *     is returned.
4512     * <li>The character is one of the lowercase Latin letters
4513     *     <code>'a'</code> through <code>'z'</code> and its code is less than
4514     *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
4515     *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
4516     *     is returned.
4517     * </ul>
4518     *
4519     * @param   codePoint the character (Unicode code point) to be converted.
4520     * @param   radix   the radix.
4521     * @return  the numeric value represented by the character in the
4522     *          specified radix.
4523     * @see     java.lang.Character#forDigit(int, int)
4524     * @see     java.lang.Character#isDigit(int)
4525     * @since   1.5
4526     */
4527    public static int digit(int codePoint, int radix) {
4528        int digit = -1;
4529
4530        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
4531            digit = CharacterDataLatin1.digit(codePoint, radix);
4532        } else {
4533            int plane = getPlane(codePoint);
4534            switch(plane) {
4535            case(0):
4536                digit = CharacterData00.digit(codePoint, radix);
4537                break;
4538            case(1):
4539                digit = CharacterData01.digit(codePoint, radix);
4540                break;
4541            case(2):
4542                digit = CharacterData02.digit(codePoint, radix);
4543                break;
4544            case(3): // Undefined
4545            case(4): // Undefined
4546            case(5): // Undefined
4547            case(6): // Undefined
4548            case(7): // Undefined
4549            case(8): // Undefined
4550            case(9): // Undefined
4551            case(10): // Undefined
4552            case(11): // Undefined
4553            case(12): // Undefined
4554            case(13): // Undefined
4555                digit = CharacterDataUndefined.digit(codePoint, radix);
4556                break;
4557            case(14): 
4558                digit = CharacterData0E.digit(codePoint, radix);
4559                break;
4560            case(15): // Private Use
4561            case(16): // Private Use
4562                digit = CharacterDataPrivateUse.digit(codePoint, radix);
4563                break;
4564            default:
4565                // the argument's plane is invalid, and thus is an invalid codepoint
4566                // digit remains -1;
4567                break;
4568            }
4569        }
4570        return digit;
4571    }
4572
4573    /**
4574     * Returns the <code>int</code> value that the specified Unicode
4575     * character represents. For example, the character
4576     * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
4577     * an int with a value of 50.
4578     * <p>
4579     * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
4580     * <code>'&#92;u005A'</code>), lowercase
4581     * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
4582     * full width variant (<code>'&#92;uFF21'</code> through
4583     * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
4584     * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
4585     * through 35. This is independent of the Unicode specification,
4586     * which does not assign numeric values to these <code>char</code>
4587     * values.
4588     * <p>
4589     * If the character does not have a numeric value, then -1 is returned.
4590     * If the character has a numeric value that cannot be represented as a
4591     * nonnegative integer (for example, a fractional value), then -2
4592     * is returned.
4593     *
4594     * <p><b>Note:</b> This method cannot handle <a
4595     * HREF="#supplementary"> supplementary characters</a>. To support
4596     * all Unicode characters, including supplementary characters, use
4597     * the {@link #getNumericValue(int)} method.
4598     *
4599     * @param   ch      the character to be converted.
4600     * @return  the numeric value of the character, as a nonnegative <code>int</code>
4601     *           value; -2 if the character has a numeric value that is not a
4602     *          nonnegative integer; -1 if the character has no numeric value.
4603     * @see     java.lang.Character#forDigit(int, int)
4604     * @see     java.lang.Character#isDigit(char)
4605     * @since   1.1
4606     */
4607    public static int getNumericValue(char ch) {
4608        return getNumericValue((int)ch);
4609    }
4610
4611    /**
4612     * Returns the <code>int</code> value that the specified 
4613     * character (Unicode code point) represents. For example, the character
4614     * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
4615     * an <code>int</code> with a value of 50.
4616     * <p>
4617     * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
4618     * <code>'&#92;u005A'</code>), lowercase
4619     * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
4620     * full width variant (<code>'&#92;uFF21'</code> through
4621     * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
4622     * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
4623     * through 35. This is independent of the Unicode specification,
4624     * which does not assign numeric values to these <code>char</code>
4625     * values.
4626     * <p>
4627     * If the character does not have a numeric value, then -1 is returned.
4628     * If the character has a numeric value that cannot be represented as a
4629     * nonnegative integer (for example, a fractional value), then -2
4630     * is returned.
4631     *
4632     * @param   codePoint the character (Unicode code point) to be converted.
4633     * @return  the numeric value of the character, as a nonnegative <code>int</code>
4634     *          value; -2 if the character has a numeric value that is not a
4635     *          nonnegative integer; -1 if the character has no numeric value.
4636     * @see     java.lang.Character#forDigit(int, int)
4637     * @see     java.lang.Character#isDigit(int)
4638     * @since   1.5
4639     */
4640    public static int getNumericValue(int codePoint) {
4641        int numericValue = -1;
4642
4643        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
4644            numericValue = CharacterDataLatin1.getNumericValue(codePoint);
4645        } else {
4646            int plane = getPlane(codePoint);
4647            switch(plane) {
4648            case(0):
4649                numericValue = CharacterData00.getNumericValue(codePoint);
4650                break;
4651            case(1):
4652                numericValue = CharacterData01.getNumericValue(codePoint);
4653                break;
4654            case(2):
4655                numericValue = CharacterData02.getNumericValue(codePoint);
4656                break;
4657            case(3): // Undefined
4658            case(4): // Undefined
4659            case(5): // Undefined
4660            case(6): // Undefined
4661            case(7): // Undefined
4662            case(8): // Undefined
4663            case(9): // Undefined
4664            case(10): // Undefined
4665            case(11): // Undefined
4666            case(12): // Undefined
4667            case(13): // Undefined
4668                numericValue = CharacterDataUndefined.getNumericValue(codePoint);
4669                break;
4670            case(14): 
4671                numericValue = CharacterData0E.getNumericValue(codePoint);
4672                break;
4673            case(15): // Private Use
4674            case(16): // Private Use
4675                numericValue = CharacterDataPrivateUse.getNumericValue(codePoint);
4676                break;
4677            default:
4678                // the argument's plane is invalid, and thus is an invalid codepoint
4679                // numericValue remains -1
4680                break;
4681            }
4682        }
4683        return numericValue;
4684    }
4685
4686    /**
4687     * Determines if the specified character is ISO-LATIN-1 white space.
4688     * This method returns <code>true</code> for the following five
4689     * characters only:
4690     * <table>
4691     * <tr><td><code>'\t'</code></td>            <td><code>'&#92;u0009'</code></td>
4692     *     <td><code>HORIZONTAL TABULATION</code></td></tr>
4693     * <tr><td><code>'\n'</code></td>            <td><code>'&#92;u000A'</code></td>
4694     *     <td><code>NEW LINE</code></td></tr>
4695     * <tr><td><code>'\f'</code></td>            <td><code>'&#92;u000C'</code></td>
4696     *     <td><code>FORM FEED</code></td></tr>
4697     * <tr><td><code>'\r'</code></td>            <td><code>'&#92;u000D'</code></td>
4698     *     <td><code>CARRIAGE RETURN</code></td></tr>
4699     * <tr><td><code>'&nbsp;'</code></td>  <td><code>'&#92;u0020'</code></td>
4700     *     <td><code>SPACE</code></td></tr>
4701     * </table>
4702     *
4703     * @param      ch   the character to be tested.
4704     * @return     <code>true</code> if the character is ISO-LATIN-1 white
4705     *             space; <code>false</code> otherwise.
4706     * @see        java.lang.Character#isSpaceChar(char)
4707     * @see        java.lang.Character#isWhitespace(char)
4708     * @deprecated Replaced by {@link #isWhitespace(char)}.
4709     */
4710    @Deprecated  
4711    public static boolean isSpace(char ch) {
4712        return (ch <= 0x0020) &&
4713            (((((1L << 0x0009) |
4714            (1L << 0x000A) |
4715            (1L << 0x000C) |
4716            (1L << 0x000D) |
4717            (1L << 0x0020)) >> ch) & 1L) != 0);
4718    }
4719
4720
4721    /**
4722     * Determines if the specified character is a Unicode space character.
4723     * A character is considered to be a space character if and only if
4724     * it is specified to be a space character by the Unicode standard. This
4725     * method returns true if the character's general category type is any of
4726     * the following:
4727     * <ul>
4728     * <li> <code>SPACE_SEPARATOR</code>
4729     * <li> <code>LINE_SEPARATOR</code>
4730     * <li> <code>PARAGRAPH_SEPARATOR</code>
4731     * </ul>
4732     *
4733     * <p><b>Note:</b> This method cannot handle <a
4734     * HREF="#supplementary"> supplementary characters</a>. To support
4735     * all Unicode characters, including supplementary characters, use
4736     * the {@link #isSpaceChar(int)} method.
4737     *
4738     * @param   ch      the character to be tested.
4739     * @return  <code>true</code> if the character is a space character; 
4740     *          <code>false</code> otherwise.
4741     * @see     java.lang.Character#isWhitespace(char)
4742     * @since   1.1
4743     */
4744    public static boolean isSpaceChar(char ch) {
4745        return isSpaceChar((int)ch);
4746    }
4747
4748    /**
4749     * Determines if the specified character (Unicode code point) is a
4750     * Unicode space character.  A character is considered to be a
4751     * space character if and only if it is specified to be a space
4752     * character by the Unicode standard. This method returns true if
4753     * the character's general category type is any of the following:
4754     *
4755     * <ul>
4756     * <li> {@link #SPACE_SEPARATOR}
4757     * <li> {@link #LINE_SEPARATOR}
4758     * <li> {@link #PARAGRAPH_SEPARATOR}
4759     * </ul>
4760     *
4761     * @param   codePoint the character (Unicode code point) to be tested.
4762     * @return  <code>true</code> if the character is a space character; 
4763     *          <code>false</code> otherwise.
4764     * @see     java.lang.Character#isWhitespace(int)
4765     * @since   1.5
4766     */
4767    public static boolean isSpaceChar(int codePoint) {
4768        boolean bSpaceChar = false;
4769
4770        if (codePoint >= MIN_CODE_POINT && codePoint <=  FAST_PATH_MAX) {
4771            bSpaceChar =  CharacterDataLatin1.isSpaceChar(codePoint);
4772        } else {
4773            int plane = getPlane(codePoint);
4774            switch(plane) {
4775            case(0):
4776                bSpaceChar = CharacterData00.isSpaceChar(codePoint);
4777                break;
4778            case(1):
4779                bSpaceChar = CharacterData01.isSpaceChar(codePoint);
4780                break;
4781            case(2):
4782                bSpaceChar = CharacterData02.isSpaceChar(codePoint);
4783                break;
4784            case(3): // Undefined
4785            case(4): // Undefined
4786            case(5): // Undefined
4787            case(6): // Undefined
4788            case(7): // Undefined
4789            case(8): // Undefined
4790            case(9): // Undefined
4791            case(10): // Undefined
4792            case(11): // Undefined
4793            case(12): // Undefined
4794            case(13): // Undefined
4795                bSpaceChar = CharacterDataUndefined.isSpaceChar(codePoint);
4796                break;
4797            case(14): 
4798                bSpaceChar = CharacterData0E.isSpaceChar(codePoint);
4799                break;
4800            case(15): // Private Use
4801            case(16): // Private Use
4802                bSpaceChar = CharacterDataPrivateUse.isSpaceChar(codePoint);
4803                break;
4804            default:
4805                // the argument's plane is invalid, and thus is an invalid codepoint
4806                // bSpaceChar remains false
4807                break;
4808            }
4809        }
4810        return bSpaceChar;
4811    }
4812
4813    /**
4814     * Determines if the specified character is white space according to Java.
4815     * A character is a Java whitespace character if and only if it satisfies
4816     * one of the following criteria:
4817     * <ul>
4818     * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
4819     *      <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>) 
4820     *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
4821     *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
4822     * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
4823     * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
4824     * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
4825     * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
4826     * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
4827     * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
4828     * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
4829     * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
4830     * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
4831     * </ul>
4832     *
4833     * <p><b>Note:</b> This method cannot handle <a
4834     * HREF="#supplementary"> supplementary characters</a>. To support
4835     * all Unicode characters, including supplementary characters, use
4836     * the {@link #isWhitespace(int)} method.
4837     *
4838     * @param   ch the character to be tested.
4839     * @return  <code>true</code> if the character is a Java whitespace
4840     *          character; <code>false</code> otherwise.
4841     * @see     java.lang.Character#isSpaceChar(char)
4842     * @since   1.1
4843     */
4844    public static boolean isWhitespace(char ch) {
4845        return isWhitespace((int)ch);
4846    }
4847
4848    /**
4849     * Determines if the specified character (Unicode code point) is
4850     * white space according to Java.  A character is a Java
4851     * whitespace character if and only if it satisfies one of the
4852     * following criteria:
4853     * <ul>
4854     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
4855     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 
4856     *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
4857     *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
4858     * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
4859     * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
4860     * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
4861     * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
4862     * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
4863     * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
4864     * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
4865     * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
4866     * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
4867     * </ul>
4868     *
4869     *
4870     * @param   codePoint the character (Unicode code point) to be tested.
4871     * @return  <code>true</code> if the character is a Java whitespace
4872     *          character; <code>false</code> otherwise.
4873     * @see     java.lang.Character#isSpaceChar(int)
4874     * @since   1.5
4875     */
4876    public static boolean isWhitespace(int codePoint) {
4877        boolean bWhiteSpace = false;
4878
4879        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
4880            bWhiteSpace =  CharacterDataLatin1.isWhitespace(codePoint);
4881        } else {
4882            int plane = getPlane(codePoint);
4883            switch(plane) {
4884            case(0):
4885                bWhiteSpace = CharacterData00.isWhitespace(codePoint);
4886                break;
4887            case(1):
4888                bWhiteSpace = CharacterData01.isWhitespace(codePoint);
4889                break;
4890            case(2):
4891                bWhiteSpace = CharacterData02.isWhitespace(codePoint);
4892                break;
4893            case(3): // Undefined
4894            case(4): // Undefined
4895            case(5): // Undefined
4896            case(6): // Undefined
4897            case(7): // Undefined
4898            case(8): // Undefined
4899            case(9): // Undefined
4900            case(10): // Undefined
4901            case(11): // Undefined
4902            case(12): // Undefined
4903            case(13): // Undefined
4904                bWhiteSpace = CharacterDataUndefined.isWhitespace(codePoint);
4905                break;
4906            case(14): 
4907                bWhiteSpace = CharacterData0E.isWhitespace(codePoint);
4908                break;
4909            case(15): // Private Use
4910            case(16): // Private Use
4911                bWhiteSpace = CharacterDataPrivateUse.isWhitespace(codePoint);
4912                break;
4913            default:
4914                // the argument's plane is invalid, and thus is an invalid codepoint
4915                // bWhiteSpace remains false
4916                break;
4917            }
4918        }
4919        return bWhiteSpace;
4920    }
4921
4922    /**
4923     * Determines if the specified character is an ISO control
4924     * character.  A character is considered to be an ISO control
4925     * character if its code is in the range <code>'&#92;u0000'</code>
4926     * through <code>'&#92;u001F'</code> or in the range
4927     * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
4928     *
4929     * <p><b>Note:</b> This method cannot handle <a
4930     * HREF="#supplementary"> supplementary characters</a>. To support
4931     * all Unicode characters, including supplementary characters, use
4932     * the {@link #isISOControl(int)} method.
4933     *
4934     * @param   ch      the character to be tested.
4935     * @return  <code>true</code> if the character is an ISO control character;
4936     *          <code>false</code> otherwise.
4937     *
4938     * @see     java.lang.Character#isSpaceChar(char)
4939     * @see     java.lang.Character#isWhitespace(char)
4940     * @since   1.1
4941     */
4942    public static boolean isISOControl(char ch) {
4943        return isISOControl((int)ch);
4944    }
4945
4946    /**
4947     * Determines if the referenced character (Unicode code point) is an ISO control
4948     * character.  A character is considered to be an ISO control
4949     * character if its code is in the range <code>'&#92;u0000'</code>
4950     * through <code>'&#92;u001F'</code> or in the range
4951     * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
4952     *
4953     * @param   codePoint the character (Unicode code point) to be tested.
4954     * @return  <code>true</code> if the character is an ISO control character;
4955     *          <code>false</code> otherwise.
4956     * @see     java.lang.Character#isSpaceChar(int)
4957     * @see     java.lang.Character#isWhitespace(int)
4958     * @since   1.5
4959     */
4960    public static boolean isISOControl(int codePoint) {
4961        return (codePoint >= 0x0000 && codePoint <= 0x001F) || 
4962            (codePoint >= 0x007F && codePoint <= 0x009F);
4963    }
4964
4965    /**
4966     * Returns a value indicating a character's general category.
4967     *
4968     * <p><b>Note:</b> This method cannot handle <a
4969     * HREF="#supplementary"> supplementary characters</a>. To support
4970     * all Unicode characters, including supplementary characters, use
4971     * the {@link #getType(int)} method.
4972     *
4973     * @param   ch      the character to be tested.
4974     * @return  a value of type <code>int</code> representing the 
4975     *          character's general category.
4976     * @see     java.lang.Character#COMBINING_SPACING_MARK
4977     * @see     java.lang.Character#CONNECTOR_PUNCTUATION
4978     * @see     java.lang.Character#CONTROL
4979     * @see     java.lang.Character#CURRENCY_SYMBOL
4980     * @see     java.lang.Character#DASH_PUNCTUATION
4981     * @see     java.lang.Character#DECIMAL_DIGIT_NUMBER
4982     * @see     java.lang.Character#ENCLOSING_MARK
4983     * @see     java.lang.Character#END_PUNCTUATION
4984     * @see     java.lang.Character#FINAL_QUOTE_PUNCTUATION
4985     * @see     java.lang.Character#FORMAT
4986     * @see     java.lang.Character#INITIAL_QUOTE_PUNCTUATION
4987     * @see     java.lang.Character#LETTER_NUMBER
4988     * @see     java.lang.Character#LINE_SEPARATOR
4989     * @see     java.lang.Character#LOWERCASE_LETTER
4990     * @see     java.lang.Character#MATH_SYMBOL
4991     * @see     java.lang.Character#MODIFIER_LETTER
4992     * @see     java.lang.Character#MODIFIER_SYMBOL
4993     * @see     java.lang.Character#NON_SPACING_MARK
4994     * @see     java.lang.Character#OTHER_LETTER
4995     * @see     java.lang.Character#OTHER_NUMBER
4996     * @see     java.lang.Character#OTHER_PUNCTUATION
4997     * @see     java.lang.Character#OTHER_SYMBOL
4998     * @see     java.lang.Character#PARAGRAPH_SEPARATOR
4999     * @see     java.lang.Character#PRIVATE_USE
5000     * @see     java.lang.Character#SPACE_SEPARATOR
5001     * @see     java.lang.Character#START_PUNCTUATION
5002     * @see     java.lang.Character#SURROGATE
5003     * @see     java.lang.Character#TITLECASE_LETTER
5004     * @see     java.lang.Character#UNASSIGNED
5005     * @see     java.lang.Character#UPPERCASE_LETTER
5006     * @since   1.1
5007     */
5008    public static int getType(char ch) {
5009        return getType((int)ch);
5010    }
5011
5012    /**
5013     * Returns a value indicating a character's general category.
5014     *
5015     * @param   codePoint the character (Unicode code point) to be tested.
5016     * @return  a value of type <code>int</code> representing the 
5017     *          character's general category.
5018     * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
5019     * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
5020     * @see     Character#CONTROL CONTROL
5021     * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
5022     * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
5023     * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
5024     * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
5025     * @see     Character#END_PUNCTUATION END_PUNCTUATION
5026     * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
5027     * @see     Character#FORMAT FORMAT
5028     * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
5029     * @see     Character#LETTER_NUMBER LETTER_NUMBER
5030     * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
5031     * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
5032     * @see     Character#MATH_SYMBOL MATH_SYMBOL
5033     * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
5034     * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
5035     * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
5036     * @see     Character#OTHER_LETTER OTHER_LETTER
5037     * @see     Character#OTHER_NUMBER OTHER_NUMBER
5038     * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
5039     * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
5040     * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
5041     * @see     Character#PRIVATE_USE PRIVATE_USE
5042     * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
5043     * @see     Character#START_PUNCTUATION START_PUNCTUATION
5044     * @see     Character#SURROGATE SURROGATE
5045     * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
5046     * @see     Character#UNASSIGNED UNASSIGNED
5047     * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
5048     * @since   1.5
5049     */
5050    public static int getType(int codePoint) {
5051        int type = Character.UNASSIGNED;
5052
5053        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
5054            type = CharacterDataLatin1.getType(codePoint);
5055        } else {
5056            int plane = getPlane(codePoint);
5057            switch(plane) {
5058            case(0):
5059                type = CharacterData00.getType(codePoint);
5060                break;
5061            case(1):
5062                type = CharacterData01.getType(codePoint);
5063                break;
5064            case(2):
5065                type = CharacterData02.getType(codePoint);
5066                break;
5067            case(3): // Undefined
5068            case(4): // Undefined
5069            case(5): // Undefined
5070            case(6): // Undefined
5071            case(7): // Undefined
5072            case(8): // Undefined
5073            case(9): // Undefined
5074            case(10): // Undefined
5075            case(11): // Undefined
5076            case(12): // Undefined
5077            case(13): // Undefined      
5078                type = CharacterDataUndefined.getType(codePoint);
5079                break;
5080            case(14): 
5081                type = CharacterData0E.getType(codePoint);
5082                break;
5083            case(15): // Private Use
5084            case(16): // Private Use
5085                type = CharacterDataPrivateUse.getType(codePoint);
5086                break;
5087            default:
5088                // the argument's plane is invalid, and thus is an invalid codepoint
5089                // type remains UNASSIGNED
5090                break;
5091            }
5092        }
5093        return type;
5094    }
5095
5096    /**
5097     * Determines the character representation for a specific digit in
5098     * the specified radix. If the value of <code>radix</code> is not a
5099     * valid radix, or the value of <code>digit</code> is not a valid
5100     * digit in the specified radix, the null character
5101     * (<code>'&#92;u0000'</code>) is returned.
5102     * <p>
5103     * The <code>radix</code> argument is valid if it is greater than or
5104     * equal to <code>MIN_RADIX</code> and less than or equal to
5105     * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
5106     * <code>0&nbsp;&lt;=digit&nbsp;&lt;&nbsp;radix</code>.
5107     * <p>
5108     * If the digit is less than 10, then
5109     * <code>'0'&nbsp;+ digit</code> is returned. Otherwise, the value
5110     * <code>'a'&nbsp;+ digit&nbsp;-&nbsp;10</code> is returned.
5111     *
5112     * @param   digit   the number to convert to a character.
5113     * @param   radix   the radix.
5114     * @return  the <code>char</code> representation of the specified digit
5115     *          in the specified radix.
5116     * @see     java.lang.Character#MIN_RADIX
5117     * @see     java.lang.Character#MAX_RADIX
5118     * @see     java.lang.Character#digit(char, int)
5119     */
5120    public static char forDigit(int digit, int radix) {
5121        if ((digit >= radix) || (digit < 0)) {
5122            return '\0';
5123        }
5124        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
5125            return '\0';
5126        }
5127        if (digit < 10) {
5128            return (char)('0' + digit);
5129        }
5130        return (char)('a' - 10 + digit);
5131    }
5132
5133    /**
5134     * Returns the Unicode directionality property for the given
5135     * character.  Character directionality is used to calculate the
5136     * visual ordering of text. The directionality value of undefined
5137     * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
5138     *
5139     * <p><b>Note:</b> This method cannot handle <a
5140     * HREF="#supplementary"> supplementary characters</a>. To support
5141     * all Unicode characters, including supplementary characters, use
5142     * the {@link #getDirectionality(int)} method.
5143     *
5144     * @param  ch <code>char</code> for which the directionality property 
5145     *            is requested.
5146     * @return the directionality property of the <code>char</code> value.
5147     *
5148     * @see Character#DIRECTIONALITY_UNDEFINED
5149     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
5150     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
5151     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
5152     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
5153     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
5154     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
5155     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
5156     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
5157     * @see Character#DIRECTIONALITY_NONSPACING_MARK
5158     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
5159     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
5160     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
5161     * @see Character#DIRECTIONALITY_WHITESPACE
5162     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
5163     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
5164     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
5165     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
5166     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
5167     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
5168     * @since 1.4
5169     */
5170    public static byte getDirectionality(char ch) {
5171        return getDirectionality((int)ch);
5172    }
5173
5174    /**
5175     * Returns the Unicode directionality property for the given
5176     * character (Unicode code point).  Character directionality is
5177     * used to calculate the visual ordering of text. The
5178     * directionality value of undefined character is {@link
5179     * #DIRECTIONALITY_UNDEFINED}.
5180     *
5181     * @param   codePoint the character (Unicode code point) for which
5182     *          the directionality property * is requested.
5183     * @return the directionality property of the character.
5184     *
5185     * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
5186     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
5187     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
5188     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
5189     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
5190     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
5191     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
5192     * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
5193     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
5194     * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
5195     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
5196     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
5197     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
5198     * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
5199     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
5200     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
5201     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
5202     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
5203     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
5204     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
5205     * @since    1.5
5206     */
5207    public static byte getDirectionality(int codePoint) {
5208        byte directionality = Character.DIRECTIONALITY_UNDEFINED;
5209
5210        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
5211            directionality = CharacterDataLatin1.getDirectionality(codePoint);
5212        } else {
5213            int plane = getPlane(codePoint);
5214            switch(plane) {
5215            case(0):
5216                directionality = CharacterData00.getDirectionality(codePoint);
5217                break;
5218            case(1):
5219                directionality = CharacterData01.getDirectionality(codePoint);
5220                break;
5221            case(2):
5222                directionality = CharacterData02.getDirectionality(codePoint);
5223                break;
5224            case(3): // Undefined
5225            case(4): // Undefined
5226            case(5): // Undefined
5227            case(6): // Undefined
5228            case(7): // Undefined
5229            case(8): // Undefined
5230            case(9): // Undefined
5231            case(10): // Undefined
5232            case(11): // Undefined
5233            case(12): // Undefined
5234            case(13): // Undefined
5235                directionality = CharacterDataUndefined.getDirectionality(codePoint);
5236                break;
5237            case(14): 
5238                directionality = CharacterData0E.getDirectionality(codePoint);
5239                break;
5240            case(15): // Private Use
5241            case(16): // Private Use
5242                directionality = CharacterDataPrivateUse.getDirectionality(codePoint);
5243                break;
5244            default:
5245                // the argument's plane is invalid, and thus is an invalid codepoint
5246                // directionality remains DIRECTIONALITY_UNDEFINED
5247                break;
5248            }
5249        }
5250        return directionality;
5251    }
5252
5253    /**
5254     * Determines whether the character is mirrored according to the
5255     * Unicode specification.  Mirrored characters should have their
5256     * glyphs horizontally mirrored when displayed in text that is
5257     * right-to-left.  For example, <code>'&#92;u0028'</code> LEFT
5258     * PARENTHESIS is semantically defined to be an <i>opening
5259     * parenthesis</i>.  This will appear as a "(" in text that is
5260     * left-to-right but as a ")" in text that is right-to-left.
5261     *
5262     * <p><b>Note:</b> This method cannot handle <a
5263     * HREF="#supplementary"> supplementary characters</a>. To support
5264     * all Unicode characters, including supplementary characters, use
5265     * the {@link #isMirrored(int)} method.
5266     *
5267     * @param  ch <code>char</code> for which the mirrored property is requested
5268     * @return <code>true</code> if the char is mirrored, <code>false</code>
5269     *         if the <code>char</code> is not mirrored or is not defined.
5270     * @since 1.4
5271     */
5272    public static boolean isMirrored(char ch) {
5273        return isMirrored((int)ch);
5274    }
5275
5276    /**
5277     * Determines whether the specified character (Unicode code point)
5278     * is mirrored according to the Unicode specification.  Mirrored
5279     * characters should have their glyphs horizontally mirrored when
5280     * displayed in text that is right-to-left.  For example,
5281     * <code>'&#92;u0028'</code> LEFT PARENTHESIS is semantically
5282     * defined to be an <i>opening parenthesis</i>.  This will appear
5283     * as a "(" in text that is left-to-right but as a ")" in text
5284     * that is right-to-left.
5285     *
5286     * @param   codePoint the character (Unicode code point) to be tested.
5287     * @return  <code>true</code> if the character is mirrored, <code>false</code>
5288     *          if the character is not mirrored or is not defined.
5289     * @since   1.5
5290     */
5291    public static boolean isMirrored(int codePoint) {
5292        boolean bMirrored = false;
5293
5294        if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
5295           bMirrored = CharacterDataLatin1.isMirrored(codePoint);
5296        } else {
5297            int plane = getPlane(codePoint);
5298            switch(plane) {
5299            case(0):
5300                bMirrored = CharacterData00.isMirrored(codePoint);
5301                break;
5302            case(1):
5303                bMirrored = CharacterData01.isMirrored(codePoint);
5304                break;
5305            case(2):
5306                bMirrored = CharacterData02.isMirrored(codePoint);
5307                break;
5308            case(3): // Undefined
5309            case(4): // Undefined
5310            case(5): // Undefined
5311            case(6): // Undefined
5312            case(7): // Undefined
5313            case(8): // Undefined
5314            case(9): // Undefined
5315            case(10): // Undefined
5316            case(11): // Undefined
5317            case(12): // Undefined
5318            case(13): // Undefined
5319                bMirrored = CharacterDataUndefined.isMirrored(codePoint);
5320                break;
5321            case(14): 
5322                bMirrored = CharacterData0E.isMirrored(codePoint);
5323                break;
5324            case(15): // Private Use
5325            case(16): // Private Use
5326                bMirrored = CharacterDataPrivateUse.isMirrored(codePoint);
5327                break;
5328            default:
5329                // the argument's plane is invalid, and thus is an invalid codepoint
5330                // bMirrored remains false
5331                break;
5332            }
5333        }
5334        return bMirrored;
5335    }
5336
5337    /**
5338     * Compares two <code>Character</code> objects numerically.
5339     *
5340     * @param   anotherCharacter   the <code>Character</code> to be compared.
5341
5342     * @return  the value <code>0</code> if the argument <code>Character</code> 
5343     *          is equal to this <code>Character</code>; a value less than 
5344     *          <code>0</code> if this <code>Character</code> is numerically less 
5345     *          than the <code>Character</code> argument; and a value greater than 
5346     *          <code>0</code> if this <code>Character</code> is numerically greater 
5347     *          than the <code>Character</code> argument (unsigned comparison).  
5348     *          Note that this is strictly a numerical comparison; it is not 
5349     *          locale-dependent.
5350     * @since   1.2
5351     */
5352    public int compareTo(Character   anotherCharacter) {
5353        return this.value - anotherCharacter.value;
5354    }
5355
5356    /**
5357     * Converts the character (Unicode code point) argument to uppercase using
5358     * information from the UnicodeData file.
5359     * <p>
5360     *
5361     * @param   codePoint   the character (Unicode code point) to be converted.
5362     * @return  either the uppercase equivalent of the character, if 
5363     *          any, or an error flag (<code>Character.ERROR</code>) 
5364     *          that indicates that a 1:M <code>char</code> mapping exists.
5365     * @see     java.lang.Character#isLowerCase(char)
5366     * @see     java.lang.Character#isUpperCase(char)
5367     * @see     java.lang.Character#toLowerCase(char)
5368     * @see     java.lang.Character#toTitleCase(char)
5369     * @since 1.4
5370     */
5371    static int toUpperCaseEx(int codePoint) {
5372        int upperCase = codePoint;
5373        int plane = 0;
5374
5375        assert isValidCodePoint(codePoint);
5376
5377        if (codePoint <= FAST_PATH_MAX) {
5378            upperCase = CharacterDataLatin1.toUpperCaseEx(codePoint);
5379        } else {
5380            plane = getPlane(codePoint);
5381            switch(plane) {
5382            case(0):
5383                upperCase = CharacterData00.toUpperCaseEx(codePoint);
5384                break;
5385            case(1):
5386                upperCase = CharacterData01.toUpperCase(codePoint);
5387                break;
5388            case(2):
5389                upperCase = CharacterData02.toUpperCase(codePoint);
5390                break;
5391            case(3): // Undefined
5392            case(4): // Undefined
5393            case(5): // Undefined
5394            case(6): // Undefined
5395            case(7): // Undefined
5396            case(8): // Undefined
5397            case(9): // Undefined
5398            case(10): // Undefined
5399            case(11): // Undefined
5400            case(12): // Undefined
5401            case(13): // Undefined
5402                upperCase = CharacterDataUndefined.toUpperCase(codePoint);
5403                break;
5404            case(14):
5405                upperCase = CharacterData0E.toUpperCase(codePoint);
5406                break;
5407            case(15): // Private Use
5408            case(16): // Private Use
5409                upperCase = CharacterDataPrivateUse.toUpperCase(codePoint);
5410                break;
5411            default:
5412                // the argument's plane is invalid, and thus is an invalid codepoint
5413                // upperCase remains codePoint;
5414                break;
5415            }
5416        }
5417        return upperCase;
5418    }
5419
5420    /**
5421     * Converts the character (Unicode code point) argument to uppercase using case
5422     * mapping information from the SpecialCasing file in the Unicode
5423     * specification. If a character has no explicit uppercase
5424     * mapping, then the <code>char</code> itself is returned in the
5425     * <code>char[]</code>.
5426     *
5427     * @param   codePoint   the character (Unicode code point) to be converted.
5428     * @return a <code>char[]</code> with the uppercased character.
5429     * @since 1.4
5430     */
5431    static char[] toUpperCaseCharArray(int codePoint) {
5432        char[] upperCase = null;
5433
5434        // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
5435        assert isValidCodePoint(codePoint) &&
5436               !isSupplementaryCodePoint(codePoint);
5437
5438        if (codePoint <= FAST_PATH_MAX) {
5439            upperCase = CharacterDataLatin1.toUpperCaseCharArray(codePoint);
5440        } else {
5441            upperCase = CharacterData00.toUpperCaseCharArray(codePoint);
5442        }
5443        return upperCase;
5444    }
5445
5446    /**
5447     * The number of bits used to represent a <code>char</code> value in
5448     * unsigned binary form.
5449     *
5450     * @since 1.5
5451     */
5452    public static final int SIZE = 16;
5453
5454    /**
5455     * Returns the value obtained by reversing the order of the bytes in the
5456     * specified <tt>char</tt> value.
5457     *
5458     * @return the value obtained by reversing (or, equivalently, swapping)
5459     *     the bytes in the specified <tt>char</tt> value.
5460     * @since 1.5
5461     */
5462    public static char reverseBytes(char ch) {
5463        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
5464    }
5465}
5466
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags