KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > java > lang > Character


1 /*
2  * @(#)Character.java 1.31 04/09/14
3  *
4  * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
5  * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6  */

7
8 package java.lang;
9 import java.util.Map JavaDoc;
10 import java.util.HashMap JavaDoc;
11 import java.util.Locale JavaDoc;
12
13 /**
14  * The <code>Character</code> class wraps a value of the primitive
15  * type <code>char</code> in an object. An object of type
16  * <code>Character</code> contains a single field whose type is
17  * <code>char</code>.
18  * <p>
19  * In addition, this class provides several methods for determining
20  * a character's category (lowercase letter, digit, etc.) and for converting
21  * characters from uppercase to lowercase and vice versa.
22  * <p>
23  * Character information is based on the Unicode Standard, version 4.0.
24  * <p>
25  * The methods and data of class <code>Character</code> are defined by
26  * the information in the <i>UnicodeData</i> file that is part of the
27  * Unicode Character Database maintained by the Unicode
28  * Consortium. This file specifies various properties including name
29  * and general category for every defined Unicode code point or
30  * character range.
31  * <p>
32  * The file and its description are available from the Unicode Consortium at:
33  * <ul>
34  * <li><a HREF="http://www.unicode.org">http://www.unicode.org</a>
35  * </ul>
36  *
37  * <h4><a name="unicode">Unicode Character Representations</a></h4>
38  *
39  * <p>The <code>char</code> data type (and therefore the value that a
40  * <code>Character</code> object encapsulates) is based on the
41  * original Unicode specification, which defined characters as
42  * fixed-width 16-bit entities. The Unicode standard has since been
43  * changed to allow for characters whose representation requires more
44  * than 16 bits. The range of legal <em>code point</em>s is now
45  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
46  * (Refer to the <a
47  * HREF="http://www.unicode.org/reports/tr27/#notation"><i>
48  * definition</i></a> of the U+<i>n</i> notation in the Unicode
49  * standard.)
50  *
51  * <p>The set of characters from U+0000 to U+FFFF is sometimes
52  * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
53  * name="supplementary">Characters</a> whose code points are greater
54  * than U+FFFF are called <em>supplementary character</em>s. The Java
55  * 2 platform uses the UTF-16 representation in <code>char</code>
56  * arrays and in the <code>String</code> and <code>StringBuffer</code>
57  * classes. In this representation, supplementary characters are
58  * represented as a pair of <code>char</code> values, the first from
59  * the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the
60  * second from the <em>low-surrogates</em> range
61  * (&#92;uDC00-&#92;uDFFF).
62  *
63  * <p>A <code>char</code> value, therefore, represents Basic
64  * Multilingual Plane (BMP) code points, including the surrogate
65  * code points, or code units of the UTF-16 encoding. An
66  * <code>int</code> value represents all Unicode code points,
67  * including supplementary code points. The lower (least significant)
68  * 21 bits of <code>int</code> are used to represent Unicode code
69  * points and the upper (most significant) 11 bits must be zero.
70  * Unless otherwise specified, the behavior with respect to
71  * supplementary characters and surrogate <code>char</code> values is
72  * as follows:
73  *
74  * <ul>
75  * <li>The methods that only accept a <code>char</code> value cannot support
76  * supplementary characters. They treat <code>char</code> values from the
77  * surrogate ranges as undefined characters. For example,
78  * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
79  * this specific value, if followed by any low-surrogate value in a string,
80  * would represent a letter.
81  *
82  * <li>The methods that accept an <code>int</code> value support all
83  * Unicode characters, including supplementary characters. For
84  * example, <code>Character.isLetter(0x2F81A)</code> returns
85  * <code>true</code> because the code point value represents a letter
86  * (a CJK ideograph).
87  * </ul>
88  *
89  * <p>In the J2SE API documentation, <em>Unicode code point</em> is
90  * used for character values in the range between U+0000 and U+10FFFF,
91  * and <em>Unicode code unit</em> is used for 16-bit
92  * <code>char</code> values that are code units of the <em>UTF-16</em>
93  * encoding. For more information on Unicode terminology, refer to the
94  * <a HREF="http://www.unicode.org/glossary/">Unicode Glossary</a>.
95  *
96  * @author Lee Boynton
97  * @author Guy Steele
98  * @author Akira Tanaka
99  * @since 1.0
100  */

101 public final
102 class Character extends Object JavaDoc implements java.io.Serializable JavaDoc, Comparable JavaDoc<Character JavaDoc> {
103     /**
104      * The minimum radix available for conversion to and from strings.
105      * The constant value of this field is the smallest value permitted
106      * for the radix argument in radix-conversion methods such as the
107      * <code>digit</code> method, the <code>forDigit</code>
108      * method, and the <code>toString</code> method of class
109      * <code>Integer</code>.
110      *
111      * @see java.lang.Character#digit(char, int)
112      * @see java.lang.Character#forDigit(int, int)
113      * @see java.lang.Integer#toString(int, int)
114      * @see java.lang.Integer#valueOf(java.lang.String)
115      */

116     public static final int MIN_RADIX = 2;
117
118     /**
119      * The maximum radix available for conversion to and from strings.
120      * The constant value of this field is the largest value permitted
121      * for the radix argument in radix-conversion methods such as the
122      * <code>digit</code> method, the <code>forDigit</code>
123      * method, and the <code>toString</code> method of class
124      * <code>Integer</code>.
125      *
126      * @see java.lang.Character#digit(char, int)
127      * @see java.lang.Character#forDigit(int, int)
128      * @see java.lang.Integer#toString(int, int)
129      * @see java.lang.Integer#valueOf(java.lang.String)
130      */

131     public static final int MAX_RADIX = 36;
132
133     /**
134      * The constant value of this field is the smallest value of type
135      * <code>char</code>, <code>'&#92;u0000'</code>.
136      *
137      * @since 1.0.2
138      */

139     public static final char MIN_VALUE = '\u0000';
140
141     /**
142      * The constant value of this field is the largest value of type
143      * <code>char</code>, <code>'&#92;uFFFF'</code>.
144      *
145      * @since 1.0.2
146      */

147     public static final char MAX_VALUE = '\uffff';
148
149     /**
150      * The <code>Class</code> instance representing the primitive type
151      * <code>char</code>.
152      *
153      * @since 1.1
154      */

155     public static final Class JavaDoc<Character JavaDoc> TYPE = Class.getPrimitiveClass("char");
156
157    /*
158     * Normative general types
159     */

160
161    /*
162     * General character types
163     */

164
165    /**
166     * General category "Cn" in the Unicode specification.
167     * @since 1.1
168     */

169     public static final byte
170         UNASSIGNED = 0;
171
172    /**
173     * General category "Lu" in the Unicode specification.
174     * @since 1.1
175     */

176     public static final byte
177         UPPERCASE_LETTER = 1;
178
179    /**
180     * General category "Ll" in the Unicode specification.
181     * @since 1.1
182     */

183     public static final byte
184         LOWERCASE_LETTER = 2;
185
186    /**
187     * General category "Lt" in the Unicode specification.
188     * @since 1.1
189     */

190     public static final byte
191         TITLECASE_LETTER = 3;
192
193    /**
194     * General category "Lm" in the Unicode specification.
195     * @since 1.1
196     */

197     public static final byte
198         MODIFIER_LETTER = 4;
199
200    /**
201     * General category "Lo" in the Unicode specification.
202     * @since 1.1
203     */

204     public static final byte
205         OTHER_LETTER = 5;
206
207    /**
208     * General category "Mn" in the Unicode specification.
209     * @since 1.1
210     */

211     public static final byte
212         NON_SPACING_MARK = 6;
213
214    /**
215     * General category "Me" in the Unicode specification.
216     * @since 1.1
217     */

218     public static final byte
219         ENCLOSING_MARK = 7;
220
221    /**
222     * General category "Mc" in the Unicode specification.
223     * @since 1.1
224     */

225     public static final byte
226         COMBINING_SPACING_MARK = 8;
227
228    /**
229     * General category "Nd" in the Unicode specification.
230     * @since 1.1
231     */

232     public static final byte
233         DECIMAL_DIGIT_NUMBER = 9;
234
235    /**
236     * General category "Nl" in the Unicode specification.
237     * @since 1.1
238     */

239     public static final byte
240         LETTER_NUMBER = 10;
241
242    /**
243     * General category "No" in the Unicode specification.
244     * @since 1.1
245     */

246     public static final byte
247         OTHER_NUMBER = 11;
248
249    /**
250     * General category "Zs" in the Unicode specification.
251     * @since 1.1
252     */

253     public static final byte
254         SPACE_SEPARATOR = 12;
255
256    /**
257     * General category "Zl" in the Unicode specification.
258     * @since 1.1
259     */

260     public static final byte
261         LINE_SEPARATOR = 13;
262
263    /**
264     * General category "Zp" in the Unicode specification.
265     * @since 1.1
266     */

267     public static final byte
268         PARAGRAPH_SEPARATOR = 14;
269
270    /**
271     * General category "Cc" in the Unicode specification.
272     * @since 1.1
273     */

274     public static final byte
275         CONTROL = 15;
276
277    /**
278     * General category "Cf" in the Unicode specification.
279     * @since 1.1
280     */

281     public static final byte
282         FORMAT = 16;
283
284    /**
285     * General category "Co" in the Unicode specification.
286     * @since 1.1
287     */

288     public static final byte
289         PRIVATE_USE = 18;
290
291    /**
292     * General category "Cs" in the Unicode specification.
293     * @since 1.1
294     */

295     public static final byte
296         SURROGATE = 19;
297
298    /**
299     * General category "Pd" in the Unicode specification.
300     * @since 1.1
301     */

302     public static final byte
303         DASH_PUNCTUATION = 20;
304
305    /**
306     * General category "Ps" in the Unicode specification.
307     * @since 1.1
308     */

309     public static final byte
310         START_PUNCTUATION = 21;
311
312    /**
313     * General category "Pe" in the Unicode specification.
314     * @since 1.1
315     */

316     public static final byte
317         END_PUNCTUATION = 22;
318
319    /**
320     * General category "Pc" in the Unicode specification.
321     * @since 1.1
322     */

323     public static final byte
324         CONNECTOR_PUNCTUATION = 23;
325
326    /**
327     * General category "Po" in the Unicode specification.
328     * @since 1.1
329     */

330     public static final byte
331         OTHER_PUNCTUATION = 24;
332
333    /**
334     * General category "Sm" in the Unicode specification.
335     * @since 1.1
336     */

337     public static final byte
338         MATH_SYMBOL = 25;
339
340    /**
341     * General category "Sc" in the Unicode specification.
342     * @since 1.1
343     */

344     public static final byte
345         CURRENCY_SYMBOL = 26;
346
347    /**
348     * General category "Sk" in the Unicode specification.
349     * @since 1.1
350     */

351     public static final byte
352         MODIFIER_SYMBOL = 27;
353
354    /**
355     * General category "So" in the Unicode specification.
356     * @since 1.1
357     */

358     public static final byte
359         OTHER_SYMBOL = 28;
360
361    /**
362     * General category "Pi" in the Unicode specification.
363     * @since 1.4
364     */

365     public static final byte
366         INITIAL_QUOTE_PUNCTUATION = 29;
367
368    /**
369     * General category "Pf" in the Unicode specification.
370     * @since 1.4
371     */

372     public static final byte
373         FINAL_QUOTE_PUNCTUATION = 30;
374
375     /**
376      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
377      */

378      static final int ERROR = 0xFFFFFFFF;
379
380
381     /**
382      * Undefined bidirectional character type. Undefined <code>char</code>
383      * values have undefined directionality in the Unicode specification.
384      * @since 1.4
385      */

386      public static final byte DIRECTIONALITY_UNDEFINED = -1;
387
388     /**
389      * Strong bidirectional character type "L" in the Unicode specification.
390      * @since 1.4
391      */

392     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
393
394     /**
395      * Strong bidirectional character type "R" in the Unicode specification.
396      * @since 1.4
397      */

398     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
399
400     /**
401      * Strong bidirectional character type "AL" in the Unicode specification.
402      * @since 1.4
403      */

404     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
405
406     /**
407      * Weak bidirectional character type "EN" in the Unicode specification.
408      * @since 1.4
409      */

410     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
411
412     /**
413      * Weak bidirectional character type "ES" in the Unicode specification.
414      * @since 1.4
415      */

416     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
417
418     /**
419      * Weak bidirectional character type "ET" in the Unicode specification.
420      * @since 1.4
421      */

422     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
423
424     /**
425      * Weak bidirectional character type "AN" in the Unicode specification.
426      * @since 1.4
427      */

428     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
429
430     /**
431      * Weak bidirectional character type "CS" in the Unicode specification.
432      * @since 1.4
433      */

434     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
435
436     /**
437      * Weak bidirectional character type "NSM" in the Unicode specification.
438      * @since 1.4
439      */

440     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
441
442     /**
443      * Weak bidirectional character type "BN" in the Unicode specification.
444      * @since 1.4
445      */

446     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
447
448     /**
449      * Neutral bidirectional character type "B" in the Unicode specification.
450      * @since 1.4
451      */

452     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
453
454     /**
455      * Neutral bidirectional character type "S" in the Unicode specification.
456      * @since 1.4
457      */

458     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
459
460     /**
461      * Neutral bidirectional character type "WS" in the Unicode specification.
462      * @since 1.4
463      */

464     public static final byte DIRECTIONALITY_WHITESPACE = 12;
465
466     /**
467      * Neutral bidirectional character type "ON" in the Unicode specification.
468      * @since 1.4
469      */

470     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
471
472     /**
473      * Strong bidirectional character type "LRE" in the Unicode specification.
474      * @since 1.4
475      */

476     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
477
478     /**
479      * Strong bidirectional character type "LRO" in the Unicode specification.
480      * @since 1.4
481      */

482     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
483
484     /**
485      * Strong bidirectional character type "RLE" in the Unicode specification.
486      * @since 1.4
487      */

488     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
489
490     /**
491      * Strong bidirectional character type "RLO" in the Unicode specification.
492      * @since 1.4
493      */

494     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
495
496     /**
497      * Weak bidirectional character type "PDF" in the Unicode specification.
498      * @since 1.4
499      */

500     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
501
502     /**
503      * The minimum value of a Unicode high-surrogate code unit in the
504      * UTF-16 encoding. A high-surrogate is also known as a
505      * <i>leading-surrogate</i>.
506      *
507      * @since 1.5
508      */

509     public static final char MIN_HIGH_SURROGATE = '\uD800';
510
511     /**
512      * The maximum value of a Unicode high-surrogate code unit in the
513      * UTF-16 encoding. A high-surrogate is also known as a
514      * <i>leading-surrogate</i>.
515      *
516      * @since 1.5
517      */

518     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
519
520     /**
521      * The minimum value of a Unicode low-surrogate code unit in the
522      * UTF-16 encoding. A low-surrogate is also known as a
523      * <i>trailing-surrogate</i>.
524      *
525      * @since 1.5
526      */

527     public static final char MIN_LOW_SURROGATE = '\uDC00';
528
529     /**
530      * The maximum value of a Unicode low-surrogate code unit in the
531      * UTF-16 encoding. A low-surrogate is also known as a
532      * <i>trailing-surrogate</i>.
533      *
534      * @since 1.5
535      */

536     public static final char MAX_LOW_SURROGATE = '\uDFFF';
537
538     /**
539      * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
540      *
541      * @since 1.5
542      */

543     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
544
545     /**
546      * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
547      *
548      * @since 1.5
549      */

550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
551
552     /**
553      * The minimum value of a supplementary code point.
554      *
555      * @since 1.5
556      */

557     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
558
559     /**
560      * The minimum value of a Unicode code point.
561      *
562      * @since 1.5
563      */

564     public static final int MIN_CODE_POINT = 0x000000;
565
566     /**
567      * The maximum value of a Unicode code point.
568      *
569      * @since 1.5
570      */

571     public static final int MAX_CODE_POINT = 0x10ffff;
572
573
574     /**
575      * Instances of this class represent particular subsets of the Unicode
576      * character set. The only family of subsets defined in the
577      * <code>Character</code> class is <code>{@link Character.UnicodeBlock
578      * UnicodeBlock}</code>. Other portions of the Java API may define other
579      * subsets for their own purposes.
580      *
581      * @since 1.2
582      */

583     public static class Subset {
584
585         private String JavaDoc name;
586
587         /**
588          * Constructs a new <code>Subset</code> instance.
589          *
590          * @exception NullPointerException if name is <code>null</code>
591          * @param name The name of this subset
592          */

593         protected Subset(String JavaDoc name) {
594             if (name == null) {
595                 throw new NullPointerException JavaDoc("name");
596             }
597             this.name = name;
598         }
599
600         /**
601          * Compares two <code>Subset</code> objects for equality.
602          * This method returns <code>true</code> if and only if
603          * <code>this</code> and the argument refer to the same
604          * object; since this method is <code>final</code>, this
605          * guarantee holds for all subclasses.
606          */

607         public final boolean equals(Object JavaDoc obj) {
608             return (this == obj);
609         }
610
611         /**
612          * Returns the standard hash code as defined by the
613          * <code>{@link Object#hashCode}</code> method. This method
614          * is <code>final</code> in order to ensure that the
615          * <code>equals</code> and <code>hashCode</code> methods will
616          * be consistent in all subclasses.
617          */

618         public final int hashCode() {
619             return super.hashCode();
620         }
621
622         /**
623          * Returns the name of this subset.
624          */

625         public final String JavaDoc toString() {
626             return name;
627         }
628     }
629
630     /**
631      * A family of character subsets representing the character blocks in the
632      * Unicode specification. Character blocks generally define characters
633      * used for a specific script or purpose. A character is contained by
634      * at most one Unicode block.
635      *
636      * @since 1.2
637      */

638     public static final class UnicodeBlock extends Subset {
639
640         private static Map JavaDoc map = new HashMap JavaDoc();
641
642         /**
643          * Create a UnicodeBlock with the given identifier name.
644          * This name must be the same as the block identifier.
645          */

646         private UnicodeBlock(String JavaDoc idName) {
647             super(idName);
648             map.put(idName.toUpperCase(Locale.US), this);
649         }
650
651         /**
652          * Create a UnicodeBlock with the given identifier name and
653          * alias name.
654          */

655         private UnicodeBlock(String JavaDoc idName, String JavaDoc alias) {
656             this(idName);
657             map.put(alias.toUpperCase(Locale.US), this);
658         }
659
660         /**
661          * Create a UnicodeBlock with the given identifier name and
662          * alias names.
663          */

664         private UnicodeBlock(String JavaDoc idName, String JavaDoc[] aliasName) {
665             this(idName);
666             if (aliasName != null) {
667                 for(int x=0; x<aliasName.length; ++x) {
668                     map.put(aliasName[x].toUpperCase(Locale.US), this);
669                 }
670             }
671         }
672
673         /**
674          * Constant for the "Basic Latin" Unicode character block.
675          * @since 1.2
676          */

677         public static final UnicodeBlock BASIC_LATIN =
678             new UnicodeBlock("BASIC_LATIN", new String JavaDoc[] {"Basic Latin", "BasicLatin" });
679
680         /**
681          * Constant for the "Latin-1 Supplement" Unicode character block.
682          * @since 1.2
683          */

684         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
685             new UnicodeBlock("LATIN_1_SUPPLEMENT", new String JavaDoc[]{ "Latin-1 Supplement", "Latin-1Supplement"});
686
687         /**
688          * Constant for the "Latin Extended-A" Unicode character block.
689          * @since 1.2
690          */

691         public static final UnicodeBlock LATIN_EXTENDED_A =
692             new UnicodeBlock("LATIN_EXTENDED_A", new String JavaDoc[]{ "Latin Extended-A", "LatinExtended-A"});
693
694         /**
695          * Constant for the "Latin Extended-B" Unicode character block.
696          * @since 1.2
697          */

698         public static final UnicodeBlock LATIN_EXTENDED_B =
699             new UnicodeBlock("LATIN_EXTENDED_B", new String JavaDoc[] {"Latin Extended-B", "LatinExtended-B"});
700
701         /**
702          * Constant for the "IPA Extensions" Unicode character block.
703          * @since 1.2
704          */

705         public static final UnicodeBlock IPA_EXTENSIONS =
706             new UnicodeBlock("IPA_EXTENSIONS", new String JavaDoc[] {"IPA Extensions", "IPAExtensions"});
707
708         /**
709          * Constant for the "Spacing Modifier Letters" Unicode character block.
710          * @since 1.2
711          */

712         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
713             new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String JavaDoc[] { "Spacing Modifier Letters",
714                                                                         "SpacingModifierLetters"});
715
716         /**
717          * Constant for the "Combining Diacritical Marks" Unicode character block.
718          * @since 1.2
719          */

720         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
721             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String JavaDoc[] {"Combining Diacritical Marks",
722                                                                           "CombiningDiacriticalMarks" });
723
724         /**
725          * Constant for the "Greek and Coptic" Unicode character block.
726          * <p>
727          * This block was previously known as the "Greek" block.
728          *
729          * @since 1.2
730          */

731         public static final UnicodeBlock GREEK
732             = new UnicodeBlock("GREEK", new String JavaDoc[] {"Greek and Coptic", "GreekandCoptic"});
733
734         /**
735          * Constant for the "Cyrillic" Unicode character block.
736          * @since 1.2
737          */

738         public static final UnicodeBlock CYRILLIC =
739             new UnicodeBlock("CYRILLIC");
740
741         /**
742          * Constant for the "Armenian" Unicode character block.
743          * @since 1.2
744          */

745         public static final UnicodeBlock ARMENIAN =
746             new UnicodeBlock("ARMENIAN");
747
748         /**
749          * Constant for the "Hebrew" Unicode character block.
750          * @since 1.2
751          */

752         public static final UnicodeBlock HEBREW =
753             new UnicodeBlock("HEBREW");
754
755         /**
756          * Constant for the "Arabic" Unicode character block.
757          * @since 1.2
758          */

759         public static final UnicodeBlock ARABIC =
760             new UnicodeBlock("ARABIC");
761
762         /**
763          * Constant for the "Devanagari" Unicode character block.
764          * @since 1.2
765          */

766         public static final UnicodeBlock DEVANAGARI =
767             new UnicodeBlock("DEVANAGARI");
768
769         /**
770          * Constant for the "Bengali" Unicode character block.
771          * @since 1.2
772          */

773         public static final UnicodeBlock BENGALI =
774             new UnicodeBlock("BENGALI");
775
776         /**
777          * Constant for the "Gurmukhi" Unicode character block.
778          * @since 1.2
779          */

780         public static final UnicodeBlock GURMUKHI =
781             new UnicodeBlock("GURMUKHI");
782
783         /**
784          * Constant for the "Gujarati" Unicode character block.
785          * @since 1.2
786          */

787         public static final UnicodeBlock GUJARATI =
788             new UnicodeBlock("GUJARATI");
789
790         /**
791          * Constant for the "Oriya" Unicode character block.
792          * @since 1.2
793          */

794         public static final UnicodeBlock ORIYA =
795             new UnicodeBlock("ORIYA");
796
797         /**
798          * Constant for the "Tamil" Unicode character block.
799          * @since 1.2
800          */

801         public static final UnicodeBlock TAMIL =
802             new UnicodeBlock("TAMIL");
803
804         /**
805          * Constant for the "Telugu" Unicode character block.
806          * @since 1.2
807          */

808         public static final UnicodeBlock TELUGU =
809             new UnicodeBlock("TELUGU");
810
811         /**
812          * Constant for the "Kannada" Unicode character block.
813          * @since 1.2
814          */

815         public static final UnicodeBlock KANNADA =
816             new UnicodeBlock("KANNADA");
817
818         /**
819          * Constant for the "Malayalam" Unicode character block.
820          * @since 1.2
821          */

822         public static final UnicodeBlock MALAYALAM =
823             new UnicodeBlock("MALAYALAM");
824
825         /**
826          * Constant for the "Thai" Unicode character block.
827          * @since 1.2
828          */

829         public static final UnicodeBlock THAI =
830             new UnicodeBlock("THAI");
831
832         /**
833          * Constant for the "Lao" Unicode character block.
834          * @since 1.2
835          */

836         public static final UnicodeBlock LAO =
837             new UnicodeBlock("LAO");
838
839         /**
840          * Constant for the "Tibetan" Unicode character block.
841          * @since 1.2
842          */

843         public static final UnicodeBlock TIBETAN =
844             new UnicodeBlock("TIBETAN");
845
846         /**
847          * Constant for the "Georgian" Unicode character block.
848          * @since 1.2
849          */

850         public static final UnicodeBlock GEORGIAN =
851             new UnicodeBlock("GEORGIAN");
852
853         /**
854          * Constant for the "Hangul Jamo" Unicode character block.
855          * @since 1.2
856          */

857         public static final UnicodeBlock HANGUL_JAMO =
858             new UnicodeBlock("HANGUL_JAMO", new String JavaDoc[] {"Hangul Jamo", "HangulJamo"});
859
860         /**
861          * Constant for the "Latin Extended Additional" Unicode character block.
862          * @since 1.2
863          */

864         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
865             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String JavaDoc[] {"Latin Extended Additional",
866                                                                         "LatinExtendedAdditional"});
867
868         /**
869          * Constant for the "Greek Extended" Unicode character block.
870          * @since 1.2
871          */

872         public static final UnicodeBlock GREEK_EXTENDED =
873             new UnicodeBlock("GREEK_EXTENDED", new String JavaDoc[] {"Greek Extended", "GreekExtended"});
874
875         /**
876          * Constant for the "General Punctuation" Unicode character block.
877          * @since 1.2
878          */

879         public static final UnicodeBlock GENERAL_PUNCTUATION =
880             new UnicodeBlock("GENERAL_PUNCTUATION", new String JavaDoc[] {"General Punctuation", "GeneralPunctuation"});
881
882         /**
883          * Constant for the "Superscripts and Subscripts" Unicode character block.
884          * @since 1.2
885          */

886         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
887             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String JavaDoc[] {"Superscripts and Subscripts",
888                                                                           "SuperscriptsandSubscripts" });
889
890         /**
891          * Constant for the "Currency Symbols" Unicode character block.
892          * @since 1.2
893          */

894         public static final UnicodeBlock CURRENCY_SYMBOLS =
895             new UnicodeBlock("CURRENCY_SYMBOLS", new String JavaDoc[] { "Currency Symbols", "CurrencySymbols"});
896
897         /**
898          * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
899          * <p>
900          * This block was previously known as "Combining Marks for Symbols".
901          * @since 1.2
902          */

903         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
904             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String JavaDoc[] {"Combining Diacritical Marks for Symbols",
905                                                                                                                                                   "CombiningDiacriticalMarksforSymbols",
906                                                                           "Combining Marks for Symbols",
907                                                                           "CombiningMarksforSymbols" });
908
909         /**
910          * Constant for the "Letterlike Symbols" Unicode character block.
911          * @since 1.2
912          */

913         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
914             new UnicodeBlock("LETTERLIKE_SYMBOLS", new String JavaDoc[] { "Letterlike Symbols", "LetterlikeSymbols"});
915
916         /**
917          * Constant for the "Number Forms" Unicode character block.
918          * @since 1.2
919          */

920         public static final UnicodeBlock NUMBER_FORMS =
921             new UnicodeBlock("NUMBER_FORMS", new String JavaDoc[] {"Number Forms", "NumberForms"});
922
923         /**
924          * Constant for the "Arrows" Unicode character block.
925          * @since 1.2
926          */

927         public static final UnicodeBlock ARROWS =
928             new UnicodeBlock("ARROWS");
929
930         /**
931          * Constant for the "Mathematical Operators" Unicode character block.
932          * @since 1.2
933          */

934         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
935             new UnicodeBlock("MATHEMATICAL_OPERATORS", new String JavaDoc[] {"Mathematical Operators",
936                                                                      "MathematicalOperators"});
937
938         /**
939          * Constant for the "Miscellaneous Technical" Unicode character block.
940          * @since 1.2
941          */

942  &n