1 8 9 package com.ibm.icu.lang; 10 11 import java.io.IOException ; 12 import java.lang.ref.SoftReference ; 13 import java.util.HashMap ; 14 import java.util.Locale ; 15 import java.util.Map ; 16 import java.util.MissingResourceException ; 17 18 import com.ibm.icu.impl.UBiDiProps; 19 import com.ibm.icu.impl.UCaseProps; 20 import com.ibm.icu.impl.NormalizerImpl; 21 import com.ibm.icu.impl.UCharacterUtility; 22 import com.ibm.icu.impl.UCharacterName; 23 import com.ibm.icu.impl.UCharacterNameChoice; 24 import com.ibm.icu.impl.UPropertyAliases; 25 import com.ibm.icu.lang.UCharacterEnums.*; 26 import com.ibm.icu.text.BreakIterator; 27 import com.ibm.icu.text.UTF16; 28 import com.ibm.icu.impl.UCharacterProperty; 29 import com.ibm.icu.util.RangeValueIterator; 30 import com.ibm.icu.util.ULocale; 31 import com.ibm.icu.util.ValueIterator; 32 import com.ibm.icu.util.VersionInfo; 33 34 158 159 public final class UCharacter implements ECharacterCategory, ECharacterDirection 160 { 161 163 170 public static final class UnicodeBlock extends Character.Subset 171 { 172 174 177 public static final UnicodeBlock NO_BLOCK 178 = new UnicodeBlock("NO_BLOCK", 0); 179 180 183 public static final UnicodeBlock BASIC_LATIN 184 = new UnicodeBlock("BASIC_LATIN", 1); 185 188 public static final UnicodeBlock LATIN_1_SUPPLEMENT 189 = new UnicodeBlock("LATIN_1_SUPPLEMENT", 2); 190 193 public static final UnicodeBlock LATIN_EXTENDED_A 194 = new UnicodeBlock("LATIN_EXTENDED_A", 3); 195 198 public static final UnicodeBlock LATIN_EXTENDED_B 199 = new UnicodeBlock("LATIN_EXTENDED_B", 4); 200 203 public static final UnicodeBlock IPA_EXTENSIONS 204 = new UnicodeBlock("IPA_EXTENSIONS", 5); 205 208 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 209 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 6); 210 213 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 214 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 7); 215 219 public static final UnicodeBlock GREEK 220 = new 
UnicodeBlock("GREEK", 8); 221 224 public static final UnicodeBlock CYRILLIC 225 = new UnicodeBlock("CYRILLIC", 9); 226 229 public static final UnicodeBlock ARMENIAN 230 = new UnicodeBlock("ARMENIAN", 10); 231 234 public static final UnicodeBlock HEBREW 235 = new UnicodeBlock("HEBREW", 11); 236 239 public static final UnicodeBlock ARABIC 240 = new UnicodeBlock("ARABIC", 12); 241 244 public static final UnicodeBlock SYRIAC 245 = new UnicodeBlock("SYRIAC", 13); 246 249 public static final UnicodeBlock THAANA 250 = new UnicodeBlock("THAANA", 14); 251 254 public static final UnicodeBlock DEVANAGARI 255 = new UnicodeBlock("DEVANAGARI", 15); 256 259 public static final UnicodeBlock BENGALI 260 = new UnicodeBlock("BENGALI", 16); 261 264 public static final UnicodeBlock GURMUKHI 265 = new UnicodeBlock("GURMUKHI", 17); 266 269 public static final UnicodeBlock GUJARATI 270 = new UnicodeBlock("GUJARATI", 18); 271 274 public static final UnicodeBlock ORIYA 275 = new UnicodeBlock("ORIYA", 19); 276 279 public static final UnicodeBlock TAMIL 280 = new UnicodeBlock("TAMIL", 20); 281 284 public static final UnicodeBlock TELUGU 285 = new UnicodeBlock("TELUGU", 21); 286 289 public static final UnicodeBlock KANNADA 290 = new UnicodeBlock("KANNADA", 22); 291 294 public static final UnicodeBlock MALAYALAM 295 = new UnicodeBlock("MALAYALAM", 23); 296 299 public static final UnicodeBlock SINHALA 300 = new UnicodeBlock("SINHALA", 24); 301 304 public static final UnicodeBlock THAI 305 = new UnicodeBlock("THAI", 25); 306 309 public static final UnicodeBlock LAO 310 = new UnicodeBlock("LAO", 26); 311 314 public static final UnicodeBlock TIBETAN 315 = new UnicodeBlock("TIBETAN", 27); 316 319 public static final UnicodeBlock MYANMAR 320 = new UnicodeBlock("MYANMAR", 28); 321 324 public static final UnicodeBlock GEORGIAN 325 = new UnicodeBlock("GEORGIAN", 29); 326 329 public static final UnicodeBlock HANGUL_JAMO 330 = new UnicodeBlock("HANGUL_JAMO", 30); 331 334 public static final UnicodeBlock 
ETHIOPIC 335 = new UnicodeBlock("ETHIOPIC", 31); 336 339 public static final UnicodeBlock CHEROKEE 340 = new UnicodeBlock("CHEROKEE", 32); 341 344 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 345 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 33); 346 349 public static final UnicodeBlock OGHAM 350 = new UnicodeBlock("OGHAM", 34); 351 354 public static final UnicodeBlock RUNIC 355 = new UnicodeBlock("RUNIC", 35); 356 359 public static final UnicodeBlock KHMER 360 = new UnicodeBlock("KHMER", 36); 361 364 public static final UnicodeBlock MONGOLIAN 365 = new UnicodeBlock("MONGOLIAN", 37); 366 369 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 370 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 38); 371 374 public static final UnicodeBlock GREEK_EXTENDED 375 = new UnicodeBlock("GREEK_EXTENDED", 39); 376 379 public static final UnicodeBlock GENERAL_PUNCTUATION 380 = new UnicodeBlock("GENERAL_PUNCTUATION", 40); 381 384 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 385 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 41); 386 389 public static final UnicodeBlock CURRENCY_SYMBOLS 390 = new UnicodeBlock("CURRENCY_SYMBOLS", 42); 391 396 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 397 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 43); 398 401 public static final UnicodeBlock LETTERLIKE_SYMBOLS 402 = new UnicodeBlock("LETTERLIKE_SYMBOLS", 44); 403 406 public static final UnicodeBlock NUMBER_FORMS 407 = new UnicodeBlock("NUMBER_FORMS", 45); 408 411 public static final UnicodeBlock ARROWS 412 = new UnicodeBlock("ARROWS", 46); 413 416 public static final UnicodeBlock MATHEMATICAL_OPERATORS 417 = new UnicodeBlock("MATHEMATICAL_OPERATORS", 47); 418 421 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 422 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 48); 423 426 public static final UnicodeBlock CONTROL_PICTURES 427 = new UnicodeBlock("CONTROL_PICTURES", 49); 428 431 public static final 
UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 432 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 50); 433 436 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 437 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 51); 438 441 public static final UnicodeBlock BOX_DRAWING 442 = new UnicodeBlock("BOX_DRAWING", 52); 443 446 public static final UnicodeBlock BLOCK_ELEMENTS 447 = new UnicodeBlock("BLOCK_ELEMENTS", 53); 448 451 public static final UnicodeBlock GEOMETRIC_SHAPES 452 = new UnicodeBlock("GEOMETRIC_SHAPES", 54); 453 456 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 457 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 55); 458 461 public static final UnicodeBlock DINGBATS 462 = new UnicodeBlock("DINGBATS", 56); 463 466 public static final UnicodeBlock BRAILLE_PATTERNS 467 = new UnicodeBlock("BRAILLE_PATTERNS", 57); 468 471 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 472 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 58); 473 476 public static final UnicodeBlock KANGXI_RADICALS 477 = new UnicodeBlock("KANGXI_RADICALS", 59); 478 481 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 482 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 60); 483 486 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 487 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 61); 488 491 public static final UnicodeBlock HIRAGANA 492 = new UnicodeBlock("HIRAGANA", 62); 493 496 public static final UnicodeBlock KATAKANA 497 = new UnicodeBlock("KATAKANA", 63); 498 501 public static final UnicodeBlock BOPOMOFO 502 = new UnicodeBlock("BOPOMOFO", 64); 503 506 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 507 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 65); 508 511 public static final UnicodeBlock KANBUN 512 = new UnicodeBlock("KANBUN", 66); 513 516 public static final UnicodeBlock BOPOMOFO_EXTENDED 517 = new UnicodeBlock("BOPOMOFO_EXTENDED", 67); 518 521 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 522 = 
new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 68); 523 526 public static final UnicodeBlock CJK_COMPATIBILITY 527 = new UnicodeBlock("CJK_COMPATIBILITY", 69); 528 531 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 532 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 70); 533 536 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 537 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 71); 538 541 public static final UnicodeBlock YI_SYLLABLES 542 = new UnicodeBlock("YI_SYLLABLES", 72); 543 546 public static final UnicodeBlock YI_RADICALS 547 = new UnicodeBlock("YI_RADICALS", 73); 548 551 public static final UnicodeBlock HANGUL_SYLLABLES 552 = new UnicodeBlock("HANGUL_SYLLABLES", 74); 553 556 public static final UnicodeBlock HIGH_SURROGATES 557 = new UnicodeBlock("HIGH_SURROGATES", 75); 558 561 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 562 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 76); 563 566 public static final UnicodeBlock LOW_SURROGATES 567 = new UnicodeBlock("LOW_SURROGATES", 77); 568 576 public static final UnicodeBlock PRIVATE_USE_AREA 577 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 578 586 public static final UnicodeBlock PRIVATE_USE 587 = PRIVATE_USE_AREA; 588 591 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 592 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 79); 593 596 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 597 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 80); 598 601 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 602 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 81); 603 606 public static final UnicodeBlock COMBINING_HALF_MARKS 607 = new UnicodeBlock("COMBINING_HALF_MARKS", 82); 608 611 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 612 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 83); 613 616 public static final UnicodeBlock SMALL_FORM_VARIANTS 617 = new UnicodeBlock("SMALL_FORM_VARIANTS", 84); 618 621 public static 
final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 622 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 85); 623 626 public static final UnicodeBlock SPECIALS 627 = new UnicodeBlock("SPECIALS", 86); 628 631 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 632 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 87); 633 636 public static final UnicodeBlock OLD_ITALIC 637 = new UnicodeBlock("OLD_ITALIC", 88); 638 641 public static final UnicodeBlock GOTHIC 642 = new UnicodeBlock("GOTHIC", 89); 643 646 public static final UnicodeBlock DESERET 647 = new UnicodeBlock("DESERET", 90); 648 651 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 652 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 91); 653 656 public static final UnicodeBlock MUSICAL_SYMBOLS 657 = new UnicodeBlock("MUSICAL_SYMBOLS", 92); 658 661 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 662 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 93); 663 666 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 667 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 94); 668 671 public static final UnicodeBlock 672 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 673 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 95); 674 677 public static final UnicodeBlock TAGS 678 = new UnicodeBlock("TAGS", 96); 679 680 682 686 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 687 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 97); 688 692 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 693 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", 97); 694 697 public static final UnicodeBlock TAGALOG 698 = new UnicodeBlock("TAGALOG", 98); 699 702 public static final UnicodeBlock HANUNOO 703 = new UnicodeBlock("HANUNOO", 99); 704 707 public static final UnicodeBlock BUHID 708 = new UnicodeBlock("BUHID", 100); 709 712 public static final UnicodeBlock TAGBANWA 713 = new UnicodeBlock("TAGBANWA", 101); 714 717 public static final UnicodeBlock 
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 718 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 102); 719 722 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 723 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 103); 724 727 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 728 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 104); 729 732 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 733 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 105); 734 737 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 738 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 106); 739 742 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 743 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 107); 744 747 public static final UnicodeBlock VARIATION_SELECTORS 748 = new UnicodeBlock("VARIATION_SELECTORS", 108); 749 752 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 753 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 109); 754 757 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 758 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 110); 759 760 763 public static final UnicodeBlock LIMBU 764 = new UnicodeBlock("LIMBU", 111); 765 768 public static final UnicodeBlock TAI_LE 769 = new UnicodeBlock("TAI LE", 112); 770 773 public static final UnicodeBlock KHMER_SYMBOLS 774 = new UnicodeBlock("KHMER SYMBOLS", 113); 775 776 779 public static final UnicodeBlock PHONETIC_EXTENSIONS 780 = new UnicodeBlock("PHONETIC EXTENSIONS", 114); 781 782 785 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 786 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 115); 787 790 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 791 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 116); 792 795 public static final UnicodeBlock LINEAR_B_SYLLABARY 796 = new UnicodeBlock("LINEAR_B_SYLLABARY", 117); 797 800 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 801 = new 
UnicodeBlock("LINEAR_B_IDEOGRAMS", 118); 802 805 public static final UnicodeBlock AEGEAN_NUMBERS 806 = new UnicodeBlock("AEGEAN_NUMBERS", 119); 807 810 public static final UnicodeBlock UGARITIC 811 = new UnicodeBlock("UGARITIC", 120); 812 815 public static final UnicodeBlock SHAVIAN 816 = new UnicodeBlock("SHAVIAN", 121); 817 820 public static final UnicodeBlock OSMANYA 821 = new UnicodeBlock("OSMANYA", 122); 822 825 public static final UnicodeBlock CYPRIOT_SYLLABARY 826 = new UnicodeBlock("CYPRIOT_SYLLABARY", 123); 827 830 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 831 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 124); 832 833 836 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 837 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 125); 838 839 840 841 845 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 126); 846 847 851 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 127); 852 853 857 public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT", 128); 858 859 863 public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE", 129); 864 865 869 public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES", 130); 870 871 875 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 131); 876 877 881 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", 132); 882 883 887 public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED", 133); 888 889 893 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 134); 894 895 899 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT", 135); 900 901 905 public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC", 
136); 906 907 911 public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI", 137); 912 913 917 public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS", 138); 918 919 923 public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE", 139); 924 925 929 public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN", 140); 930 931 935 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 141); 936 937 941 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 142); 942 943 947 public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI", 143); 948 949 953 public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH", 144); 954 955 959 public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS", 145); 960 961 965 public static final UnicodeBlock NKO = new UnicodeBlock( "NKO", 146); 966 970 public static final UnicodeBlock BALINESE = new UnicodeBlock( "BALINESE", 147); 971 975 public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock( "LATIN_EXTENDED_C", 148); 976 980 public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock( "LATIN_EXTENDED_D", 149); 981 985 public static final UnicodeBlock PHAGS_PA = new UnicodeBlock( "PHAGS_PA", 150); 986 990 public static final UnicodeBlock PHOENICIAN = new UnicodeBlock( "PHOENICIAN", 151); 991 995 public static final UnicodeBlock CUNEIFORM = new UnicodeBlock( "CUNEIFORM", 152); 996 1000 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock( "CUNEIFORM_NUMBERS_AND_PUNCTUATION", 153); 1001 1005 public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock( "COUNTING_ROD_NUMERALS", 154); 1006 1007 1010 public static final UnicodeBlock INVALID_CODE 1011 = new UnicodeBlock("INVALID_CODE", -1); 1012 1013 1015 1018 public 
static final int INVALID_CODE_ID = -1; 1019 1022 public static final int BASIC_LATIN_ID = 1; 1023 1026 public static final int LATIN_1_SUPPLEMENT_ID = 2; 1027 1030 public static final int LATIN_EXTENDED_A_ID = 3; 1031 1034 public static final int LATIN_EXTENDED_B_ID = 4; 1035 1038 public static final int IPA_EXTENSIONS_ID = 5; 1039 1042 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 1043 1046 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 1047 1051 public static final int GREEK_ID = 8; 1052 1055 public static final int CYRILLIC_ID = 9; 1056 1059 public static final int ARMENIAN_ID = 10; 1060 1063 public static final int HEBREW_ID = 11; 1064 1067 public static final int ARABIC_ID = 12; 1068 1071 public static final int SYRIAC_ID = 13; 1072 1075 public static final int THAANA_ID = 14; 1076 1079 public static final int DEVANAGARI_ID = 15; 1080 1083 public static final int BENGALI_ID = 16; 1084 1087 public static final int GURMUKHI_ID = 17; 1088 1091 public static final int GUJARATI_ID = 18; 1092 1095 public static final int ORIYA_ID = 19; 1096 1099 public static final int TAMIL_ID = 20; 1100 1103 public static final int TELUGU_ID = 21; 1104 1107 public static final int KANNADA_ID = 22; 1108 1111 public static final int MALAYALAM_ID = 23; 1112 1115 public static final int SINHALA_ID = 24; 1116 1119 public static final int THAI_ID = 25; 1120 1123 public static final int LAO_ID = 26; 1124 1127 public static final int TIBETAN_ID = 27; 1128 1131 public static final int MYANMAR_ID = 28; 1132 1135 public static final int GEORGIAN_ID = 29; 1136 1139 public static final int HANGUL_JAMO_ID = 30; 1140 1143 public static final int ETHIOPIC_ID = 31; 1144 1147 public static final int CHEROKEE_ID = 32; 1148 1151 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 1152 1155 public static final int OGHAM_ID = 34; 1156 1159 public static final int RUNIC_ID = 35; 1160 1163 public static final int KHMER_ID = 36; 1164 1167 public static final int 
MONGOLIAN_ID = 37; 1168 1171 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 1172 1175 public static final int GREEK_EXTENDED_ID = 39; 1176 1179 public static final int GENERAL_PUNCTUATION_ID = 40; 1180 1183 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 1184 1187 public static final int CURRENCY_SYMBOLS_ID = 42; 1188 1193 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 1194 1197 public static final int LETTERLIKE_SYMBOLS_ID = 44; 1198 1201 public static final int NUMBER_FORMS_ID = 45; 1202 1205 public static final int ARROWS_ID = 46; 1206 1209 public static final int MATHEMATICAL_OPERATORS_ID = 47; 1210 1213 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 1214 1217 public static final int CONTROL_PICTURES_ID = 49; 1218 1221 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 1222 1225 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 1226 1229 public static final int BOX_DRAWING_ID = 52; 1230 1233 public static final int BLOCK_ELEMENTS_ID = 53; 1234 1237 public static final int GEOMETRIC_SHAPES_ID = 54; 1238 1241 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 1242 1245 public static final int DINGBATS_ID = 56; 1246 1249 public static final int BRAILLE_PATTERNS_ID = 57; 1250 1253 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 1254 1257 public static final int KANGXI_RADICALS_ID = 59; 1258 1261 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 1262 1265 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 1266 1269 public static final int HIRAGANA_ID = 62; 1270 1273 public static final int KATAKANA_ID = 63; 1274 1277 public static final int BOPOMOFO_ID = 64; 1278 1281 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 1282 1285 public static final int KANBUN_ID = 66; 1286 1289 public static final int BOPOMOFO_EXTENDED_ID = 67; 1290 1293 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 1294 1297 public static final int 
CJK_COMPATIBILITY_ID = 69; 1298 1301 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 1302 1305 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 1306 1309 public static final int YI_SYLLABLES_ID = 72; 1310 1313 public static final int YI_RADICALS_ID = 73; 1314 1317 public static final int HANGUL_SYLLABLES_ID = 74; 1318 1321 public static final int HIGH_SURROGATES_ID = 75; 1322 1325 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 1326 1329 public static final int LOW_SURROGATES_ID = 77; 1330 1338 public static final int PRIVATE_USE_AREA_ID = 78; 1339 1347 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 1348 1351 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 1352 1355 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 1356 1359 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 1360 1363 public static final int COMBINING_HALF_MARKS_ID = 82; 1364 1367 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 1368 1371 public static final int SMALL_FORM_VARIANTS_ID = 84; 1372 1375 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 1376 1379 public static final int SPECIALS_ID = 86; 1380 1383 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 1384 1387 public static final int OLD_ITALIC_ID = 88; 1388 1391 public static final int GOTHIC_ID = 89; 1392 1395 public static final int DESERET_ID = 90; 1396 1399 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 1400 1403 public static final int MUSICAL_SYMBOLS_ID = 92; 1404 1407 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 1408 1411 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 1412 1415 public static final int 1416 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 1417 1420 public static final int TAGS_ID = 96; 1421 1422 1424 1428 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 1429 1433 1434 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 1435 1438 
public static final int TAGALOG_ID = 98; 1439 1442 public static final int HANUNOO_ID = 99; 1443 1446 public static final int BUHID_ID = 100; 1447 1450 public static final int TAGBANWA_ID = 101; 1451 1454 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 1455 1458 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 1459 1462 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 1463 1466 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 1467 1470 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 1471 1474 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 1475 1478 public static final int VARIATION_SELECTORS_ID = 108; 1479 1482 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 1483 1486 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 1487 1488 1491 public static final int LIMBU_ID = 111; 1492 1495 public static final int TAI_LE_ID = 112; 1496 1499 public static final int KHMER_SYMBOLS_ID = 113; 1500 1503 public static final int PHONETIC_EXTENSIONS_ID = 114; 1504 1507 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; 1508 1511 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; 1512 1515 public static final int LINEAR_B_SYLLABARY_ID = 117; 1516 1519 public static final int LINEAR_B_IDEOGRAMS_ID = 118; 1520 1523 public static final int AEGEAN_NUMBERS_ID = 119; 1524 1527 public static final int UGARITIC_ID = 120; 1528 1531 public static final int SHAVIAN_ID = 121; 1532 1535 public static final int OSMANYA_ID = 122; 1536 1539 public static final int CYPRIOT_SYLLABARY_ID = 123; 1540 1543 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; 1544 1547 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; 1548 1549 1550 1551 1555 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; 1556 1557 1561 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; 1562 1563 1567 public static final int ARABIC_SUPPLEMENT_ID 
= 128; 1568 1569 1573 public static final int BUGINESE_ID = 129; 1574 1575 1579 public static final int CJK_STROKES_ID = 130; 1580 1581 1585 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; 1586 1587 1591 public static final int COPTIC_ID = 132; 1592 1593 1597 public static final int ETHIOPIC_EXTENDED_ID = 133; 1598 1599 1603 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; 1604 1605 1609 public static final int GEORGIAN_SUPPLEMENT_ID = 135; 1610 1611 1615 public static final int GLAGOLITIC_ID = 136; 1616 1617 1621 public static final int KHAROSHTHI_ID = 137; 1622 1623 1627 public static final int MODIFIER_TONE_LETTERS_ID = 138; 1628 1629 1633 public static final int NEW_TAI_LUE_ID = 139; 1634 1635 1639 public static final int OLD_PERSIAN_ID = 140; 1640 1641 1645 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; 1646 1647 1651 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; 1652 1653 1657 public static final int SYLOTI_NAGRI_ID = 143; 1658 1659 1663 public static final int TIFINAGH_ID = 144; 1664 1665 1669 public static final int VERTICAL_FORMS_ID = 145; 1670 1671 1672 1673 1677 public static final int NKO_ID = 146; 1678 1682 public static final int BALINESE_ID = 147; 1683 1687 public static final int LATIN_EXTENDED_C_ID = 148; 1688 1692 public static final int LATIN_EXTENDED_D_ID = 149; 1693 1697 public static final int PHAGS_PA_ID = 150; 1698 1702 public static final int PHOENICIAN_ID = 151; 1703 1707 public static final int CUNEIFORM_ID = 152; 1708 1712 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; 1713 1717 public static final int COUNTING_ROD_NUMERALS_ID = 154; 1718 1719 1722 public static final int COUNT = 155; 1723 1724 1726 1735 public static UnicodeBlock getInstance(int id) 1736 { 1737 if (id >= 0 && id < BLOCKS_.length) { 1738 return BLOCKS_[id]; 1739 } 1740 return INVALID_CODE; 1741 } 1742 1743 1750 public static UnicodeBlock of(int ch) 1751 { 1752 if (ch > MAX_VALUE) { 1753 
return INVALID_CODE; 1754 } 1755 1756 return UnicodeBlock.getInstance((PROPERTY_.getAdditional(ch, 0) 1757 & BLOCK_MASK_) >> BLOCK_SHIFT_); 1758 } 1759 1760 1767 static int idOf(int ch) { 1768 if (ch < 0 || ch > MAX_VALUE) { 1769 return -1; 1770 } 1771 1772 return (PROPERTY_.getAdditional(ch, 0) & BLOCK_MASK_) >> BLOCK_SHIFT_; 1773 } 1774 1775 1785 public static final UnicodeBlock forName(String blockName) { 1786 Map m = null; 1787 if (mref != null) { 1788 m = (Map )mref.get(); 1789 } 1790 if (m == null) { 1791 m = new HashMap (BLOCKS_.length); 1792 for (int i = 0; i < BLOCKS_.length; ++i) { 1793 UnicodeBlock b = BLOCKS_[i]; 1794 String name = getPropertyValueName(UProperty.BLOCK, b.getID(), UProperty.NameChoice.LONG); 1795 m.put(name.toUpperCase(), b); 1796 m.put(name.replace('_',' ').toUpperCase(), b); 1797 m.put(b.toString().toUpperCase(), b); 1798 } 1799 mref = new SoftReference (m); 1800 } 1801 UnicodeBlock b = (UnicodeBlock)m.get(blockName.toUpperCase()); 1802 if (b == null) { 1803 throw new IllegalArgumentException (); 1804 } 1805 return b; 1806 } 1807 private static SoftReference mref; 1808 1809 1814 public int getID() 1815 { 1816 return m_id_; 1817 } 1818 1819 1821 1824 private final static UnicodeBlock BLOCKS_[] = { 1825 NO_BLOCK, BASIC_LATIN, 1826 LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A, 1827 LATIN_EXTENDED_B, IPA_EXTENSIONS, 1828 SPACING_MODIFIER_LETTERS, COMBINING_DIACRITICAL_MARKS, 1829 GREEK, CYRILLIC, 1830 ARMENIAN, HEBREW, 1831 ARABIC, SYRIAC, 1832 THAANA, DEVANAGARI, 1833 BENGALI, GURMUKHI, 1834 GUJARATI, ORIYA, 1835 TAMIL, TELUGU, 1836 KANNADA, MALAYALAM, 1837 SINHALA, THAI, 1838 LAO, TIBETAN, 1839 MYANMAR, GEORGIAN, 1840 HANGUL_JAMO, ETHIOPIC, 1841 CHEROKEE, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1842 OGHAM, RUNIC, 1843 KHMER, MONGOLIAN, 1844 LATIN_EXTENDED_ADDITIONAL, GREEK_EXTENDED, 1845 GENERAL_PUNCTUATION, SUPERSCRIPTS_AND_SUBSCRIPTS, 1846 CURRENCY_SYMBOLS, COMBINING_MARKS_FOR_SYMBOLS, 1847 LETTERLIKE_SYMBOLS, NUMBER_FORMS, 1848 ARROWS, 
MATHEMATICAL_OPERATORS, 1849 MISCELLANEOUS_TECHNICAL, CONTROL_PICTURES, 1850 OPTICAL_CHARACTER_RECOGNITION, ENCLOSED_ALPHANUMERICS, 1851 BOX_DRAWING, BLOCK_ELEMENTS, 1852 GEOMETRIC_SHAPES, MISCELLANEOUS_SYMBOLS, 1853 DINGBATS, BRAILLE_PATTERNS, 1854 CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS, 1855 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, CJK_SYMBOLS_AND_PUNCTUATION, 1856 HIRAGANA, KATAKANA, 1857 BOPOMOFO, HANGUL_COMPATIBILITY_JAMO, 1858 KANBUN, BOPOMOFO_EXTENDED, 1859 ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY, 1860 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, CJK_UNIFIED_IDEOGRAPHS, 1861 YI_SYLLABLES, YI_RADICALS, 1862 HANGUL_SYLLABLES, HIGH_SURROGATES, 1863 HIGH_PRIVATE_USE_SURROGATES, LOW_SURROGATES, 1864 PRIVATE_USE_AREA, CJK_COMPATIBILITY_IDEOGRAPHS, 1865 ALPHABETIC_PRESENTATION_FORMS, ARABIC_PRESENTATION_FORMS_A, 1866 COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS, 1867 SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B, 1868 SPECIALS, HALFWIDTH_AND_FULLWIDTH_FORMS, 1869 OLD_ITALIC, GOTHIC, 1870 DESERET, BYZANTINE_MUSICAL_SYMBOLS, 1871 MUSICAL_SYMBOLS, MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1872 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1873 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1874 TAGS, CYRILLIC_SUPPLEMENT, 1875 TAGALOG, HANUNOO, 1876 BUHID, TAGBANWA, 1877 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, SUPPLEMENTAL_ARROWS_A, 1878 SUPPLEMENTAL_ARROWS_B, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1879 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1880 KATAKANA_PHONETIC_EXTENSIONS, 1881 VARIATION_SELECTORS, SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1882 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1883 LIMBU, TAI_LE, KHMER_SYMBOLS, PHONETIC_EXTENSIONS, 1884 MISCELLANEOUS_SYMBOLS_AND_ARROWS, YIJING_HEXAGRAM_SYMBOLS, 1885 LINEAR_B_SYLLABARY, LINEAR_B_IDEOGRAMS, AEGEAN_NUMBERS, 1886 UGARITIC, SHAVIAN, OSMANYA, CYPRIOT_SYLLABARY, 1887 TAI_XUAN_JING_SYMBOLS, VARIATION_SELECTORS_SUPPLEMENT, 1888 1889 1890 ANCIENT_GREEK_MUSICAL_NOTATION, 1891 ANCIENT_GREEK_NUMBERS, 1892 ARABIC_SUPPLEMENT, 1893 BUGINESE, 1894 CJK_STROKES, 
1895 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 1896 COPTIC, 1897 ETHIOPIC_EXTENDED, 1898 ETHIOPIC_SUPPLEMENT, 1899 GEORGIAN_SUPPLEMENT, 1900 GLAGOLITIC, 1901 KHAROSHTHI, 1902 MODIFIER_TONE_LETTERS, 1903 NEW_TAI_LUE, 1904 OLD_PERSIAN, 1905 PHONETIC_EXTENSIONS_SUPPLEMENT, 1906 SUPPLEMENTAL_PUNCTUATION, 1907 SYLOTI_NAGRI, 1908 TIFINAGH, 1909 VERTICAL_FORMS, 1910 NKO, 1911 BALINESE, 1912 LATIN_EXTENDED_C, 1913 LATIN_EXTENDED_D, 1914 PHAGS_PA, 1915 PHOENICIAN, 1916 CUNEIFORM, 1917 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 1918 COUNTING_ROD_NUMERALS 1919 }; 1920 1921 static { 1922 if (COUNT!=BLOCKS_.length) { 1923 throw new java.lang.IllegalStateException ("UnicodeBlock fields are inconsistent!"); 1924 } 1925 } 1926 1929 private int m_id_; 1930 1931 1933 1939 private UnicodeBlock(String name, int id) 1940 { 1941 super(name); 1942 m_id_ = id; 1943 } 1944 } 1945 1946 1952 public static interface EastAsianWidth 1953 { 1954 1957 public static final int NEUTRAL = 0; 1958 1961 public static final int AMBIGUOUS = 1; 1962 1965 public static final int HALFWIDTH = 2; 1966 1969 public static final int FULLWIDTH = 3; 1970 1973 public static final int NARROW = 4; 1974 1977 public static final int WIDE = 5; 1978 1981 public static final int COUNT = 6; 1982 } 1983 1984 1989 public static interface DecompositionType 1990 { 1991 1994 public static final int NONE = 0; 1995 1998 public static final int CANONICAL = 1; 1999 2002 public static final int COMPAT = 2; 2003 2006 public static final int CIRCLE = 3; 2007 2010 public static final int FINAL = 4; 2011 2014 public static final int FONT = 5; 2015 2018 public static final int FRACTION = 6; 2019 2022 public static final int INITIAL = 7; 2023 2026 public static final int ISOLATED = 8; 2027 2030 public static final int MEDIAL = 9; 2031 2034 public static final int NARROW = 10; 2035 2038 public static final int NOBREAK = 11; 2039 2042 public static final int SMALL = 12; 2043 2046 public static final int SQUARE = 13; 2047 2050 public static final int 
SUB = 14; 2051 2054 public static final int SUPER = 15; 2055 2058 public static final int VERTICAL = 16; 2059 2062 public static final int WIDE = 17; 2063 2066 public static final int COUNT = 18; 2067 } 2068 2069 2074 public static interface JoiningType 2075 { 2076 2079 public static final int NON_JOINING = 0; 2080 2083 public static final int JOIN_CAUSING = 1; 2084 2087 public static final int DUAL_JOINING = 2; 2088 2091 public static final int LEFT_JOINING = 3; 2092 2095 public static final int RIGHT_JOINING = 4; 2096 2099 public static final int TRANSPARENT = 5; 2100 2103 public static final int COUNT = 6; 2104 } 2105 2106 2111 public static interface JoiningGroup 2112 { 2113 2116 public static final int NO_JOINING_GROUP = 0; 2117 2120 public static final int AIN = 1; 2121 2124 public static final int ALAPH = 2; 2125 2128 public static final int ALEF = 3; 2129 2132 public static final int BEH = 4; 2133 2136 public static final int BETH = 5; 2137 2140 public static final int DAL = 6; 2141 2144 public static final int DALATH_RISH = 7; 2145 2148 public static final int E = 8; 2149 2152 public static final int FEH = 9; 2153 2156 public static final int FINAL_SEMKATH = 10; 2157 2160 public static final int GAF = 11; 2161 2164 public static final int GAMAL = 12; 2165 2168 public static final int HAH = 13; 2169 2172 public static final int HAMZA_ON_HEH_GOAL = 14; 2173 2176 public static final int HE = 15; 2177 2180 public static final int HEH = 16; 2181 2184 public static final int HEH_GOAL = 17; 2185 2188 public static final int HETH = 18; 2189 2192 public static final int KAF = 19; 2193 2196 public static final int KAPH = 20; 2197 2200 public static final int KNOTTED_HEH = 21; 2201 2204 public static final int LAM = 22; 2205 2208 public static final int LAMADH = 23; 2209 2212 public static final int MEEM = 24; 2213 2216 public static final int MIM = 25; 2217 2220 public static final int NOON = 26; 2221 2224 public static final int NUN = 27; 2225 2228 public static 
final int PE = 28; 2229 2232 public static final int QAF = 29; 2233 2236 public static final int QAPH = 30; 2237 2240 public static final int REH = 31; 2241 2244 public static final int REVERSED_PE = 32; 2245 2248 public static final int SAD = 33; 2249 2252 public static final int SADHE = 34; 2253 2256 public static final int SEEN = 35; 2257 2260 public static final int SEMKATH = 36; 2261 2264 public static final int SHIN = 37; 2265 2268 public static final int SWASH_KAF = 38; 2269 2272 public static final int SYRIAC_WAW = 39; 2273 2276 public static final int TAH = 40; 2277 2280 public static final int TAW = 41; 2281 2284 public static final int TEH_MARBUTA = 42; 2285 2288 public static final int TETH = 43; 2289 2292 public static final int WAW = 44; 2293 2296 public static final int YEH = 45; 2297 2300 public static final int YEH_BARREE = 46; 2301 2304 public static final int YEH_WITH_TAIL = 47; 2305 2308 public static final int YUDH = 48; 2309 2312 public static final int YUDH_HE = 49; 2313 2316 public static final int ZAIN = 50; 2317 2320 public static final int FE = 51; 2321 2324 public static final int KHAPH = 52; 2325 2328 public static final int ZHAIN =53; 2329 2332 public static final int COUNT = 54; 2333 } 2334 2335 2341 public static interface GraphemeClusterBreak { 2342 2346 public static final int OTHER = 0; 2347 2351 public static final int CONTROL = 1; 2352 2356 public static final int CR = 2; 2357 2361 public static final int EXTEND = 3; 2362 2366 public static final int L = 4; 2367 2371 public static final int LF = 5; 2372 2376 public static final int LV = 6; 2377 2381 public static final int LVT = 7; 2382 2386 public static final int T = 8; 2387 2391 public static final int V = 9; 2392 2396 public static final int COUNT = 10; 2397 } 2398 2399 2405 public static interface WordBreak { 2406 2410 public static final int OTHER = 0; 2411 2415 public static final int ALETTER = 1; 2416 2420 public static final int FORMAT = 2; 2421 2425 public static final 
int KATAKANA = 3; 2426 2430 public static final int MIDLETTER = 4; 2431 2435 public static final int MIDNUM = 5; 2436 2440 public static final int NUMERIC = 6; 2441 2445 public static final int EXTENDNUMLET = 7; 2446 2450 public static final int COUNT = 8; 2451 } 2452 2453 2459 public static interface SentenceBreak { 2460 2464 public static final int OTHER = 0; 2465 2469 public static final int ATERM = 1; 2470 2474 public static final int CLOSE = 2; 2475 2479 public static final int FORMAT = 3; 2480 2484 public static final int LOWER = 4; 2485 2489 public static final int NUMERIC = 5; 2490 2494 public static final int OLETTER = 6; 2495 2499 public static final int SEP = 7; 2500 2504 public static final int SP = 8; 2505 2509 public static final int STERM = 9; 2510 2514 public static final int UPPER = 10; 2515 2519 public static final int COUNT = 11; 2520 } 2521 2522 2527 public static interface LineBreak 2528 { 2529 2532 public static final int UNKNOWN = 0; 2533 2536 public static final int AMBIGUOUS = 1; 2537 2540 public static final int ALPHABETIC = 2; 2541 2544 public static final int BREAK_BOTH = 3; 2545 2548 public static final int BREAK_AFTER = 4; 2549 2552 public static final int BREAK_BEFORE = 5; 2553 2556 public static final int MANDATORY_BREAK = 6; 2557 2560 public static final int CONTINGENT_BREAK = 7; 2561 2564 public static final int CLOSE_PUNCTUATION = 8; 2565 2568 public static final int COMBINING_MARK = 9; 2569 2572 public static final int CARRIAGE_RETURN = 10; 2573 2576 public static final int EXCLAMATION = 11; 2577 2580 public static final int GLUE = 12; 2581 2584 public static final int HYPHEN = 13; 2585 2588 public static final int IDEOGRAPHIC = 14; 2589 2593 public static final int INSEPERABLE = 15; 2594 2598 public static final int INSEPARABLE = 15; 2599 2602 public static final int INFIX_NUMERIC = 16; 2603 2606 public static final int LINE_FEED = 17; 2607 2610 public static final int NONSTARTER = 18; 2611 2614 public static final int NUMERIC = 
19; 2615 2618 public static final int OPEN_PUNCTUATION = 20; 2619 2622 public static final int POSTFIX_NUMERIC = 21; 2623 2626 public static final int PREFIX_NUMERIC = 22; 2627 2630 public static final int QUOTATION = 23; 2631 2634 public static final int COMPLEX_CONTEXT = 24; 2635 2638 public static final int SURROGATE = 25; 2639 2642 public static final int SPACE = 26; 2643 2646 public static final int BREAK_SYMBOLS = 27; 2647 2650 public static final int ZWSPACE = 28; 2651 2652 2655 public static final int NEXT_LINE = 29; 2656 2657 2660 public static final int WORD_JOINER = 30; 2661 2662 2663 2664 2668 public static final int H2 = 31; 2669 2673 public static final int H3 = 32; 2674 2678 public static final int JL = 33; 2679 2683 public static final int JT = 34; 2684 2688 public static final int JV = 35; 2689 2690 2693 public static final int COUNT = 36; 2694 } 2695 2696 2701 public static interface NumericType 2702 { 2703 2706 public static final int NONE = 0; 2707 2710 public static final int DECIMAL = 1; 2711 2714 public static final int DIGIT = 2; 2715 2718 public static final int NUMERIC = 3; 2719 2722 public static final int COUNT = 4; 2723 } 2724 2725 2731 public static interface HangulSyllableType 2732 { 2733 2736 public static final int NOT_APPLICABLE = 0; 2737 2740 public static final int LEADING_JAMO = 1; 2741 2744 public static final int VOWEL_JAMO = 2; 2745 2748 public static final int TRAILING_JAMO = 3; 2749 2752 public static final int LV_SYLLABLE = 4; 2753 2756 public static final int LVT_SYLLABLE = 5; 2757 2760 public static final int COUNT = 6; 2761 } 2762 2763 2765 2769 public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE; 2770 2771 2778 public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE; 2779 2780 2784 public static final int SUPPLEMENTARY_MIN_VALUE = 2785 UTF16.SUPPLEMENTARY_MIN_VALUE; 2786 2787 2792 public static final int REPLACEMENT_CHAR = '\uFFFD'; 2793 2794 2800 public static final double NO_NUMERIC_VALUE = 
-123456789; 2801 2802 2807 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 2808 2809 2814 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 2815 2816 2818 2841 public static int digit(int ch, int radix) 2842 { 2843 int props = getProperty(ch); 2845 int value; 2846 if (getNumericType(props) == NumericType.DECIMAL) { 2847 value = UCharacterProperty.getUnsignedValue(props); 2848 } else { 2849 value = getEuropeanDigit(ch); 2850 } 2851 return (0 <= value && value < radix) ? value : -1; 2852 } 2853 2854 2867 public static int digit(int ch) 2868 { 2869 int props = getProperty(ch); 2870 if (getNumericType(props) == NumericType.DECIMAL) { 2871 return UCharacterProperty.getUnsignedValue(props); 2872 } else { 2873 return -1; 2874 } 2875 } 2876 2877 2891 public static int getNumericValue(int ch) 2892 { 2893 int props = PROPERTY_.getProperty(ch); 2895 int numericType = getNumericType(props); 2896 2897 if(numericType==0) { 2898 return getEuropeanDigit(ch); 2899 } 2900 if(numericType==UCharacterProperty.NT_FRACTION || numericType>=UCharacterProperty.NT_COUNT) { 2901 return -2; 2902 } 2903 2904 int numericValue = UCharacterProperty.getUnsignedValue(props); 2905 2906 if(numericType<NumericType.COUNT) { 2907 2908 return numericValue; 2909 } else { 2910 2911 long numValue; 2912 int mant, exp; 2913 2914 mant=numericValue>>LARGE_MANT_SHIFT; 2915 exp=numericValue&LARGE_EXP_MASK; 2916 if(mant==0) { 2917 mant=1; 2918 exp+=LARGE_EXP_OFFSET_EXTRA; 2919 } else if(mant>9) { 2920 return -2; 2921 } else { 2922 exp+=LARGE_EXP_OFFSET; 2923 } 2924 if(exp>9) { 2925 return -2; 2926 } 2927 2928 numValue=mant; 2929 2930 2931 while(exp>=4) { 2932 numValue*=10000.; 2933 exp-=4; 2934 } 2935 switch(exp) { 2936 case 3: 2937 numValue*=1000.; 2938 break; 2939 case 2: 2940 numValue*=100.; 2941 break; 2942 case 1: 2943 numValue*=10.; 2944 break; 2945 case 0: 2946 default: 2947 break; 2948 } 2949 if(numValue<=Integer.MAX_VALUE) { 2950 return (int)numValue; 2951 } else { 2952 
return -2; 2953 } 2954 } 2955 } 2956 2957 2973 public static double getUnicodeNumericValue(int ch) 2974 { 2975 int props = PROPERTY_.getProperty(ch); 2977 int numericType = getNumericType(props); 2978 2979 if(numericType==0 || numericType>=UCharacterProperty.NT_COUNT) { 2980 return NO_NUMERIC_VALUE; 2981 } 2982 2983 int numericValue = UCharacterProperty.getUnsignedValue(props); 2984 2985 if(numericType<NumericType.COUNT) { 2986 2987 return numericValue; 2988 } else if(numericType==UCharacterProperty.NT_FRACTION) { 2989 2990 int numerator, denominator; 2991 2992 numerator=numericValue>>FRACTION_NUM_SHIFT; 2993 denominator=(numericValue&FRACTION_DEN_MASK)+FRACTION_DEN_OFFSET; 2994 2995 if(numerator==0) { 2996 numerator=-1; 2997 } 2998 return (double)numerator/(double)denominator; 2999 } else { 3000 3001 double numValue; 3002 int mant, exp; 3003 3004 mant=numericValue>>LARGE_MANT_SHIFT; 3005 exp=numericValue&LARGE_EXP_MASK; 3006 if(mant==0) { 3007 mant=1; 3008 exp+=LARGE_EXP_OFFSET_EXTRA; 3009 } else if(mant>9) { 3010 return NO_NUMERIC_VALUE; 3011 } else { 3012 exp+=LARGE_EXP_OFFSET; 3013 } 3014 3015 numValue=mant; 3016 3017 3018 while(exp>=4) { 3019 numValue*=10000.; 3020 exp-=4; 3021 } 3022 switch(exp) { 3023 case 3: 3024 numValue*=1000.; 3025 break; 3026 case 2: 3027 numValue*=100.; 3028 break; 3029 case 1: 3030 numValue*=10.; 3031 break; 3032 case 0: 3033 default: 3034 break; 3035 } 3036 3037 return numValue; 3038 } 3039 } 3040 3041 3050 public static boolean isSpace(int ch) { 3051 return ch <= 0x20 && 3052 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3053 } 3054 3055 3070 public static int getType(int ch) 3071 { 3072 return getProperty(ch) & UCharacterProperty.TYPE_MASK; 3073 } 3074 3075 3086 public static boolean isDefined(int ch) 3087 { 3088 return getType(ch) != 0; 3089 } 3090 3091 3103 public static boolean isDigit(int ch) 3104 { 3105 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3106 } 3107 3108 3118 public static 
boolean isISOControl(int ch) 3119 { 3120 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3121 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3122 } 3123 3124 3131 public static boolean isLetter(int ch) 3132 { 3133 return ((1 << getType(ch)) 3135 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3136 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3137 | (1 << UCharacterCategory.TITLECASE_LETTER) 3138 | (1 << UCharacterCategory.MODIFIER_LETTER) 3139 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3140 } 3141 3142 3150 public static boolean isLetterOrDigit(int ch) 3151 { 3152 return ((1 << getType(ch)) 3153 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3154 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3155 | (1 << UCharacterCategory.TITLECASE_LETTER) 3156 | (1 << UCharacterCategory.MODIFIER_LETTER) 3157 | (1 << UCharacterCategory.OTHER_LETTER) 3158 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3159 } 3160 3161 3169 public static boolean isJavaLetter(int cp) { 3170 return isJavaIdentifierStart(cp); 3171 } 3172 3173 3181 public static boolean isJavaLetterOrDigit(int cp) { 3182 return isJavaIdentifierPart(cp); 3183 } 3184 3185 3193 public static boolean isJavaIdentifierStart(int cp) { 3194 return java.lang.Character.isJavaIdentifierStart((char)cp); 3196 } 3197 3198 3206 public static boolean isJavaIdentifierPart(int cp) { 3207 return java.lang.Character.isJavaIdentifierPart((char)cp); 3209 } 3210 3211 3224 public static boolean isLowerCase(int ch) 3225 { 3226 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3228 } 3229 3230 3256 public static boolean isWhitespace(int ch) 3257 { 3258 return ((1 << getType(ch)) & 3261 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3262 | (1 << UCharacterCategory.LINE_SEPARATOR) 3263 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3264 && (ch != NO_BREAK_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3265 && (ch != ZERO_WIDTH_NO_BREAK_SPACE_) 3266 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3269 } 3270 
3271 3279 public static boolean isSpaceChar(int ch) 3280 { 3281 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 3283 | (1 << UCharacterCategory.LINE_SEPARATOR) 3284 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 3285 != 0; 3286 } 3287 3288 3301 public static boolean isTitleCase(int ch) 3302 { 3303 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 3305 } 3306 3307 3334 public static boolean isUnicodeIdentifierPart(int ch) 3335 { 3336 return ((1 << getType(ch)) 3339 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3340 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3341 | (1 << UCharacterCategory.TITLECASE_LETTER) 3342 | (1 << UCharacterCategory.MODIFIER_LETTER) 3343 | (1 << UCharacterCategory.OTHER_LETTER) 3344 | (1 << UCharacterCategory.LETTER_NUMBER) 3345 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 3346 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 3347 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 3348 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 3349 || isIdentifierIgnorable(ch); 3350 } 3351 3352 3372 public static boolean isUnicodeIdentifierStart(int ch) 3373 { 3374 3375 return ((1 << getType(ch)) 3377 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3378 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3379 | (1 << UCharacterCategory.TITLECASE_LETTER) 3380 | (1 << UCharacterCategory.MODIFIER_LETTER) 3381 | (1 << UCharacterCategory.OTHER_LETTER) 3382 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 3383 } 3384 3385 3398 public static boolean isIdentifierIgnorable(int ch) 3399 { 3400 if (ch <= 0x9f) { 3403 return isISOControl(ch) 3404 && !((ch >= 0x9 && ch <= 0xd) 3405 || (ch >= 0x1c && ch <= 0x1f)); 3406 } 3407 return getType(ch) == UCharacterCategory.FORMAT; 3408 } 3409 3410 3427 public static boolean isUpperCase(int ch) 3428 { 3429 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 3431 } 3432 3433 3452 public static int toLowerCase(int ch) { 3453 return gCsp.tolower(ch); 3454 } 3455 3456 3469 public 
static String toString(int ch) 3470 { 3471 if (ch < MIN_VALUE || ch > MAX_VALUE) { 3472 return null; 3473 } 3474 3475 if (ch < SUPPLEMENTARY_MIN_VALUE) { 3476 return String.valueOf((char)ch); 3477 } 3478 3479 StringBuffer result = new StringBuffer (); 3480 result.append(UTF16.getLeadSurrogate(ch)); 3481 result.append(UTF16.getTrailSurrogate(ch)); 3482 return result.toString(); 3483 } 3484 3485 3505 public static int toTitleCase(int ch) { 3506 return gCsp.totitle(ch); 3507 } 3508 3509 3528 public static int toUpperCase(int ch) { 3529 return gCsp.toupper(ch); 3530 } 3531 3532 3534 3543 public static boolean isSupplementary(int ch) 3544 { 3545 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 3546 ch <= UCharacter.MAX_VALUE; 3547 } 3548 3549 3556 public static boolean isBMP(int ch) 3557 { 3558 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 3559 } 3560 3561 3568 public static boolean isPrintable(int ch) 3569 { 3570 int cat = getType(ch); 3571 return (cat != UCharacterCategory.UNASSIGNED && 3573 cat != UCharacterCategory.CONTROL && 3574 cat != UCharacterCategory.FORMAT && 3575 cat != UCharacterCategory.PRIVATE_USE && 3576 cat != UCharacterCategory.SURROGATE && 3577 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 3578 } 3579 3580 3588 public static boolean isBaseForm(int ch) 3589 { 3590 int cat = getType(ch); 3591 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 3593 cat == UCharacterCategory.OTHER_NUMBER || 3594 cat == UCharacterCategory.LETTER_NUMBER || 3595 cat == UCharacterCategory.UPPERCASE_LETTER || 3596 cat == UCharacterCategory.LOWERCASE_LETTER || 3597 cat == UCharacterCategory.TITLECASE_LETTER || 3598 cat == UCharacterCategory.MODIFIER_LETTER || 3599 cat == UCharacterCategory.OTHER_LETTER || 3600 cat == UCharacterCategory.NON_SPACING_MARK || 3601 cat == UCharacterCategory.ENCLOSING_MARK || 3602 cat == UCharacterCategory.COMBINING_SPACING_MARK; 3603 } 3604 3605 3615 public static int getDirection(int ch) 3616 { 3617 return gBdp.getClass(ch); 3618 } 3619 3620 
3629 public static boolean isMirrored(int ch) 3630 { 3631 return gBdp.isMirrored(ch); 3632 } 3633 3634 3648 public static int getMirror(int ch) 3649 { 3650 return gBdp.getMirror(ch); 3651 } 3652 3653 3659 public static int getCombiningClass(int ch) 3660 { 3661 if (ch < MIN_VALUE || ch > MAX_VALUE) { 3662 throw new IllegalArgumentException ("Codepoint out of bounds"); 3663 } 3664 return NormalizerImpl.getCombiningClass(ch); 3665 } 3666 3667 3679 public static boolean isLegal(int ch) 3680 { 3681 if (ch < MIN_VALUE) { 3682 return false; 3683 } 3684 if (ch < UTF16.SURROGATE_MIN_VALUE) { 3685 return true; 3686 } 3687 if (ch <= UTF16.SURROGATE_MAX_VALUE) { 3688 return false; 3689 } 3690 if (UCharacterUtility.isNonCharacter(ch)) { 3691 return false; 3692 } 3693 return (ch <= MAX_VALUE); 3694 } 3695 3696 3709 public static boolean isLegal(String str) 3710 { 3711 int size = str.length(); 3712 int codepoint; 3713 for (int i = 0; i < size; i ++) 3714 { 3715 codepoint = UTF16.charAt(str, i); 3716 if (!isLegal(codepoint)) { 3717 return false; 3718 } 3719 if (isSupplementary(codepoint)) { 3720 i ++; 3721 } 3722 } 3723 return true; 3724 } 3725 3726 3731 public static VersionInfo getUnicodeVersion() 3732 { 3733 return PROPERTY_.m_unicodeVersion_; 3734 } 3735 3736 3747 public static String getName(int ch) 3748 { 3749 if(NAME_==null){ 3750 throw new MissingResourceException ("Could not load unames.icu","",""); 3751 } 3752 return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 3753 } 3754 3755 3763 public static String getName(String s, String separator) { 3764 if (s.length() == 1) { return getName(s.charAt(0)); 3766 } 3767 int cp; 3768 StringBuffer sb = new StringBuffer (); 3769 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 3770 cp = UTF16.charAt(s,i); 3771 if (i != 0) sb.append(separator); 3772 sb.append(UCharacter.getName(cp)); 3773 } 3774 return sb.toString(); 3775 } 3776 3777 3788 public static String getName1_0(int ch) 3789 { 3790 if(NAME_==null){ 
3791 throw new MissingResourceException ("Could not load unames.icu","",""); 3792 } 3793 return NAME_.getName(ch, 3794 UCharacterNameChoice.UNICODE_10_CHAR_NAME); 3795 } 3796 3797 3815 public static String getExtendedName(int ch) 3816 { 3817 if(NAME_==null){ 3818 throw new MissingResourceException ("Could not load unames.icu","",""); 3819 } 3820 return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 3821 } 3822 3823 3833 public static String getISOComment(int ch) 3834 { 3835 if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE) { 3836 return null; 3837 } 3838 if(NAME_==null){ 3839 throw new MissingResourceException ("Could not load unames.icu","",""); 3840 } 3841 String result = NAME_.getGroupName(ch, 3842 UCharacterNameChoice.ISO_COMMENT_); 3843 return result; 3844 } 3845 3846 3856 public static int getCharFromName(String name) 3857 { 3858 if(NAME_==null){ 3859 throw new MissingResourceException ("Could not load unames.icu","",""); 3860 } 3861 return NAME_.getCharFromName( 3862 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 3863 } 3864 3865 3875 public static int getCharFromName1_0(String name) 3876 { 3877 if(NAME_==null){ 3878 throw new MissingResourceException ("Could not load unames.icu","",""); 3879 } 3880 return NAME_.getCharFromName( 3881 UCharacterNameChoice.UNICODE_10_CHAR_NAME, name); 3882 } 3883 3884 3903 public static int getCharFromExtendedName(String name) 3904 { 3905 if(NAME_==null){ 3906 throw new MissingResourceException ("Could not load unames.icu","",""); 3907 } 3908 return NAME_.getCharFromName( 3909 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 3910 } 3911 3912 3945 public static String getPropertyName(int property, 3946 int nameChoice) { 3947 return PNAMES_.getPropertyName(property, nameChoice); 3948 } 3949 3950 3972 public static int getPropertyEnum(String propertyAlias) { 3973 return PNAMES_.getPropertyEnum(propertyAlias); 3974 } 3975 3976 4024 public static String getPropertyValueName(int property, 4025 int value, 4026 
int nameChoice) 4027 { 4028 if (property == UProperty.CANONICAL_COMBINING_CLASS 4029 && value >= UCharacter.getIntPropertyMinValue( 4030 UProperty.CANONICAL_COMBINING_CLASS) 4031 && value <= UCharacter.getIntPropertyMaxValue( 4032 UProperty.CANONICAL_COMBINING_CLASS) 4033 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4034 try { 4037 return PNAMES_.getPropertyValueName(property, value, 4038 nameChoice); 4039 } 4040 catch (IllegalArgumentException e) { 4041 return null; 4042 } 4043 } 4044 return PNAMES_.getPropertyValueName(property, value, nameChoice); 4045 } 4046 4047 4078 public static int getPropertyValueEnum(int property, 4079 String valueAlias) { 4080 return PNAMES_.getPropertyValueEnum(property, valueAlias); 4081 } 4082 4083 4092 public static int getCodePoint(char lead, char trail) 4093 { 4094 if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE && 4095 lead <= UTF16.LEAD_SURROGATE_MAX_VALUE && 4096 trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE && 4097 trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) { 4098 return UCharacterProperty.getRawSupplementary(lead, trail); 4099 } 4100 throw new IllegalArgumentException ("Illegal surrogate characters"); 4101 } 4102 4103 4111 public static int getCodePoint(char char16) 4112 { 4113 if (UCharacter.isLegal(char16)) { 4114 return char16; 4115 } 4116 throw new IllegalArgumentException ("Illegal codepoint"); 4117 } 4118 4119 4123 private static class StringContextIterator implements UCaseProps.ContextIterator { 4124 4128 StringContextIterator(String s) { 4129 this.s=s; 4130 limit=s.length(); 4131 cpStart=cpLimit=index=0; 4132 dir=0; 4133 } 4134 4135 4145 public void setLimit(int lim) { 4146 if(0<=lim && lim<=s.length()) { 4147 limit=lim; 4148 } else { 4149 limit=s.length(); 4150 } 4151 } 4152 4153 4166 public int nextCaseMapCP() { 4167 cpStart=cpLimit; 4168 if(cpLimit<limit) { 4169 int c=s.charAt(cpLimit++); 4170 if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE) { 4171 char c2; 4172 if( 
c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit && 4173 UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) && c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE 4174 ) { 4175 ++cpLimit; 4177 c=UCharacterProperty.getRawSupplementary((char)c, c2); 4178 } 4180 } 4182 return c; 4183 } else { 4184 return -1; 4185 } 4186 } 4187 4188 4192 public int getCPStart() { 4193 return cpStart; 4194 } 4195 4196 public void reset(int dir) { 4198 if(dir>0) { 4199 4200 this.dir=1; 4201 index=cpLimit; 4202 } else if(dir<0) { 4203 4204 this.dir=-1; 4205 index=cpStart; 4206 } else { 4207 this.dir=0; 4209 index=0; 4210 } 4211 } 4212 4213 public int next() { 4214 int c; 4215 4216 if(dir>0 && index<s.length()) { 4217 c=UTF16.charAt(s, index); 4218 index+=UTF16.getCharCount(c); 4219 return c; 4220 } else if(dir<0 && index>0) { 4221 c=UTF16.charAt(s, index-1); 4222 index-=UTF16.getCharCount(c); 4223 return c; 4224 } 4225 return -1; 4226 } 4227 4228 protected String s; 4230 protected int index, limit, cpStart, cpLimit; 4231 protected int dir; } 4233 4234 4241 public static String toUpperCase(String str) 4242 { 4243 return toUpperCase(ULocale.getDefault(), str); 4244 } 4245 4246 4253 public static String toLowerCase(String str) 4254 { 4255 return toLowerCase(ULocale.getDefault(), str); 4256 } 4257 4258 4276 public static String toTitleCase(String str, BreakIterator breakiter) 4277 { 4278 return toTitleCase(ULocale.getDefault(), str, breakiter); 4279 } 4280 4281 4289 public static String toUpperCase(Locale locale, String str) 4290 { 4291 return toUpperCase(ULocale.forLocale(locale), str); 4292 } 4293 4294 4303 public static String toUpperCase(ULocale locale, String str) { 4304 StringContextIterator iter = new StringContextIterator(str); 4305 StringBuffer result = new StringBuffer (str.length()); 4306 int[] locCache = new int[1]; 4307 int c; 4308 4309 if (locale == null) { 4310 locale = ULocale.getDefault(); 4311 } 4312 locCache[0]=0; 4313 4314 while((c=iter.nextCaseMapCP())>=0) { 4315 
c=gCsp.toFullUpper(c, iter, result, locale, locCache); 4316 4317 4318 if(c<0) { 4319 4320 c=~c; 4321 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4322 4323 continue; 4324 4325 } 4326 if(c<=0xffff) { 4327 result.append((char)c); 4328 } else { 4329 UTF16.append(result, c); 4330 } 4331 } 4332 return result.toString(); 4333 } 4334 4335 4343 public static String toLowerCase(Locale locale, String str) 4344 { 4345 return toLowerCase(ULocale.forLocale(locale), str); 4346 } 4347 4348 4357 public static String toLowerCase(ULocale locale, String str) { 4358 StringContextIterator iter = new StringContextIterator(str); 4359 StringBuffer result = new StringBuffer (str.length()); 4360 int[] locCache = new int[1]; 4361 int c; 4362 4363 if (locale == null) { 4364 locale = ULocale.getDefault(); 4365 } 4366 locCache[0]=0; 4367 4368 while((c=iter.nextCaseMapCP())>=0) { 4369 c=gCsp.toFullLower(c, iter, result, locale, locCache); 4370 4371 4372 if(c<0) { 4373 4374 c=~c; 4375 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4376 4377 continue; 4378 4379 } 4380 if(c<=0xffff) { 4381 result.append((char)c); 4382 } else { 4383 UTF16.append(result, c); 4384 } 4385 } 4386 return result.toString(); 4387 } 4388 4389 4408 public static String toTitleCase(Locale locale, String str, 4409 BreakIterator breakiter) 4410 { 4411 return toTitleCase(ULocale.forLocale(locale), str, breakiter); 4412 } 4413 4414 4434 public static String toTitleCase(ULocale locale, String str, 4435 BreakIterator titleIter) { 4436 StringContextIterator iter = new StringContextIterator(str); 4437 StringBuffer result = new StringBuffer (str.length()); 4438 int[] locCache = new int[1]; 4439 int c, srcLength = str.length(); 4440 4441 if (locale == null) { 4442 locale = ULocale.getDefault(); 4443 } 4444 locCache[0]=0; 4445 4446 if(titleIter == null) { 4447 titleIter = BreakIterator.getWordInstance(locale); 4448 } 4449 titleIter.setText(str); 4450 4451 int prev, titleStart, index; 4452 boolean isFirstIndex; 4453 4454 4455 prev=0; 4456 
isFirstIndex=true; 4457 4458 4459 while(prev<srcLength) { 4460 4461 if(isFirstIndex) { 4462 isFirstIndex=false; 4463 index=titleIter.first(); 4464 } else { 4465 index=titleIter.next(); 4466 } 4467 if(index==BreakIterator.DONE || index>srcLength) { 4468 index=srcLength; 4469 } 4470 4471 4484 if(prev<index) { 4485 4486 iter.setLimit(index); 4487 while((c=iter.nextCaseMapCP())>=0 && UCaseProps.NONE==gCsp.getType(c)) {} 4488 titleStart=iter.getCPStart(); 4489 if(prev<titleStart) { 4490 result.append(str.substring(prev, titleStart)); 4492 } 4493 4494 if(titleStart<index) { 4495 4496 c=gCsp.toFullTitle(c, iter, result, locale, locCache); 4497 4498 4499 for(;;) { 4500 if(c<0) { 4501 4502 c=~c; 4503 if(c<=0xffff) { 4504 result.append((char)c); 4505 } else { 4506 UTF16.append(result, c); 4507 } 4508 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 4509 4510 } else { 4511 4512 if(c<=0xffff) { 4513 result.append((char)c); 4514 } else { 4515 UTF16.append(result, c); 4516 } 4517 } 4518 4519 if((c=iter.nextCaseMapCP())>=0) { 4520 c=gCsp.toFullLower(c, iter, result, locale, locCache); 4521 } else { 4522 break; 4523 } 4524 } 4525 } 4526 } 4527 4528 prev=index; 4529 } 4530 return result.toString(); 4531 } 4532 4533 4557 public static int foldCase(int ch, boolean defaultmapping) { 4558 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4559 } 4560 4561 4578 public static String foldCase(String str, boolean defaultmapping) { 4579 return foldCase(str, defaultmapping ? 
FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
}

/**
 * Case folding option: default mappings.
 */
public static final int FOLD_CASE_DEFAULT    =      0x0000;

/**
 * Case folding option: exclude the special I mappings.
 * NOTE(review): presumably the dotted/dotless I mappings used for Turkic
 * languages - confirm against UCaseProps.
 */
public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;

/**
 * Case-folds a single code point.
 * @param ch code point to fold
 * @param options FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I
 * @return the result of gCsp.fold(ch, options)
 */
public static int foldCase(int ch, int options) {
    return gCsp.fold(ch, options);
}

/**
 * Case-folds a string using full case folding.
 * @param str string to fold
 * @param options FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I
 * @return the folded string
 */
public static final String foldCase(String str, int options) {
    StringBuffer result = new StringBuffer(str.length());
    int c, i, length;

    length = str.length();
    for(i=0; i<length;) {
        c=UTF16.charAt(str, i);
        i+=UTF16.getCharCount(c);
        c=gCsp.toFullFolding(c, result, options);

        // Decode the result of toFullFolding.
        if(c<0) {
            // negative: the bitwise complement is the code point to append
            c=~c;
        } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
            // small non-negative value: nothing further to append here
            // NOTE(review): presumably a length, with the mapping string
            // already appended to result - confirm against UCaseProps.
            continue;
        }
        // otherwise c is a single code point to append
        if(c<=0xffff) {
            result.append((char)c);
        } else {
            UTF16.append(result, c);
        }
    }
    return result.toString();
}

/**
 * Returns the numeric value of a Han numeral code point.
 * Only the CJK ideograph constants listed in the switch are recognised.
 * @param ch code point to query
 * @return the value associated with ch, or -1 if ch is not one of the
 *         recognised ideographs
 */
public static int getHanNumericValue(int ch)
{
    switch(ch)
    {
    case IDEOGRAPHIC_NUMBER_ZERO_ :
    case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
        return 0;
    case CJK_IDEOGRAPH_FIRST_ :
    case CJK_IDEOGRAPH_COMPLEX_ONE_ :
        return 1;
    case CJK_IDEOGRAPH_SECOND_ :
    case CJK_IDEOGRAPH_COMPLEX_TWO_ :
        return 2;
    case CJK_IDEOGRAPH_THIRD_ :
    case CJK_IDEOGRAPH_COMPLEX_THREE_ :
        return 3;
    case CJK_IDEOGRAPH_FOURTH_ :
    case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
        return 4;
    case CJK_IDEOGRAPH_FIFTH_ :
    case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
        return 5;
    case CJK_IDEOGRAPH_SIXTH_ :
    case CJK_IDEOGRAPH_COMPLEX_SIX_ :
        return 6;
    case CJK_IDEOGRAPH_SEVENTH_ :
    case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
        return 7;
    case CJK_IDEOGRAPH_EIGHTH_ :
    case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
        return 8;
    case CJK_IDEOGRAPH_NINETH_ :
    case CJK_IDEOGRAPH_COMPLEX_NINE_ :
        return 9;
    case CJK_IDEOGRAPH_TEN_ :
    case CJK_IDEOGRAPH_COMPLEX_TEN_ :
        return 10;
    case CJK_IDEOGRAPH_HUNDRED_ :
    case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
        return 100;
    case CJK_IDEOGRAPH_THOUSAND_ :
    case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
        return 1000;
    case CJK_IDEOGRAPH_TEN_THOUSAND_ :
        return 10000;
    case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
        return 100000000;
    }
    return -1; // not a recognised Han numeral
}

/**
 * Returns an iterator over character types, built on the property data.
 * @return a new UCharacterTypeIterator over PROPERTY_
 */
public static RangeValueIterator getTypeIterator()
{
    return new UCharacterTypeIterator(PROPERTY_);
}

/**
 * Returns an iterator over character names using the UNICODE_CHAR_NAME
 * choice.
 * @return a new UCharacterNameIterator over the name data
 * @throws MissingResourceException if the name data (NAME_) is null; this
 *         matches the other name accessors and is a RuntimeException, so
 *         callers catching RuntimeException are unaffected
 */
public static ValueIterator getNameIterator()
{
    if(NAME_==null){
        throw new MissingResourceException("Could not load unames.icu","","");
    }
    return new UCharacterNameIterator(NAME_,
                                      UCharacterNameChoice.UNICODE_CHAR_NAME);
}

/**
 * Returns an iterator over character names using the UNICODE_10_CHAR_NAME
 * choice.
 * @return a new UCharacterNameIterator over the name data
 * @throws MissingResourceException if the name data (NAME_) is null; this
 *         matches the other name accessors and is a RuntimeException, so
 *         callers catching RuntimeException are unaffected
 */
public static ValueIterator getName1_0Iterator()
{
    if(NAME_==null){
        throw new MissingResourceException("Could not load unames.icu","","");
    }
    return new UCharacterNameIterator(NAME_,
                                      UCharacterNameChoice.UNICODE_10_CHAR_NAME);
}

/**
 * Returns an iterator over character names using the EXTENDED_CHAR_NAME
 * choice.
 * @return a new UCharacterNameIterator over the name data
 * @throws MissingResourceException if the name data (NAME_) is null
 */
public static ValueIterator getExtendedNameIterator()
{
    if(NAME_==null){
        throw new MissingResourceException("Could not load unames.icu","","");
    }
    return new UCharacterNameIterator(NAME_,
                                      UCharacterNameChoice.EXTENDED_CHAR_NAME);
}

/**
 * Returns the "age" of a code point as stored in the property data.
 * @param ch code point to query
 * @return the result of PROPERTY_.getAge(ch)
 * @throws IllegalArgumentException if ch is outside MIN_VALUE..MAX_VALUE
 */
public static VersionInfo getAge(int ch)
{
    if (ch < MIN_VALUE || ch > MAX_VALUE) {
        throw new IllegalArgumentException("Codepoint out of bounds");
    }
    return PROPERTY_.getAge(ch);
}

/**
 * Tests whether a code point has a binary property.
 * @param ch code point to test
 * @param property binary property selector
 * @return the result of PROPERTY_.hasBinaryProperty(ch, property)
 * @throws IllegalArgumentException if ch is outside MIN_VALUE..MAX_VALUE
 */
public static boolean hasBinaryProperty(int ch, int property)
{
    if (ch < MIN_VALUE || ch > MAX_VALUE) {
        throw new IllegalArgumentException("Codepoint out of bounds");
    }
    return PROPERTY_.hasBinaryProperty(ch, property);
}

/**
 * Tests whether a code point has the ALPHABETIC binary property.
 * @param ch code point to test
 * @return the result of hasBinaryProperty(ch, UProperty.ALPHABETIC)
 */
public static boolean isUAlphabetic(int ch)
{
    return hasBinaryProperty(ch, UProperty.ALPHABETIC);
}

/**
 * Tests whether a code point has the LOWERCASE binary property.
 * @param ch code point to test
 * @return the result of hasBinaryProperty(ch, UProperty.LOWERCASE)
 */
public static boolean isULowercase(int ch)
{
    return hasBinaryProperty(ch, UProperty.LOWERCASE);
}

public static boolean
isUUppercase(int ch) 4920 { 4921 return hasBinaryProperty(ch, UProperty.UPPERCASE); 4922 } 4923 4924 4932 public static boolean isUWhiteSpace(int ch) 4933 { 4934 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 4935 } 4936 4937 4938 4977 public static int getIntPropertyValue(int ch, int type) 4978 { 4979 if (type < UProperty.BINARY_START) { 4980 return 0; } 4982 else if (type < UProperty.BINARY_LIMIT) { 4983 return hasBinaryProperty(ch, type) ? 1 : 0; 4984 } 4985 else if (type < UProperty.INT_START) { 4986 return 0; } 4988 else if (type < UProperty.INT_LIMIT) { 4989 switch (type) { 4991 case UProperty.BIDI_CLASS: 4992 return getDirection(ch); 4993 case UProperty.BLOCK: 4994 return UnicodeBlock.idOf(ch); 4995 case UProperty.CANONICAL_COMBINING_CLASS: 4996 return getCombiningClass(ch); 4997 case UProperty.DECOMPOSITION_TYPE: 4998 return PROPERTY_.getAdditional(ch, 2) 4999 & DECOMPOSITION_TYPE_MASK_; 5000 case UProperty.EAST_ASIAN_WIDTH: 5001 return (PROPERTY_.getAdditional(ch, 0) 5002 & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_; 5003 case UProperty.GENERAL_CATEGORY: 5004 return getType(ch); 5005 case UProperty.JOINING_GROUP: 5006 return gBdp.getJoiningGroup(ch); 5007 case UProperty.JOINING_TYPE: 5008 return gBdp.getJoiningType(ch); 5009 case UProperty.LINE_BREAK: 5010 return (int)(PROPERTY_.getAdditional(ch, 0)& LINE_BREAK_MASK_)>>LINE_BREAK_SHIFT_; 5011 case UProperty.NUMERIC_TYPE: 5012 type=getNumericType(PROPERTY_.getProperty(ch)); 5013 if(type>NumericType.NUMERIC) { 5014 5015 type=NumericType.NUMERIC; 5016 } 5017 return type; 5018 case UProperty.SCRIPT: 5019 return UScript.getScript(ch); 5020 case UProperty.HANGUL_SYLLABLE_TYPE: 5021 5022 if(ch<NormalizerImpl.JAMO_L_BASE) { 5023 5024 } else if(ch<=0x11ff) { 5025 5026 if(ch<=0x115f) { 5027 5028 if(ch==0x115f || ch<=0x1159 || getType(ch)==UCharacterCategory.OTHER_LETTER) { 5029 return HangulSyllableType.LEADING_JAMO; 5030 } 5031 } else if(ch<=0x11a7) { 5032 5033 if(ch<=0x11a2 || 
getType(ch)==UCharacterCategory.OTHER_LETTER) { 5034 return HangulSyllableType.VOWEL_JAMO; 5035 } 5036 } else { 5037 5038 if(ch<=0x11f9 || getType(ch)==UCharacterCategory.OTHER_LETTER) { 5039 return HangulSyllableType.TRAILING_JAMO; 5040 } 5041 } 5042 } else if((ch-=NormalizerImpl.HANGUL_BASE)<0) { 5043 5044 } else if(ch<NormalizerImpl.HANGUL_COUNT) { 5045 5046 return ch%NormalizerImpl.JAMO_T_COUNT==0 ? HangulSyllableType.LV_SYLLABLE : HangulSyllableType.LVT_SYLLABLE; 5047 } 5048 return 0; 5049 5050 case UProperty.NFD_QUICK_CHECK: 5051 case UProperty.NFKD_QUICK_CHECK: 5052 case UProperty.NFC_QUICK_CHECK: 5053 case UProperty.NFKC_QUICK_CHECK: 5054 return NormalizerImpl.quickCheck(ch, (type-UProperty.NFD_QUICK_CHECK)+2); case UProperty.LEAD_CANONICAL_COMBINING_CLASS: 5056 return NormalizerImpl.getFCD16(ch)>>8; 5057 case UProperty.TRAIL_CANONICAL_COMBINING_CLASS: 5058 return NormalizerImpl.getFCD16(ch)&0xff; 5059 case UProperty.GRAPHEME_CLUSTER_BREAK: 5060 return (int)(PROPERTY_.getAdditional(ch, 2)& GCB_MASK)>>GCB_SHIFT; 5061 case UProperty.SENTENCE_BREAK: 5062 return (int)(PROPERTY_.getAdditional(ch, 2)& SB_MASK)>>SB_SHIFT; 5063 case UProperty.WORD_BREAK: 5064 return (int)(PROPERTY_.getAdditional(ch, 2)& WB_MASK)>>WB_SHIFT; 5065 default: 5066 5067 return 0; 5068 } 5069 } else if (type == UProperty.GENERAL_CATEGORY_MASK) { 5070 return UCharacterProperty.getMask(getType(ch)); 5071 } 5072 return 0; } 5074 5083 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5084 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5086 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5087 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), nameChoice); 5088 } 5089 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5090 return String.valueOf(getUnicodeNumericValue(codepoint)); 5091 } 5092 switch (propertyEnum) { 5094 case UProperty.AGE: return 
getAge(codepoint).toString(); 5095 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5096 case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(getMirror(codepoint)); 5097 case UProperty.CASE_FOLDING: return foldCase(UTF16.valueOf(codepoint), true); 5098 case UProperty.LOWERCASE_MAPPING: return toLowerCase(UTF16.valueOf(codepoint)); 5099 case UProperty.NAME: return getName(codepoint); 5100 case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(foldCase(codepoint,true)); 5101 case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(toLowerCase(codepoint)); 5102 case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(toTitleCase(codepoint)); 5103 case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(toUpperCase(codepoint)); 5104 case UProperty.TITLECASE_MAPPING: return toTitleCase(UTF16.valueOf(codepoint),null); 5105 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5106 case UProperty.UPPERCASE_MAPPING: return toUpperCase(UTF16.valueOf(codepoint)); 5107 } 5108 throw new IllegalArgumentException ("Illegal Property Enum"); 5109 } 5110 5111 5129 public static int getIntPropertyMinValue(int type) 5130 { 5131 5132 return 0; } 5135 5136 5137 5161 public static int getIntPropertyMaxValue(int type) 5162 { 5163 if (type < UProperty.BINARY_START) { 5164 return -1; } 5166 else if (type < UProperty.BINARY_LIMIT) { 5167 return 1; } 5169 else if (type < UProperty.INT_START) { 5170 return -1; } 5172 else if (type < UProperty.INT_LIMIT) { 5173 switch (type) { 5174 case UProperty.BIDI_CLASS: 5175 case UProperty.JOINING_GROUP: 5176 case UProperty.JOINING_TYPE: 5177 return gBdp.getMaxValue(type); 5178 case UProperty.BLOCK: 5179 return (PROPERTY_.getMaxValues(0) & BLOCK_MASK_) >> BLOCK_SHIFT_; 5180 case UProperty.CANONICAL_COMBINING_CLASS: 5181 case UProperty.LEAD_CANONICAL_COMBINING_CLASS: 5182 case UProperty.TRAIL_CANONICAL_COMBINING_CLASS: 5183 return 0xff; case UProperty.DECOMPOSITION_TYPE: 5186 return PROPERTY_.getMaxValues(2) & 
DECOMPOSITION_TYPE_MASK_; 5187 case UProperty.EAST_ASIAN_WIDTH: 5188 return (PROPERTY_.getMaxValues(0) & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_; 5189 case UProperty.GENERAL_CATEGORY: 5190 return UCharacterCategory.CHAR_CATEGORY_COUNT - 1; 5191 case UProperty.LINE_BREAK: 5192 return (PROPERTY_.getMaxValues(0) & LINE_BREAK_MASK_) >> LINE_BREAK_SHIFT_; 5193 case UProperty.NUMERIC_TYPE: 5194 return NumericType.COUNT - 1; 5195 case UProperty.SCRIPT: 5196 return PROPERTY_.getMaxValues(0) & SCRIPT_MASK_; 5197 case UProperty.HANGUL_SYLLABLE_TYPE: 5198 return HangulSyllableType.COUNT-1; 5199 case UProperty.NFD_QUICK_CHECK: 5200 case UProperty.NFKD_QUICK_CHECK: 5201 return 1; case UProperty.NFC_QUICK_CHECK: 5203 case UProperty.NFKC_QUICK_CHECK: 5204 return 2; case UProperty.GRAPHEME_CLUSTER_BREAK: 5206 return (PROPERTY_.getMaxValues(2) & GCB_MASK) >> GCB_SHIFT; 5207 case UProperty.SENTENCE_BREAK: 5208 return (PROPERTY_.getMaxValues(2) & SB_MASK) >> SB_SHIFT; 5209 case UProperty.WORD_BREAK: 5210 return (PROPERTY_.getMaxValues(2) & WB_MASK) >> WB_SHIFT; 5211 default: 5212 return -1; } 5214 5215 } 5216 return -1; } 5218 5219 5223 public static char forDigit(int digit, int radix) { 5224 return java.lang.Character.forDigit(digit, radix); 5225 } 5226 5227 5229 5234 public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE; 5235 5236 5241 public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE; 5242 5243 5248 public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE; 5249 5250 5255 public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE; 5256 5257 5262 public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE; 5263 5264 5269 public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE; 5270 5271 5276 public static final int MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE; 5277 5278 5283 public static final int MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE; 5284 5285 5290 public 
static final int MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE; 5291 5292 5298 public static final boolean isValidCodePoint(int cp) { 5299 return cp >= 0 && cp <= MAX_CODE_POINT; 5300 } 5301 5302 5308 public static final boolean isSupplementaryCodePoint(int cp) { 5309 return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE 5310 && cp <= UTF16.CODEPOINT_MAX_VALUE; 5311 } 5312 5313 5319 public static boolean isHighSurrogate(char ch) { 5320 return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; 5321 } 5322 5323 5329 public static boolean isLowSurrogate(char ch) { 5330 return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; 5331 } 5332 5333 5341 public static final boolean isSurrogatePair(char high, char low) { 5342 return isHighSurrogate(high) && isLowSurrogate(low); 5343 } 5344 5345 5354 public static int charCount(int cp) { 5355 return UTF16.getCharCount(cp); 5356 } 5357 5358 5366 public static final int toCodePoint(char high, char low) { 5367 return UCharacterProperty.getRawSupplementary(high, low); 5368 } 5369 5370 5379 public static final int codePointAt(String seq, int index) { 5381 char c1 = seq.charAt(index++); 5382 if (isHighSurrogate(c1)) { 5383 if (index < seq.length()) { 5384 char c2 = seq.charAt(index); 5385 if (isLowSurrogate(c2)) { 5386 return toCodePoint(c1, c2); 5387 } 5388 } 5389 } 5390 return c1; 5391 } 5392 public static final int codePointAt(StringBuffer seq, int index) { 5393 return codePointAt(seq.toString(), index); 5394 } 5395 5418 5427 public static final int codePointAt(char[] text, int index) { 5428 char c1 = text[index++]; 5429 if (isHighSurrogate(c1)) { 5430 if (index < text.length) { 5431 char c2 = text[index]; 5432 if (isLowSurrogate(c2)) { 5433 return toCodePoint(c1, c2); 5434 } 5435 } 5436 } 5437 return c1; 5438 } 5439 5440 5450 public static final int codePointAt(char[] text, int index, int limit) { 5451 if (index >= limit || limit > text.length) { 5452 throw new IndexOutOfBoundsException (); 5453 } 5454 char c1 = text[index++]; 5455 if 
(isHighSurrogate(c1)) { 5456 if (index < limit) { 5457 char c2 = text[index]; 5458 if (isLowSurrogate(c2)) { 5459 return toCodePoint(c1, c2); 5460 } 5461 } 5462 } 5463 return c1; 5464 } 5465 5466 5475 public static final int codePointBefore(StringBuffer seq, int index) { 5477 return codePointBefore(seq.toString(), index); 5478 } 5479 public static final int codePointBefore(String seq, int index) { 5480 char c2 = seq.charAt(--index); 5481 if (isLowSurrogate(c2)) { 5482 if (index > 0) { 5483 char c1 = seq.charAt(--index); 5484 if (isHighSurrogate(c1)) { 5485 return toCodePoint(c1, c2); 5486 } 5487 } 5488 } 5489 return c2; 5490 } 5491 5514 5523 public static final int codePointBefore(char[] text, int index) { 5524 char c2 = text[--index]; 5525 if (isLowSurrogate(c2)) { 5526 if (index > 0) { 5527 char c1 = text[--index]; 5528 if (isHighSurrogate(c1)) { 5529 return toCodePoint(c1, c2); 5530 } 5531 } 5532 } 5533 return c2; 5534 } 5535 5536 5546 public static final int codePointBefore(char[] text, int index, int limit) { 5547 if (index <= limit || limit < 0) { 5548 throw new IndexOutOfBoundsException (); 5549 } 5550 char c2 = text[--index]; 5551 if (isLowSurrogate(c2)) { 5552 if (index > limit) { 5553 char c1 = text[--index]; 5554 if (isHighSurrogate(c1)) { 5555 return toCodePoint(c1, c2); 5556 } 5557 } 5558 } 5559 return c2; 5560 } 5561 5562 5572 public static final int toChars(int cp, char[] dst, int dstIndex) { 5573 if (cp >= 0) { 5574 if (cp < MIN_SUPPLEMENTARY_CODE_POINT) { 5575 dst[dstIndex] = (char)cp; 5576 return 1; 5577 } 5578 if (cp <= MAX_CODE_POINT) { 5579 dst[dstIndex] = UTF16.getLeadSurrogate(cp); 5580 dst[dstIndex+1] = UTF16.getTrailSurrogate(cp); 5581 return 2; 5582 } 5583 } 5584 throw new IllegalArgumentException (); 5585 } 5586 5587 5595 public static final char[] toChars(int cp) { 5596 if (cp >= 0) { 5597 if (cp < MIN_SUPPLEMENTARY_CODE_POINT) { 5598 return new char[] { (char)cp }; 5599 } 5600 if (cp <= MAX_CODE_POINT) { 5601 return new char[] { 5602 
UTF16.getLeadSurrogate(cp), 5603 UTF16.getTrailSurrogate(cp) 5604 }; 5605 } 5606 } 5607 throw new IllegalArgumentException (); 5608 } 5609 5610 5622 public static byte getDirectionality(int cp) 5623 { 5624 return (byte)getDirection(cp); 5625 } 5626 5627 5635 public static int codePointCount(String text, int start, int limit) { 5637 if (start < 0 || limit < start || limit > text.length()) { 5638 throw new IndexOutOfBoundsException ("start (" + start + 5639 ") or limit (" + limit + 5640 ") invalid or out of range 0, " + text.length()); 5641 } 5642 5643 int len = limit - start; 5644 while (limit > start) { 5645 char ch = text.charAt(--limit); 5646 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5647 ch = text.charAt(--limit); 5648 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5649 --len; 5650 break; 5651 } 5652 } 5653 } 5654 return len; 5655 } 5656 5684 5692 public static int codePointCount(char[] text, int start, int limit) { 5693 if (start < 0 || limit < start || limit > text.length) { 5694 throw new IndexOutOfBoundsException ("start (" + start + 5695 ") or limit (" + limit + 5696 ") invalid or out of range 0, " + text.length); 5697 } 5698 5699 int len = limit - start; 5700 while (limit > start) { 5701 char ch = text[--limit]; 5702 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5703 ch = text[--limit]; 5704 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5705 --len; 5706 break; 5707 } 5708 } 5709 } 5710 return len; 5711 } 5712 5713 5721 public static int offsetByCodePoints(String text, int index, int codePointOffset) { 5723 if (index < 0 || index > text.length()) { 5724 throw new IndexOutOfBoundsException ("index ( " + index + 5725 ") out of range 0, " + text.length()); 5726 } 5727 5728 if (codePointOffset < 0) { 5729 while (++codePointOffset <= 0) { 5730 char ch = text.charAt(--index); 5731 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 5732 ch = 
text.charAt(--index); 5733 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5734 if (++codePointOffset > 0) { 5735 return index+1; 5736 } 5737 } 5738 } 5739 } 5740 } else { 5741 int limit = text.length(); 5742 while (--codePointOffset >= 0) { 5743 char ch = text.charAt(index++); 5744 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5745 ch = text.charAt(index++); 5746 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5747 if (--codePointOffset < 0) { 5748 return index-1; 5749 } 5750 } 5751 } 5752 } 5753 } 5754 5755 return index; 5756 } 5757 5809 public static int offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset) { 5810 int limit = start + count; 5811 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 5812 throw new IndexOutOfBoundsException ("index ( " + index + 5813 ") out of range " + start + 5814 ", " + limit + 5815 " in array 0, " + text.length); 5816 } 5817 5818 if (codePointOffset < 0) { 5819 while (++codePointOffset <= 0) { 5820 char ch = text[--index]; 5821 if (index < start) { 5822 throw new IndexOutOfBoundsException ("index ( " + index + 5823 ") < start (" + start + 5824 ")"); 5825 } 5826 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 5827 ch = text[--index]; 5828 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5829 if (++codePointOffset > 0) { 5830 return index+1; 5831 } 5832 } 5833 } 5834 } 5835 } else { 5836 while (--codePointOffset >= 0) { 5837 char ch = text[index++]; 5838 if (index > limit) { 5839 throw new IndexOutOfBoundsException ("index ( " + index + 5840 ") > limit (" + limit + 5841 ")"); 5842 } 5843 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5844 ch = text[index++]; 5845 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5846 if (--codePointOffset < 0) { 5847 return index-1; 5848 } 5849 } 5850 } 5851 } 5852 } 5853 5854 return index; 5855 } 
5856 5857 5859 5862 static UCharacterName NAME_ = null; 5863 5864 5867 static UPropertyAliases PNAMES_ = null; 5868 5869 static { 5871 try { 5872 PNAMES_ = new UPropertyAliases(); 5873 NAME_ = UCharacterName.getInstance(); 5874 } catch (IOException e) { 5875 throw new MissingResourceException (e.getMessage(),"",""); 5877 } 5882 } 5883 5884 5886 5889 private static final UCharacterProperty PROPERTY_; 5890 5893 private static final char[] PROPERTY_TRIE_INDEX_; 5894 private static final char[] PROPERTY_TRIE_DATA_; 5895 private static final int PROPERTY_INITIAL_VALUE_; 5896 5897 private static final UCaseProps gCsp; 5898 private static final UBiDiProps gBdp; 5899 5900 static 5902 { 5903 try 5904 { 5905 PROPERTY_ = UCharacterProperty.getInstance(); 5906 PROPERTY_TRIE_INDEX_ = PROPERTY_.m_trieIndex_; 5907 PROPERTY_TRIE_DATA_ = PROPERTY_.m_trieData_; 5908 PROPERTY_INITIAL_VALUE_ = PROPERTY_.m_trieInitialValue_; 5909 } 5910 catch (Exception e) 5911 { 5912 throw new MissingResourceException (e.getMessage(),"",""); 5913 } 5914 5915 5940 UCaseProps csp; 5941 try { 5942 csp=UCaseProps.getSingleton(); 5943 } catch(IOException e) { 5944 csp=UCaseProps.getDummy(); 5945 } 5946 gCsp=csp; 5947 5948 UBiDiProps bdp; 5949 try { 5950 bdp=UBiDiProps.getSingleton(); 5951 } catch(IOException e) { 5952 bdp=UBiDiProps.getDummy(); 5953 } 5954 gBdp=bdp; 5955 } 5956 5957 5960 private static final int LAST_CHAR_MASK_ = 0xFFFF; 5961 5962 5965 private static final int LAST_BYTE_MASK_ = 0xFF; 5966 5967 5970 private static final int SHIFT_16_ = 16; 5971 5972 5975 private static final int SHIFT_24_ = 24; 5976 5977 5980 private static final int DECIMAL_RADIX_ = 10; 5981 5982 5985 private static final int NO_BREAK_SPACE_ = 0xA0; 5986 5987 5990 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 5991 5992 5995 private static final int ZERO_WIDTH_NO_BREAK_SPACE_ = 0xFEFF; 5996 5997 6000 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 6001 6002 6005 private static final int 
CJK_IDEOGRAPH_FIRST_ = 0x4e00; 6006 6007 6010 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 6011 6012 6015 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 6016 6017 6020 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8; 6021 6022 6025 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 6026 6027 6030 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 6031 6032 6035 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 6036 6037 6040 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 6041 6042 6045 private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; 6046 6047 6050 private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F; 6051 6052 6055 private static final int UNIT_SEPARATOR_ = 0x001F; 6056 6057 6060 private static final int DELETE_ = 0x007F; 6061 6064 private static final int ISO_CONTROL_FIRST_RANGE_MAX_ = 0x1F; 6065 6068 private static final int NUMERIC_TYPE_SHIFT_ = 5; 6069 6072 private static final int NUMERIC_TYPE_MASK_ = 0x7 << NUMERIC_TYPE_SHIFT_; 6073 6074 6075 private static final int MAX_SMALL_NUMBER=0xff; 6076 6077 private static final int FRACTION_NUM_SHIFT=3; 6078 private static final int FRACTION_DEN_MASK=7; 6079 6080 private static final int FRACTION_MAX_NUM=31; 6081 private static final int FRACTION_DEN_OFFSET=2; 6082 6083 private static final int FRACTION_MIN_DEN=FRACTION_DEN_OFFSET; 6084 private static final int FRACTION_MAX_DEN=FRACTION_MIN_DEN+FRACTION_DEN_MASK; 6085 6086 private static final int LARGE_MANT_SHIFT=4; 6087 private static final int LARGE_EXP_MASK=0xf; 6088 private static final int LARGE_EXP_OFFSET=2; 6089 private static final int LARGE_EXP_OFFSET_EXTRA=18; 6090 6091 private static final int LARGE_MIN_EXP=LARGE_EXP_OFFSET; 6092 private static final int LARGE_MAX_EXP=LARGE_MIN_EXP+LARGE_EXP_MASK; 6093 private static final int LARGE_MAX_EXP_EXTRA=LARGE_EXP_OFFSET_EXTRA+LARGE_EXP_MASK; 6094 6095 6098 private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6; 6099 private static 
final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9; 6100 private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3; 6101 private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53c3; 6102 private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086; 6103 private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4f0d; 6104 private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678; 6105 private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67d2; 6106 private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634c; 6107 private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396; 6108 private static final int CJK_IDEOGRAPH_TEN_ = 0x5341; 6109 private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62fe; 6110 private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767e; 6111 private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4f70; 6112 private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343; 6113 private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf; 6114 private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824c; 6115 private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104; 6116 6117 6121 private static final int ZERO_WIDTH_NON_JOINER_ = 0x200c; 6122 6126 private static final int ZERO_WIDTH_JOINER_ = 0x200d; 6127 6128 6138 private static final int SB_MASK = 0x0007c000; 6139 private static final int SB_SHIFT = 14; 6140 6141 private static final int WB_MASK = 0x00003c00; 6142 private static final int WB_SHIFT = 10; 6143 6144 private static final int GCB_MASK = 0x000003e0; 6145 private static final int GCB_SHIFT = 5; 6146 6147 6151 private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f; 6152 6153 6162 6163 6167 private static final int EAST_ASIAN_MASK_ = 0x00038000; 6168 6172 private static final int EAST_ASIAN_SHIFT_ = 15; 6173 6177 private static final int LINE_BREAK_MASK_ = 0x00FC0000; 6178 6182 private static final int LINE_BREAK_SHIFT_ = 18; 6183 6187 private static final int BLOCK_MASK_ = 0x00007f80; 6188 6192 private 
static final int BLOCK_SHIFT_ = 7; 6193 6197 private static final int SCRIPT_MASK_ = 0x0000007f; 6198 6199 6204 private UCharacter() 6205 { 6206 } 6207 6210 6218 private static int getEuropeanDigit(int ch) { 6219 if ((ch > 0x7a && ch < 0xff21) 6220 || ch < 0x41 || (ch > 0x5a && ch < 0x61) 6221 || ch > 0xff5a || (ch > 0xff31 && ch < 0xff41)) { 6222 return -1; 6223 } 6224 if (ch <= 0x7a) { 6225 return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61); 6227 } 6228 if (ch <= 0xff3a) { 6230 return ch + 10 - 0xff21; 6231 } 6232 return ch + 10 - 0xff41; 6234 } 6235 6236 6241 private static int getNumericType(int props) 6242 { 6243 return (props & NUMERIC_TYPE_MASK_) >> NUMERIC_TYPE_SHIFT_; 6244 } 6245 6246 6258 private static final int getProperty(int ch) 6259 { 6260 if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE 6261 || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE 6262 && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) { 6263 try { return PROPERTY_TRIE_DATA_[ 6266 (PROPERTY_TRIE_INDEX_[ch >> 5] << 2) 6267 + (ch & 0x1f)]; 6268 } catch (ArrayIndexOutOfBoundsException e) { 6269 return PROPERTY_INITIAL_VALUE_; 6270 } 6271 } 6272 if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { 6273 return PROPERTY_TRIE_DATA_[ 6275 (PROPERTY_TRIE_INDEX_[(0x2800 >> 5) + (ch >> 5)] << 2) 6276 + (ch & 0x1f)]; 6277 } 6278 if (ch <= UTF16.CODEPOINT_MAX_VALUE) { 6280 return PROPERTY_.m_trie_.getSurrogateValue( 6284 UTF16.getLeadSurrogate(ch), 6285 (char)(ch & 0x3ff)); 6286 } 6287 return PROPERTY_INITIAL_VALUE_; 6292 } 6293} 6294 | Popular Tags |