package com.ibm.icu.impl;

import java.io.BufferedInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.util.Locale;
import java.util.MissingResourceException;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.VersionInfo;

/**
 * Internal holder for character property data read from the ICU data file
 * named by {@link #DATA_FILE_NAME_} (see the private constructor).
 * <p>
 * The class exposes a main property word per code point
 * ({@link #getProperty(int)}), additional property vector columns
 * ({@link #getAdditional(int, int)}) and helpers built on top of them such as
 * {@link #hasBinaryProperty(int, int)} and {@link #getSource(int)}.
 * <p>
 * A single lazily created instance is handed out by {@link #getInstance()}.
 */
public final class UCharacterProperty
{
    // public data members -----------------------------------------------

    /**
     * Trie with the main property words. Used directly by
     * {@link #getProperty(int)} for supplementary code points.
     */
    public CharTrie m_trie_;
    /**
     * Index array of the trie, cached by
     * {@link #setIndexData(CharTrie.FriendAgent)} for direct lookups.
     */
    public char[] m_trieIndex_;
    /**
     * Data array of the trie, cached by
     * {@link #setIndexData(CharTrie.FriendAgent)} for direct lookups.
     */
    public char[] m_trieData_;
    /**
     * Initial value of the trie; returned by {@link #getProperty(int)} when
     * a lookup falls outside the stored data.
     */
    public int m_trieInitialValue_;
    /**
     * Unicode version. Not assigned anywhere in this class; presumably filled
     * in by the data reader -- TODO confirm.
     */
    public VersionInfo m_unicodeVersion_;
    /**
     * Code point U+0130.
     */
    public static final char LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_ = 0x130;
    /**
     * Code point U+0131.
     */
    public static final char LATIN_SMALL_LETTER_DOTLESS_I_ = 0x131;
    /**
     * Code point U+0069.
     */
    public static final char LATIN_SMALL_LETTER_I_ = 0x69;
    /**
     * Mask for the low five bits; combined with general category bits in
     * {@link #MY_MASK}.
     */
    public static final int TYPE_MASK = 0x1F;

    // Property source selectors, as returned by getSource(int). The SRC_CASE,
    // SRC_NORM, SRC_BIDI, SRC_CHAR and SRC_CHAR_AND_PROPSVEC values are also
    // stored in the "column" slot of binProps entries whose mask is 0.

    /** No source, not a supported property. */
    public static final int SRC_NONE=0;
    /** Source selector: main character properties. */
    public static final int SRC_CHAR=1;
    /** Source selector: properties vectors. */
    public static final int SRC_PROPSVEC=2;
    /** Source selector: Hangul syllable type. */
    public static final int SRC_HST=3;
    /** Source selector: character names. */
    public static final int SRC_NAMES=4;
    /** Source selector: normalization data. */
    public static final int SRC_NORM=5;
    /** Source selector: case properties. */
    public static final int SRC_CASE=6;
    /** Source selector: bidi properties. */
    public static final int SRC_BIDI=7;
    /** Source selector: main character properties and properties vectors. */
    public static final int SRC_CHAR_AND_PROPSVEC=8;
    /** One more than the highest source selector value. */
    public static final int SRC_COUNT=9;

    // public methods ----------------------------------------------------

    /**
     * Caches the trie's private index array, data array and initial value in
     * this object so that {@link #getProperty(int)} can index them directly.
     * @param friendagent accessor handed over by the trie
     */
    public void setIndexData(CharTrie.FriendAgent friendagent)
    {
        m_trieIndex_ = friendagent.getPrivateIndex();
        m_trieData_ = friendagent.getPrivateData();
        m_trieInitialValue_ = friendagent.getPrivateInitialValue();
    }

    /**
     * Returns the main property word for a code point.
     * @param ch code point whose property word is to be retrieved
     * @return the property word, or {@link #m_trieInitialValue_} when the
     *         code point is above UTF16.CODEPOINT_MAX_VALUE or the direct
     *         BMP lookup indexes outside the arrays
     */
    public final int getProperty(int ch)
    {
        if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
            || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE
                && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
            // BMP code point that is not a lead surrogate: index the cached
            // arrays directly. Negative input also lands here; the resulting
            // out-of-range index is turned into the initial value instead of
            // being range-checked up front.
            try {
                return m_trieData_[
                    (m_trieIndex_[ch >> Trie.INDEX_STAGE_1_SHIFT_]
                          << Trie.INDEX_STAGE_2_SHIFT_)
                    + (ch & Trie.INDEX_STAGE_3_MASK_)];
            } catch (ArrayIndexOutOfBoundsException e) {
                return m_trieInitialValue_;
            }
        }
        if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
            // lead surrogate code point: its index block is offset by
            // Trie.LEAD_INDEX_OFFSET_
            return m_trieData_[
                    (m_trieIndex_[Trie.LEAD_INDEX_OFFSET_
                                  + (ch >> Trie.INDEX_STAGE_1_SHIFT_)]
                          << Trie.INDEX_STAGE_2_SHIFT_)
                    + (ch & Trie.INDEX_STAGE_3_MASK_)];
        }
        if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
            // supplementary code point: delegate to the trie
            return m_trie_.getSurrogateValue(
                          UTF16.getLeadSurrogate(ch),
                          (char)(ch & Trie.SURROGATE_MASK_));
        }
        // above the code point range
        return m_trieInitialValue_;
    }

    /**
     * Extracts bits 8..15 of a property word as a signed value.
     * @param prop property word
     * @return the low 16 bits of <code>prop</code>, sign-extended and shifted
     *         right by {@link #VALUE_SHIFT_}
     */
    public static int getSignedValue(int prop)
    {
        return ((short)prop >> VALUE_SHIFT_);
    }

    /**
     * Extracts bits 8..15 of a property word as an unsigned value.
     * @param prop property word
     * @return value in the range 0..0xFF
     */
    public static int getUnsignedValue(int prop)
    {
        return (prop >> VALUE_SHIFT_) & UNSIGNED_VALUE_MASK_AFTER_SHIFT_;
    }

    // Numeric type codes; their consumers are not part of this class.
    public static final int NT_FRACTION=4;
    public static final int NT_LARGE=5;
    public static final int NT_COUNT=6;

    /**
     * Returns one word of the additional properties vector for a code point.
     * @param codepoint code point to look up
     * @param column vector column; the special value -1 selects the main
     *        property word from {@link #getProperty(int)}
     * @return the requested word, or 0 if <code>column</code> is negative
     *         (other than -1) or not below the number of stored columns
     */
    public int getAdditional(int codepoint, int column) {
        if (column == -1) {
            return getProperty(codepoint);
        }
        if (column < 0 || column >= m_additionalColumnsCount_) {
            return 0;
        }
        return m_additionalVectors_[
                     m_additionalTrie_.getCodePointValue(codepoint) + column];
    }

    /**
     * TYPE_MASK combined with the bit set of the five letter categories
     * (Lu, Ll, Lt, Lm, Lo).
     */
    static final int MY_MASK = UCharacterProperty.TYPE_MASK
        & ((1<<UCharacterCategory.UPPERCASE_LETTER) |
            (1<<UCharacterCategory.LOWERCASE_LETTER) |
            (1<<UCharacterCategory.TITLECASE_LETTER) |
            (1<<UCharacterCategory.MODIFIER_LETTER) |
            (1<<UCharacterCategory.OTHER_LETTER));

    /**
     * Returns the "age" of a code point as a version number.
     * The age byte is taken from the top bits of properties vector column 0;
     * its high nibble becomes the major and its low nibble the minor version.
     * @param codepoint code point to look up
     * @return version with the decoded major and minor fields, remaining
     *         fields 0
     */
    public VersionInfo getAge(int codepoint)
    {
        int version = getAdditional(codepoint, 0) >> AGE_SHIFT_;
        return VersionInfo.getInstance(
                           (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_,
                           version & LAST_NIBBLE_MASK_, 0, 0);
    }

    /** Keeps the low 32 bits when an int has been widened to long. */
    private static final long UNSIGNED_INT_MASK = 0xffffffffL;

    // single-category bit masks used by isgraphPOSIX()
    private static final int GC_CN_MASK = getMask(UCharacter.UNASSIGNED);
    private static final int GC_CC_MASK = getMask(UCharacter.CONTROL);
    private static final int GC_CS_MASK = getMask(UCharacter.SURROGATE);
    private static final int GC_ZS_MASK = getMask(UCharacter.SPACE_SEPARATOR);
    private static final int GC_ZL_MASK = getMask(UCharacter.LINE_SEPARATOR);
    private static final int GC_ZP_MASK = getMask(UCharacter.PARAGRAPH_SEPARATOR);
    /** All three separator categories. */
    private static final int GC_Z_MASK = GC_ZS_MASK|GC_ZL_MASK|GC_ZP_MASK;

    /**
     * Tests the POSIX "graph" class as implemented here: true unless the
     * general category of <code>c</code> is control, surrogate, unassigned
     * or one of the separator categories.
     * @param c code point to test
     * @return true if <code>c</code> is in none of the excluded categories
     */
    private static final boolean isgraphPOSIX(int c) {
        return (getMask(UCharacter.getType(c))&
                (GC_CC_MASK|GC_CS_MASK|GC_CN_MASK|GC_Z_MASK))
               ==0;
    }

    /**
     * One row of the binary property table.
     * If <code>mask</code> is non-zero, <code>column</code> is a properties
     * vector column and <code>mask</code> selects the property bit in it.
     * If <code>mask</code> is 0, <code>column</code> holds one of the SRC_
     * selectors and the property is computed in hasBinaryProperty().
     */
    private static final class BinaryProperties{
        final int column;
        final long mask;
        public BinaryProperties(int column,long mask){
            this.column = column;
            this.mask = mask;
        }
    }

    /**
     * Binary property table, indexed by the property constant passed to
     * {@link #hasBinaryProperty(int, int)} and {@link #getSource(int)}.
     * The order of the rows is therefore significant.
     * <p>
     * Note: the masks are computed with int shifts, so a shift by 31 widens
     * to a long with the upper 32 bits set. hasBinaryProperty() compensates
     * by masking the data word with UNSIGNED_INT_MASK before testing.
     */
    BinaryProperties[] binProps={
        new BinaryProperties(  1, (  1 << ALPHABETIC_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << ASCII_HEX_DIGIT_PROPERTY_) ),
        new BinaryProperties( SRC_BIDI, 0 ),
        new BinaryProperties( SRC_BIDI, 0 ),
        new BinaryProperties(  1, (  1 << DASH_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << DEPRECATED_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << DIACRITIC_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << EXTENDER_PROPERTY_) ),
        new BinaryProperties( SRC_NORM, 0 ),
        new BinaryProperties(  1, (  1 << GRAPHEME_BASE_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << GRAPHEME_EXTEND_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << GRAPHEME_LINK_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << HEX_DIGIT_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << HYPHEN_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << ID_CONTINUE_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << ID_START_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << IDEOGRAPHIC_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << IDS_BINARY_OPERATOR_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << IDS_TRINARY_OPERATOR_PROPERTY_) ),
        new BinaryProperties( SRC_BIDI, 0 ),
        new BinaryProperties(  1, (  1 << LOGICAL_ORDER_EXCEPTION_PROPERTY_) ),
        new BinaryProperties( SRC_CASE, 0 ),
        new BinaryProperties(  1, (  1 << MATH_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << NONCHARACTER_CODE_POINT_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << QUOTATION_MARK_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << RADICAL_PROPERTY_) ),
        new BinaryProperties( SRC_CASE, 0 ),
        new BinaryProperties(  1, (  1 << TERMINAL_PUNCTUATION_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << UNIFIED_IDEOGRAPH_PROPERTY_) ),
        new BinaryProperties( SRC_CASE, 0 ),
        new BinaryProperties(  1, (  1 << WHITE_SPACE_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << XID_CONTINUE_PROPERTY_) ),
        new BinaryProperties(  1, (  1 << XID_START_PROPERTY_) ),
        new BinaryProperties( SRC_CASE, 0 ),
        new BinaryProperties(  2, (  1 << V2_S_TERM_PROPERTY_) ),
        new BinaryProperties(  2, (  1 << V2_VARIATION_SELECTOR_PROPERTY_) ),
        new BinaryProperties( SRC_NORM, 0 ),
        new BinaryProperties( SRC_NORM, 0 ),
        new BinaryProperties( SRC_NORM, 0 ),
        new BinaryProperties( SRC_NORM, 0 ),
        new BinaryProperties( SRC_NORM, 0 ),
        new BinaryProperties(  2, (  1 << V2_PATTERN_SYNTAX) ),
        new BinaryProperties(  2, (  1 << V2_PATTERN_WHITE_SPACE) ),
        new BinaryProperties( SRC_CHAR_AND_PROPSVEC, 0 ),
        new BinaryProperties( SRC_CHAR, 0 ),
        new BinaryProperties( SRC_CHAR, 0 ),
        new BinaryProperties( SRC_CHAR, 0 ),
        new BinaryProperties( SRC_CHAR, 0 )
    };

    /**
     * Checks whether a code point has a binary property.
     * <p>
     * Properties whose table row carries a bit mask are answered from the
     * properties vectors. The remaining ones are delegated according to the
     * row's SRC_ selector: case properties, normalization data, bidi
     * properties, or general-category based POSIX classes.
     * @param codepoint code point to test
     * @param property binary property selector, expected in the range
     *        UProperty.BINARY_START (inclusive) to UProperty.BINARY_LIMIT
     *        (exclusive)
     * @return true if the code point has the property; false if it does not,
     *         if <code>property</code> is out of range or unhandled, or if
     *         the case/bidi property data cannot be loaded
     */
    public boolean hasBinaryProperty(int codepoint, int property)
    {
        if(property <UProperty.BINARY_START || UProperty.BINARY_LIMIT<=property) {
            // not a known binary property
            return false;
        } else {
            long mask=binProps[property].mask;
            int column=binProps[property].column;
            if(mask!=0) {
                // systematic case: one bit in a properties vector word.
                // UNSIGNED_INT_MASK strips the sign extension of both the
                // data word and (via the AND) the mask.
                return ((UNSIGNED_INT_MASK & getAdditional(codepoint, column)) & mask)!=0;
            } else {
                if(column==SRC_CASE) {
                    UCaseProps csp;
                    try {
                        csp = UCaseProps.getSingleton();
                    } catch (IOException e) {
                        // case data unavailable: report "not set"
                        return false;
                    }
                    switch(property) {
                    case UProperty.LOWERCASE:
                        return UCaseProps.LOWER==csp.getType(codepoint);
                    case UProperty.UPPERCASE:
                        return UCaseProps.UPPER==csp.getType(codepoint);
                    case UProperty.SOFT_DOTTED:
                        return csp.isSoftDotted(codepoint);
                    case UProperty.CASE_SENSITIVE:
                        return csp.isCaseSensitive(codepoint);
                    default:
                        break;
                    }
                } else if(column==SRC_NORM) {
                    switch(property) {
                    case UProperty.FULL_COMPOSITION_EXCLUSION:
                        return NormalizerImpl.isFullCompositionExclusion(codepoint);
                    case UProperty.NFD_INERT:
                        return Normalizer.isNFSkippable(codepoint, Normalizer.NFD);
                    case UProperty.NFKD_INERT:
                        return Normalizer.isNFSkippable(codepoint, Normalizer.NFKD);
                    case UProperty.NFC_INERT:
                        return Normalizer.isNFSkippable(codepoint, Normalizer.NFC);
                    case UProperty.NFKC_INERT:
                        return Normalizer.isNFSkippable(codepoint, Normalizer.NFKC);
                    case UProperty.SEGMENT_STARTER:
                        return NormalizerImpl.isCanonSafeStart(codepoint);
                    default:
                        break;
                    }
                } else if(column==SRC_BIDI) {
                    UBiDiProps bdp;
                    try {
                        bdp = UBiDiProps.getSingleton();
                    } catch (IOException e) {
                        // bidi data unavailable: report "not set"
                        return false;
                    }
                    switch(property) {
                    case UProperty.BIDI_MIRRORED:
                        return bdp.isMirrored(codepoint);
                    case UProperty.BIDI_CONTROL:
                        return bdp.isBidiControl(codepoint);
                    case UProperty.JOIN_CONTROL:
                        return bdp.isJoinControl(codepoint);
                    default:
                        break;
                    }
                } else if(column==SRC_CHAR) {
                    switch(property) {
                    case UProperty.POSIX_BLANK:
                        if(codepoint<=0x9f) {
                            // up to U+009F only TAB and SPACE count as blank
                            return codepoint==9 || codepoint==0x20;
                        } else {
                            // above that, any space separator (Zs)
                            return UCharacter.getType(codepoint)==UCharacter.SPACE_SEPARATOR;
                        }
                    case UProperty.POSIX_GRAPH:
                        return isgraphPOSIX(codepoint);
                    case UProperty.POSIX_PRINT:
                        // space separators plus everything "graph"
                        return (UCharacter.getType(codepoint)==UCharacter.SPACE_SEPARATOR) || isgraphPOSIX(codepoint);
                    case UProperty.POSIX_XDIGIT:
                        // A-F, a-f and their fullwidth forms
                        // (U+FF21..U+FF26, U+FF41..U+FF46)
                        if(
                            (codepoint<=0x66 && codepoint>=0x41 && (codepoint<=0x46 || codepoint>=0x61)) ||
                            (codepoint>=0xff21 && codepoint<=0xff46 && (codepoint<=0xff26 || codepoint>=0xff41))
                        ) {
                            return true;
                        }
                        // otherwise any decimal digit
                        return UCharacter.getType(codepoint)==UCharacter.DECIMAL_DIGIT_NUMBER;
                    default:
                        break;
                    }
                } else if(column==SRC_CHAR_AND_PROPSVEC) {
                    switch(property) {
                    case UProperty.POSIX_ALNUM:
                        return UCharacter.isUAlphabetic(codepoint) || UCharacter.isDigit(codepoint);
                    default:
                        break;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Tells which data source a property is computed from.
     * @param which property selector (a UProperty constant)
     * @return one of the SRC_ constants; {@link #SRC_NONE} for selectors
     *         that fall outside the handled ranges or are not listed
     */
    public final int getSource(int which) {
        if(which<UProperty.BINARY_START) {
            return SRC_NONE; /* undefined */
        } else if(which<UProperty.BINARY_LIMIT) {
            if(binProps[which].mask!=0) {
                return SRC_PROPSVEC;
            } else {
                // mask 0: the column slot carries the SRC_ selector
                return binProps[which].column;
            }
        } else if(which<UProperty.INT_START) {
            return SRC_NONE; /* undefined */
        } else if(which<UProperty.INT_LIMIT) {
            switch(which) {
            case UProperty.GENERAL_CATEGORY:
            case UProperty.NUMERIC_TYPE:
                return SRC_CHAR;

            case UProperty.HANGUL_SYLLABLE_TYPE:
                return SRC_HST;

            case UProperty.CANONICAL_COMBINING_CLASS:
            case UProperty.NFD_QUICK_CHECK:
            case UProperty.NFKD_QUICK_CHECK:
            case UProperty.NFC_QUICK_CHECK:
            case UProperty.NFKC_QUICK_CHECK:
            case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
            case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
                return SRC_NORM;

            case UProperty.BIDI_CLASS:
            case UProperty.JOINING_GROUP:
            case UProperty.JOINING_TYPE:
                return SRC_BIDI;

            default:
                return SRC_PROPSVEC;
            }
        } else if(which<UProperty.STRING_START) {
            switch(which) {
            case UProperty.GENERAL_CATEGORY_MASK:
            case UProperty.NUMERIC_VALUE:
                return SRC_CHAR;

            default:
                return SRC_NONE;
            }
        } else if(which<UProperty.STRING_LIMIT) {
            switch(which) {
            case UProperty.AGE:
                return SRC_PROPSVEC;

            case UProperty.BIDI_MIRRORING_GLYPH:
                return SRC_BIDI;

            case UProperty.CASE_FOLDING:
            case UProperty.LOWERCASE_MAPPING:
            case UProperty.SIMPLE_CASE_FOLDING:
            case UProperty.SIMPLE_LOWERCASE_MAPPING:
            case UProperty.SIMPLE_TITLECASE_MAPPING:
            case UProperty.SIMPLE_UPPERCASE_MAPPING:
            case UProperty.TITLECASE_MAPPING:
            case UProperty.UPPERCASE_MAPPING:
                return SRC_CASE;

            case UProperty.ISO_COMMENT:
            case UProperty.NAME:
            case UProperty.UNICODE_1_NAME:
                return SRC_NAMES;

            default:
                return SRC_NONE;
            }
        } else {
            return SRC_NONE; /* undefined */
        }
    }

    /**
     * Forms a supplementary code point from a surrogate pair without
     * validating the arguments.
     * @param lead lead surrogate
     * @param trail trail surrogate
     * @return <code>(lead &lt;&lt; 10) + trail + SURROGATE_OFFSET_</code>
     */
    public static int getRawSupplementary(char lead, char trail)
    {
        return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_;
    }

    /**
     * Returns the shared instance, creating it (and thereby loading the
     * property data) on first use.
     * @return the shared instance
     * @exception MissingResourceException if creating the instance fails for
     *            any reason; the original exception is attached as its cause
     */
    public static UCharacterProperty getInstance()
    {
        if(INSTANCE_ == null) {
            try {
                INSTANCE_ = new UCharacterProperty();
            }
            catch (Exception e) {
                // MissingResourceException has no cause-taking constructor,
                // so chain the original failure explicitly instead of
                // reducing it to its message.
                MissingResourceException mre =
                    new MissingResourceException(e.getMessage(),"","");
                mre.initCause(e);
                throw mre;
            }
        }
        return INSTANCE_;
    }

    /**
     * Checks whether a code point is "rule white space" as hard-coded here:
     * U+0009..U+000D, U+0020, U+0085, U+200E, U+200F, U+2028 and U+2029.
     * @param c code point to test
     * @return true if <code>c</code> is one of the listed code points
     */
    public static boolean isRuleWhiteSpace(int c)
    {
        // The outer range test rejects most input with two comparisons.
        return (c >= 0x0009 && c <= 0x2029 &&
                (c <= 0x000D || c == 0x0020 || c == 0x0085 ||
                 c == 0x200E || c == 0x200F || c >= 0x2028));
    }

    /**
     * Returns the stored maximum-values word for a properties vector column.
     * @param column properties vector column
     * @return {@link #m_maxBlockScriptValue_} for column 0,
     *         {@link #m_maxJTGValue_} for column 2, otherwise 0
     */
    public int getMaxValues(int column)
    {
        switch(column) {
        case 0:
            return m_maxBlockScriptValue_;
        case 2:
            return m_maxJTGValue_;
        default:
            return 0;
        }
    }

    /**
     * Returns the single-bit mask for a general category value.
     * @param type general category value
     * @return <code>1 &lt;&lt; type</code>
     */
    public static final int getMask(int type)
    {
        return 1 << type;
    }

    // protected variables -----------------------------------------------

    /**
     * Trie mapping a code point to the start offset of its row in
     * {@link #m_additionalVectors_} (see getAdditional()).
     */
    CharTrie m_additionalTrie_;
    /**
     * Additional properties vectors, stored as consecutive rows of
     * {@link #m_additionalColumnsCount_} words.
     */
    int[] m_additionalVectors_;
    /**
     * Number of columns in each additional properties vector.
     */
    int m_additionalColumnsCount_;
    /**
     * Value returned by getMaxValues(0). Not assigned in this class;
     * presumably set by the data reader -- TODO confirm.
     */
    int m_maxBlockScriptValue_;
    /**
     * Value returned by getMaxValues(2). Not assigned in this class;
     * presumably set by the data reader -- TODO confirm.
     */
    int m_maxJTGValue_;

    // private variables -------------------------------------------------

    /**
     * Lazily created shared instance, see getInstance().
     */
    private static UCharacterProperty INSTANCE_ = null;

    /**
     * Name of the property data file.
     */
    private static final String DATA_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/uprops.icu";

    /**
     * Buffer size used when reading the data file.
     */
    private static final int DATA_BUFFER_SIZE_ = 25000;

    /**
     * Shift used by getSignedValue() and getUnsignedValue().
     */
    private static final int VALUE_SHIFT_ = 8;

    /**
     * Mask applied by getUnsignedValue() after shifting.
     */
    private static final int UNSIGNED_VALUE_MASK_AFTER_SHIFT_ = 0xFF;

    // The following shift and mask helpers are not referenced by the code
    // in this class body.
    private static final int NUMERIC_TYPE_SHIFT = 5;

    private static final int LAST_5_BIT_MASK_ = 0x1F;

    private static final int SHIFT_5_ = 5;

    private static final int SHIFT_10_ = 10;

    /**
     * Shift applied to the lead surrogate in getRawSupplementary().
     */
    private static final int LEAD_SURROGATE_SHIFT_ = 10;
    /**
     * Offset added in getRawSupplementary() to turn the combined surrogate
     * values into a supplementary code point.
     */
    private static final int SURROGATE_OFFSET_ =
                           UTF16.SUPPLEMENTARY_MIN_VALUE -
                           (UTF16.SURROGATE_MIN_VALUE <<
                           LEAD_SURROGATE_SHIFT_) -
                           UTF16.TRAIL_SURROGATE_MIN_VALUE;

    // Individual code point and combining class constants. None of them is
    // referenced by the code in this class body.
    private static final char LATIN_CAPITAL_LETTER_I_ = 0x49;

    private static final char COMBINING_DOT_ABOVE_ = 0x307;

    private static final int LATIN_SMALL_LETTER_J_ = 0x6a;

    private static final int LATIN_SMALL_LETTER_I_WITH_OGONEK_ = 0x12f;

    private static final int LATIN_SMALL_LETTER_I_WITH_TILDE_BELOW_ = 0x1e2d;

    private static final int LATIN_SMALL_LETTER_I_WITH_DOT_BELOW_ = 0x1ecb;

    private static final int COMBINING_MARK_ABOVE_CLASS_ = 230;

    private static final int LATIN_CAPITAL_LETTER_J_ = 0x4a;

    private static final int LATIN_CAPITAL_I_WITH_OGONEK_ = 0x12e;

    private static final int LATIN_CAPITAL_I_WITH_TILDE_ = 0x128;

    private static final int LATIN_CAPITAL_I_WITH_GRAVE_ = 0xcc;

    private static final int LATIN_CAPITAL_I_WITH_ACUTE_ = 0xcd;

    private static final int COMBINING_GRAVE_ACCENT_ = 0x300;

    private static final int COMBINING_ACUTE_ACCENT_ = 0x301;

    private static final int COMBINING_TILDE_ = 0x303;

    private static final char GREEK_CAPITAL_LETTER_SIGMA_ = 0x3a3;

    private static final char GREEK_SMALL_LETTER_SIGMA_ = 0x3c3;

    // NOTE(review): U+03C2 appears to be GREEK SMALL LETTER FINAL SIGMA, not
    // rho (U+03C1). The name is kept unchanged; confirm before relying on it.
    private static final char GREEK_SMALL_LETTER_RHO_ = 0x3c2;

    private static final int HYPHEN_      = 0x2010;
    private static final int SOFT_HYPHEN_ = 0xAD;

    private static final int LAST_CHAR_MASK_ = 0xFFFF;

    private static final int LAST_BYTE_MASK_ = 0xFF;

    private static final int SHIFT_16_ = 16;

    // Bit positions in properties vector word 1 (column 1 of binProps).
    // Some of them are not referenced by binProps because the corresponding
    // properties are answered from another source.
    private static final int WHITE_SPACE_PROPERTY_ = 0;
    private static final int BIDI_CONTROL_PROPERTY_ = 1;
    private static final int JOIN_CONTROL_PROPERTY_ = 2;
    private static final int DASH_PROPERTY_ = 3;
    private static final int HYPHEN_PROPERTY_ = 4;
    private static final int QUOTATION_MARK_PROPERTY_ = 5;
    private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 6;
    private static final int MATH_PROPERTY_ = 7;
    private static final int HEX_DIGIT_PROPERTY_ = 8;
    private static final int ASCII_HEX_DIGIT_PROPERTY_ = 9;
    private static final int ALPHABETIC_PROPERTY_ = 10;
    private static final int IDEOGRAPHIC_PROPERTY_ = 11;
    private static final int DIACRITIC_PROPERTY_ = 12;
    private static final int EXTENDER_PROPERTY_ = 13;
    private static final int LOWERCASE_PROPERTY_ = 14;
    private static final int UPPERCASE_PROPERTY_ = 15;
    private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 16;
    private static final int GRAPHEME_EXTEND_PROPERTY_ = 17;
    private static final int GRAPHEME_LINK_PROPERTY_ = 18;
    private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 19;
    private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 20;
    private static final int RADICAL_PROPERTY_ = 21;
    private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 22;
    private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 23;
    private static final int DEPRECATED_PROPERTY_ = 24;
    private static final int SOFT_DOTTED_PROPERTY_ = 25;
    private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 26;
    private static final int XID_START_PROPERTY_ = 27;
    private static final int XID_CONTINUE_PROPERTY_ = 28;
    private static final int ID_START_PROPERTY_ = 29;
    private static final int ID_CONTINUE_PROPERTY_ = 30;
    private static final int GRAPHEME_BASE_PROPERTY_ = 31;
    private static final int BINARY_1_TOP_PROPERTY_ = 32;

    /**
     * Shift that moves the high nibble of the age byte down, see getAge().
     */
    private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
    /**
     * Mask for one nibble, see getAge().
     */
    private static final int LAST_NIBBLE_MASK_ = 0xF;
    /**
     * Shift that moves the age byte of vector column 0 down, see getAge().
     */
    private static final int AGE_SHIFT_ = 24;

    // Bit positions in properties vector word 2 (column 2 of binProps).
    private static final int V2_S_TERM_PROPERTY_ = 24;
    private static final int V2_VARIATION_SELECTOR_PROPERTY_ = 25;
    private static final int V2_PATTERN_SYNTAX = 26;
    private static final int V2_PATTERN_WHITE_SPACE = 27;

    // private constructors ----------------------------------------------

    /**
     * Loads the property data from {@link #DATA_FILE_NAME_} into this object
     * and then hands this object to the trie via putIndexData().
     * @exception IOException if reading or closing the data stream fails
     */
    private UCharacterProperty() throws IOException
    {
        InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
        BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE_);
        try {
            UCharacterPropertyReader reader = new UCharacterPropertyReader(b);
            reader.read(this);
        } finally {
            // release the stream even when reading fails
            b.close();
        }

        m_trie_.putIndexData(this);
    }

    // private methods ---------------------------------------------------

    // Code points used as range boundaries in addPropertyStarts(). Several
    // of them (LF, FF, ZWNJ, ZWJ) are not referenced by the code below.
    private static final int TAB     = 0x0009;
    private static final int LF      = 0x000a;
    private static final int FF      = 0x000c;
    private static final int CR      = 0x000d;
    private static final int U_A     = 0x0041;
    private static final int U_F     = 0x0046;
    private static final int U_Z     = 0x005a;
    private static final int U_a     = 0x0061;
    private static final int U_f     = 0x0066;
    private static final int U_z     = 0x007a;
    private static final int DEL     = 0x007f;
    private static final int NL      = 0x0085;
    private static final int NBSP    = 0x00a0;
    private static final int CGJ     = 0x034f;
    private static final int FIGURESP= 0x2007;
    private static final int HAIRSP  = 0x200a;
    private static final int ZWNJ    = 0x200c;
    private static final int ZWJ     = 0x200d;
    private static final int RLM     = 0x200f;
    private static final int NNBSP   = 0x202f;
    private static final int WJ      = 0x2060;
    private static final int INHSWAP = 0x206a;
    private static final int NOMDIG  = 0x206f;
    private static final int U_FW_A  = 0xff21;
    private static final int U_FW_F  = 0xff26;
    private static final int U_FW_Z  = 0xff3a;
    private static final int U_FW_a  = 0xff41;
    private static final int U_FW_f  = 0xff46;
    private static final int U_FW_z  = 0xff5a;
    private static final int ZWNBSP  = 0xfeff;

    /**
     * Adds to <code>set</code> the code points at which the Hangul syllable
     * type changes within three hard-coded Jamo scan ranges, plus the fixed
     * starts U+1100, U+1160 and U+11A8.
     * @param set set that receives the start code points
     */
    public void uhst_addPropertyStarts(UnicodeSet set) {
        int c, value, value2;

        // U+1100 is added unconditionally; U+115A..U+115F are scanned for
        // any change away from LEADING_JAMO
        set.add(0x1100);
        value=UCharacter.HangulSyllableType.LEADING_JAMO;
        for(c=0x115a; c<=0x115f; ++c) {
            value2= UCharacter.getIntPropertyValue(c, UProperty.HANGUL_SYLLABLE_TYPE);
            if(value!=value2) {
                value=value2;
                set.add(c);
            }
        }

        // same for VOWEL_JAMO: fixed start U+1160, scan U+11A3..U+11A7
        set.add(0x1160);
        value=UCharacter.HangulSyllableType.VOWEL_JAMO;
        for(c=0x11a3; c<=0x11a7; ++c) {
            value2=UCharacter.getIntPropertyValue(c, UProperty.HANGUL_SYLLABLE_TYPE);
            if(value!=value2) {
                value=value2;
                set.add(c);
            }
        }

        // same for TRAILING_JAMO: fixed start U+11A8, scan U+11FA..U+11FF
        set.add(0x11a8);
        value=UCharacter.HangulSyllableType.TRAILING_JAMO;
        for(c=0x11fa; c<=0x11ff; ++c) {
            value2=UCharacter.getIntPropertyValue(c, UProperty.HANGUL_SYLLABLE_TYPE);
            if(value!=value2) {
                value=value2;
                set.add(c);
            }
        }
    }

    /**
     * Adds to <code>set</code> the start of every value range of the main
     * properties trie, followed by a hard-coded list of additional code
     * points (mostly a code point and the one following a range).
     * @param set set that receives the start code points
     * @return <code>set</code>, for call chaining
     */
    public UnicodeSet addPropertyStarts(UnicodeSet set) {
        /* add the start code point of each same-value range of the trie */
        TrieIterator propsIter = new TrieIterator(m_trie_);
        RangeValueIterator.Element propsResult = new RangeValueIterator.Element();
        while(propsIter.next(propsResult)){
            set.add(propsResult.start);
        }

        /* hard-coded boundaries; the grouping below follows the original
           statement order */

        set.add(TAB);
        set.add(TAB+1);

        set.add(CR+1);
        set.add(0x1c);
        set.add(0x1f+1);
        set.add(NL);
        set.add(NL+1);

        set.add(DEL);
        set.add(HAIRSP);
        set.add(RLM+1);
        set.add(INHSWAP);
        set.add(NOMDIG+1);
        set.add(ZWNBSP);
        set.add(ZWNBSP+1);

        set.add(NBSP);
        set.add(NBSP+1);
        set.add(FIGURESP);
        set.add(FIGURESP+1);
        set.add(NNBSP);
        set.add(NNBSP+1);

        /* pairs of a single code point and its successor */
        set.add(0x3007);
        set.add(0x3008);
        set.add(0x4e00);
        set.add(0x4e01);
        set.add(0x4e8c);
        set.add(0x4e8d);
        set.add(0x4e09);
        set.add(0x4e0a);
        set.add(0x56db);
        set.add(0x56dc);
        set.add(0x4e94);
        set.add(0x4e95);
        set.add(0x516d);
        set.add(0x516e);
        set.add(0x4e03);
        set.add(0x4e04);
        set.add(0x516b);
        set.add(0x516c);
        set.add(0x4e5d);
        set.add(0x4e5e);

        /* a-z, A-Z and their fullwidth forms: start and one past the end */
        set.add(U_a);
        set.add(U_z+1);
        set.add(U_A);
        set.add(U_Z+1);
        set.add(U_FW_a);
        set.add(U_FW_z+1);
        set.add(U_FW_A);
        set.add(U_FW_Z+1);

        /* one past f/F and their fullwidth forms */
        set.add(U_f+1);
        set.add(U_F+1);
        set.add(U_FW_f+1);
        set.add(U_FW_F+1);

        set.add(WJ);
        set.add(0xfff0);
        set.add(0xfffb+1);
        set.add(0xe0000);
        set.add(0xe0fff+1);

        set.add(CGJ);
        set.add(CGJ+1);

        return set;
    }

    /**
     * Adds to <code>set</code> the start of every value range of the
     * additional properties trie. Does nothing when no additional columns
     * are stored.
     * @param set set that receives the start code points
     */
    public void upropsvec_addPropertyStarts(UnicodeSet set) {
        if(m_additionalColumnsCount_>0) {
            /* add the start code point of each same-value range */
            TrieIterator propsVectorsIter = new TrieIterator(m_additionalTrie_);
            RangeValueIterator.Element propsVectorsResult = new RangeValueIterator.Element();
            while(propsVectorsIter.next(propsVectorsResult)){
                set.add(propsVectorsResult.start);
            }
        }
    }
}