1 7 package com.ibm.icu.impl; 8 9 import java.io.InputStream ; 10 import java.io.BufferedInputStream ; 11 import java.io.IOException ; 12 import java.util.MissingResourceException ; 13 14 import com.ibm.icu.text.UTF16; 15 import com.ibm.icu.text.UnicodeSet; 16 import com.ibm.icu.lang.UCharacter; 17 import com.ibm.icu.lang.UCharacterCategory; 18 19 34 35 public final class UCharacterName 36 { 37 39 43 public static final int LINES_PER_GROUP_ = 1 << 5; 44 47 public int m_groupcount_ = 0; 48 49 51 56 public static UCharacterName getInstance() 57 { 58 if (INSTANCE_ == null) { 59 try { 60 INSTANCE_ = new UCharacterName(); 61 }catch(IOException e){ 62 throw new MissingResourceException ("Could not construct UCharacterName. Missing unames.icu","",""); 63 } 64 catch (Exception e) { 65 throw new MissingResourceException (e.getMessage(),"",""); 66 } 67 } 68 return INSTANCE_; 69 } 70 71 83 public String getName(int ch, int choice) 84 { 85 if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE || 86 choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) { 87 return null; 88 } 89 90 String result = null; 91 92 result = getAlgName(ch, choice); 93 94 if (result == null || result.length() == 0) { 96 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 97 result = getExtendedName(ch); 98 } else { 99 result = getGroupName(ch, choice); 100 } 101 } 102 103 return result; 104 } 105 106 113 public int getCharFromName(int choice, String name) 114 { 115 if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT || 117 name == null || name.length() == 0) { 118 return -1; 119 } 120 121 int result = getExtendedChar(name.toLowerCase(), choice); 123 if (result >= -1) { 124 return result; 125 } 126 127 String upperCaseName = name.toUpperCase(); 128 131 if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) { 132 int count = 0; 133 if (m_algorithm_ != null) { 134 count = m_algorithm_.length; 135 } 136 for (count --; count >= 0; count --) { 137 result = m_algorithm_[count].getChar(upperCaseName); 138 if (result >= 0) { 139 return result; 140 } 141 } 142 } 143 144 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 145 result = getGroupChar(upperCaseName, 146 UCharacterNameChoice.UNICODE_CHAR_NAME); 147 if (result == -1) { 148 result = getGroupChar(upperCaseName, 149 UCharacterNameChoice.UNICODE_10_CHAR_NAME); 150 } 151 } 152 else { 153 result = getGroupChar(upperCaseName, choice); 154 } 155 return result; 156 } 157 158 160 176 public int getGroupLengths(int index, char offsets[], char lengths[]) 177 { 178 char length = 0xffff; 179 byte b = 0, 180 n = 0; 181 int shift; 182 index = index * m_groupsize_; int stringoffset = UCharacterUtility.toInt( 184 m_groupinfo_[index + OFFSET_HIGH_OFFSET_], 185 m_groupinfo_[index + OFFSET_LOW_OFFSET_]); 186 187 offsets[0] = 0; 188 189 for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) { 192 b = m_groupstring_[stringoffset]; 193 shift = 4; 194 195 while (shift >= 0) { 196 n = (byte)((b >> shift) & 0x0F); 198 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) { 199 length = (char)((n - 12) << 4); 200 } 201 else { 202 if (length != 0xffff) { 203 lengths[i] = (char)((length | n) + 12); 204 } 205 else { 206 lengths[i] = (char)n; 207 } 208 209 if (i < LINES_PER_GROUP_) { 210 offsets[i + 1] = (char)(offsets[i] + lengths[i]); 211 } 212 213 length = 0xffff; 214 i ++; 215 } 216 217 shift -= 4; 218 } 219 } 220 return stringoffset; 221 } 222 223 237 public String getGroupName(int index, int length, int choice) 238 { 239 if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME 240 || choice == UCharacterNameChoice.ISO_COMMENT_) { 241 if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) { 242 int oldindex = index; 244 index += UCharacterUtility.skipByteSubString(m_groupstring_, 245 index, length, (byte)';'); 246 length -= (index - oldindex); 247 if (choice == UCharacterNameChoice.ISO_COMMENT_) { 248 oldindex = index; 250 index += UCharacterUtility.skipByteSubString(m_groupstring_, 251 index, length, (byte)';'); 252 length -= (index - oldindex); 253 } 254 } 255 else { 256 length = 0; 260 } 261 } 262 263 synchronized (m_utilStringBuffer_) { 264 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 265 byte b; 266 char token; 267 for (int i = 0; i < length;) { 268 b = m_groupstring_[index + i]; 269 i ++; 270 271 if (b >= m_tokentable_.length) { 272 if (b == ';') { 273 break; 274 } 275 m_utilStringBuffer_.append(b); } 277 else { 278 token = m_tokentable_[b & 0x00ff]; 279 if (token == 0xFFFE) { 280 token = m_tokentable_[b << 8 | 282 (m_groupstring_[index + i] & 0x00ff)]; 283 i ++; 284 } 285 if (token == 0xFFFF) { 286 if (b == ';') { 287 if (m_utilStringBuffer_.length() == 0 && choice == 291 UCharacterNameChoice.EXTENDED_CHAR_NAME) { 292 continue; 293 } 294 break; 295 } 296 m_utilStringBuffer_.append((char)(b & 0x00ff)); 298 } 299 else { UCharacterUtility.getNullTermByteSubString( 301 m_utilStringBuffer_, m_tokenstring_, token); 302 } 303 } 304 } 305 306 if (m_utilStringBuffer_.length() > 0) { 307 return m_utilStringBuffer_.toString(); 308 } 309 } 310 return null; 311 } 312 313 316 public String getExtendedName(int ch) 317 { 318 String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 319 if (result == null) { 320 if (getType(ch) == UCharacterCategory.CONTROL) { 321 result = getName(ch, 322 UCharacterNameChoice.UNICODE_10_CHAR_NAME); 323 } 324 if (result == null) { 325 result = getExtendedOr10Name(ch); 326 } 327 } 328 return result; 329 } 330 331 336 public int getGroup(int codepoint) 337 { 338 int endGroup = m_groupcount_; 339 int msb = getCodepointMSB(codepoint); 340 int result = 0; 341 while (result < endGroup - 1) { 345 int gindex = (result + endGroup) >> 1; 346 if (msb < getGroupMSB(gindex)) { 347 endGroup = gindex; 348 } 349 else { 350 result = gindex; 351 } 352 } 353 return result; 354 } 355 356 362 public String getExtendedOr10Name(int ch) 363 { 364 String result = null; 365 if (getType(ch) == UCharacterCategory.CONTROL) { 366 result = getName(ch, 367 UCharacterNameChoice.UNICODE_10_CHAR_NAME); 368 } 369 if (result == null) { 370 int type = getType(ch); 371 if (type >= TYPE_NAMES_.length) { 374 result = UNKNOWN_TYPE_NAME_; 375 } 376 else { 377 result = TYPE_NAMES_[type]; 378 } 379 synchronized (m_utilStringBuffer_) { 380 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 381 m_utilStringBuffer_.append('<'); 382 m_utilStringBuffer_.append(result); 383 m_utilStringBuffer_.append('-'); 384 String chStr = Integer.toHexString(ch).toUpperCase(); 385 int zeros = 4 - chStr.length(); 386 while (zeros > 0) { 387 m_utilStringBuffer_.append('0'); 388 zeros --; 389 } 390 m_utilStringBuffer_.append(chStr); 391 m_utilStringBuffer_.append('>'); 392 result = m_utilStringBuffer_.toString(); 393 } 394 } 395 return result; 396 } 397 398 403 public int getGroupMSB(int gindex) 404 { 405 if (gindex >= m_groupcount_) { 406 return -1; 407 } 408 return m_groupinfo_[gindex * m_groupsize_]; 409 } 410 411 416 public static int getCodepointMSB(int codepoint) 417 { 418 return codepoint >> GROUP_SHIFT_; 419 } 420 421 426 public static int getGroupLimit(int msb) 427 { 428 return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_; 429 } 430 431 436 public static int getGroupMin(int msb) 437 { 438 return msb << GROUP_SHIFT_; 439 } 440 441 446 public static int getGroupOffset(int codepoint) 447 { 448 return codepoint & GROUP_MASK_; 449 } 450 451 456 public static int getGroupMinFromCodepoint(int codepoint) 458 { 459 return codepoint & ~GROUP_MASK_; 460 } 461 463 467 public int getAlgorithmLength() 468 { 469 return m_algorithm_.length; 470 } 471 472 477 public int getAlgorithmStart(int index) 478 { 479 return m_algorithm_[index].m_rangestart_; 480 } 481 482 487 public int getAlgorithmEnd(int index) 488 { 489 return m_algorithm_[index].m_rangeend_; 490 } 491 492 498 public String getAlgorithmName(int index, int codepoint) 499 { 500 String result = null; 501 synchronized (m_utilStringBuffer_) { 502 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 503 m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_); 504 result = m_utilStringBuffer_.toString(); 505 } 506 return result; 507 } 508 509 514 public String getGroupName(int ch, int choice) 515 { 516 int msb = getCodepointMSB(ch); 518 int group = getGroup(ch); 519 520 if (msb == m_groupinfo_[group * m_groupsize_]) { 522 int index = getGroupLengths(group, m_groupoffsets_, 523 m_grouplengths_); 524 int offset = ch & GROUP_MASK_; 525 return getGroupName(index + m_groupoffsets_[offset], 526 m_grouplengths_[offset], choice); 527 } 528 529 return null; 530 } 531 532 534 539 public int getMaxCharNameLength() 540 { 541 if (initNameSetsLengths()) { 542 return m_maxNameLength_; 543 } 544 else { 545 return 0; 546 } 547 } 548 549 554 public int getMaxISOCommentLength() 556 { 557 if (initNameSetsLengths()) { 558 return m_maxISOCommentLength_; 559 } 560 else { 561 return 0; 562 } 563 } 564 566 571 public void getCharNameCharacters(UnicodeSet set) 572 { 573 convert(m_nameSet_, set); 574 } 575 576 581 public void getISOCommentCharacters(UnicodeSet set) 583 { 584 convert(m_ISOCommentSet_, set); 585 } 586 588 590 593 static final class AlgorithmName 594 { 595 597 600 static final int TYPE_0_ = 0; 601 static final int TYPE_1_ = 1; 602 603 605 608 AlgorithmName() 609 { 610 } 611 612 614 624 boolean setInfo(int rangestart, int rangeend, byte type, byte variant) 625 { 626 if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend 627 && rangeend <= UCharacter.MAX_VALUE && 628 (type == TYPE_0_ || type == TYPE_1_)) { 629 m_rangestart_ = rangestart; 630 m_rangeend_ = rangeend; 631 m_type_ = type; 632 m_variant_ = variant; 633 return true; 634 } 635 return false; 636 } 637 638 643 boolean setFactor(char factor[]) 644 { 645 if (factor.length == m_variant_) { 646 m_factor_ = factor; 647 return true; 648 } 649 return false; 650 } 651 652 657 boolean setPrefix(String prefix) 658 { 659 if (prefix != null && prefix.length() > 0) { 660 m_prefix_ = prefix; 661 return true; 662 } 663 return false; 664 } 665 666 671 boolean setFactorString(byte string[]) 672 { 673 m_factorstring_ = string; 676 return true; 677 } 678 679 683 boolean contains(int ch) 684 { 685 return m_rangestart_ <= ch && ch <= m_rangeend_; 686 } 687 688 695 void appendName(int ch, StringBuffer str) 696 { 697 str.append(m_prefix_); 698 switch (m_type_) 699 { 700 case TYPE_0_: 701 Utility.hex(ch, m_variant_, str); 703 break; 704 case TYPE_1_: 705 int offset = ch - m_rangestart_; 707 int indexes[] = m_utilIntBuffer_; 708 int factor; 709 710 synchronized (m_utilIntBuffer_) { 714 for (int i = m_variant_ - 1; i > 0; i --) 715 { 716 factor = m_factor_[i] & 0x00FF; 717 indexes[i] = offset % factor; 718 offset /= factor; 719 } 720 721 indexes[0] = offset; 725 726 str.append(getFactorString(indexes, m_variant_)); 728 } 729 break; 730 } 731 } 732 733 737 int getChar(String name) 738 { 739 int prefixlen = m_prefix_.length(); 740 if (name.length() < prefixlen || 741 !m_prefix_.equals(name.substring(0, prefixlen))) { 742 return -1; 743 } 744 745 switch (m_type_) 746 { 747 case TYPE_0_ : 748 try 749 { 750 int result = Integer.parseInt(name.substring(prefixlen), 751 16); 752 if (m_rangestart_ <= result && result <= m_rangeend_) { 754 return result; 755 } 756 } 757 catch (NumberFormatException e) 758 { 759 return -1; 760 } 761 break; 762 case TYPE_1_ : 763 for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++) 766 { 767 int offset = ch - m_rangestart_; 768 int indexes[] = m_utilIntBuffer_; 769 int factor; 770 771 synchronized (m_utilIntBuffer_) { 775 for (int i = m_variant_ - 1; i > 0; i --) 776 { 777 factor = m_factor_[i] & 0x00FF; 778 indexes[i] = offset % factor; 779 offset /= factor; 780 } 781 782 indexes[0] = offset; 786 787 if (compareFactorString(indexes, m_variant_, name, 789 prefixlen)) { 790 return ch; 791 } 792 } 793 } 794 } 795 796 return -1; 797 } 798 799 807 int add(int set[], int maxlength) 808 { 809 int length = UCharacterName.add(set, m_prefix_); 811 switch (m_type_) { 812 case TYPE_0_ : { 813 length += m_variant_; 816 819 break; 820 } 821 case TYPE_1_ : { 822 for (int i = m_variant_ - 1; i > 0; i --) 826 { 827 int maxfactorlength = 0; 828 int count = 0; 829 for (int factor = m_factor_[i]; factor > 0; -- factor) { 830 synchronized (m_utilStringBuffer_) { 831 m_utilStringBuffer_.delete(0, 832 m_utilStringBuffer_.length()); 833 count 834 = UCharacterUtility.getNullTermByteSubString( 835 m_utilStringBuffer_, 836 m_factorstring_, count); 837 UCharacterName.add(set, m_utilStringBuffer_); 838 if (m_utilStringBuffer_.length() 839 > maxfactorlength) 840 { 841 maxfactorlength 842 = m_utilStringBuffer_.length(); 843 } 844 } 845 } 846 length += maxfactorlength; 847 } 848 } 849 } 850 if (length > maxlength) { 851 return length; 852 } 853 return maxlength; 854 } 855 856 858 861 private int m_rangestart_; 862 private int m_rangeend_; 863 private byte m_type_; 864 private byte m_variant_; 865 private char m_factor_[]; 866 private String m_prefix_; 867 private byte m_factorstring_[]; 868 871 private StringBuffer m_utilStringBuffer_ = new StringBuffer (); 872 875 private int m_utilIntBuffer_[] = new int[256]; 876 877 879 886 private String getFactorString(int index[], int length) 887 { 888 int size = m_factor_.length; 889 if (index == null || length != size) { 890 return null; 891 } 892 893 synchronized (m_utilStringBuffer_) { 894 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 895 int count = 0; 896 int factor; 897 size --; 898 for (int i = 0; i <= size; i ++) { 899 factor = m_factor_[i]; 900 count = UCharacterUtility.skipNullTermByteSubString( 901 m_factorstring_, count, index[i]); 902 count = UCharacterUtility.getNullTermByteSubString( 903 m_utilStringBuffer_, m_factorstring_, 904 count); 905 if (i != size) { 906 count = UCharacterUtility.skipNullTermByteSubString( 907 m_factorstring_, count, 908 factor - index[i] - 1); 909 } 910 } 911 return m_utilStringBuffer_.toString(); 912 } 913 } 914 915 924 private boolean compareFactorString(int index[], int length, String str, 925 int offset) 926 { 927 int size = m_factor_.length; 928 if (index == null || length != size) 929 return false; 930 931 int count = 0; 932 int strcount = offset; 933 int factor; 934 size --; 935 for (int i = 0; i <= size; i ++) 936 { 937 factor = m_factor_[i]; 938 count = UCharacterUtility.skipNullTermByteSubString( 939 m_factorstring_, count, index[i]); 940 strcount = UCharacterUtility.compareNullTermByteSubString(str, 941 m_factorstring_, strcount, count); 942 if (strcount < 0) { 943 return false; 944 } 945 946 if (i != size) { 947 count = UCharacterUtility.skipNullTermByteSubString( 948 m_factorstring_, count, factor - index[i]); 949 } 950 } 951 if (strcount != str.length()) { 952 return false; 953 } 954 return true; 955 } 956 } 957 958 960 963 int m_groupsize_ = 0; 964 965 967 973 boolean setToken(char token[], byte tokenstring[]) 974 { 975 if (token != null && tokenstring != null && token.length > 0 && 976 tokenstring.length > 0) { 977 m_tokentable_ = token; 978 m_tokenstring_ = tokenstring; 979 return true; 980 } 981 return false; 982 } 983 984 989 boolean setAlgorithm(AlgorithmName alg[]) 990 { 991 if (alg != null && alg.length != 0) { 992 m_algorithm_ = alg; 993 return true; 994 } 995 return false; 996 } 997 998 1004 boolean setGroupCountSize(int count, int size) 1005 { 1006 if (count <= 0 || size <= 0) { 1007 return false; 1008 } 1009 m_groupcount_ = count; 1010 m_groupsize_ = size; 1011 return true; 1012 } 1013 1014 1020 boolean setGroup(char group[], byte groupstring[]) 1021 { 1022 if (group != null && groupstring != null && group.length > 0 && 1023 groupstring.length > 0) { 1024 m_groupinfo_ = group; 1025 m_groupstring_ = groupstring; 1026 return true; 1027 } 1028 return false; 1029 } 1030 1031 1033 1036 private char m_tokentable_[]; 1037 private byte m_tokenstring_[]; 1038 private char m_groupinfo_[]; 1039 private byte m_groupstring_[]; 1040 private AlgorithmName m_algorithm_[]; 1041 1042 1045 private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1]; 1046 private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1]; 1047 1048 1051 private static final String NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu"; 1052 1055 private static final int GROUP_SHIFT_ = 5; 1056 1059 private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1; 1060 1063 private static final int NAME_BUFFER_SIZE_ = 100000; 1064 1065 1068 private static final int OFFSET_HIGH_OFFSET_ = 1; 1069 1070 1073 private static final int OFFSET_LOW_OFFSET_ = 2; 1074 1078 private static final int SINGLE_NIBBLE_MAX_ = 11; 1079 1080 1083 1087 1089 1093 private int m_nameSet_[] = new int[8]; 1094 1098 private int m_ISOCommentSet_[] = new int[8]; 1099 1102 private StringBuffer m_utilStringBuffer_ = new StringBuffer (); 1103 1106 private int m_utilIntBuffer_[] = new int[2]; 1107 1110 private int m_maxISOCommentLength_; 1111 1114 private int m_maxNameLength_; 1115 1118 private static UCharacterName INSTANCE_ = null; 1119 1122 private static final String TYPE_NAMES_[] = {"unassigned", 1123 "uppercase letter", 1124 "lowercase letter", 1125 "titlecase letter", 1126 "modifier letter", 1127 "other letter", 1128 "non spacing mark", 1129 "enclosing mark", 1130 "combining spacing mark", 1131 "decimal digit number", 1132 "letter number", 1133 "other number", 1134 "space separator", 1135 "line separator", 1136 "paragraph separator", 1137 "control", 1138 "format", 1139 "private use area", 1140 "surrogate", 1141 "dash punctuation", 1142 "start punctuation", 1143 "end punctuation", 1144 "connector punctuation", 1145 "other punctuation", 1146 "math symbol", 1147 "currency symbol", 1148 "modifier symbol", 1149 "other symbol", 1150 "initial punctuation", 1151 "final punctuation", 1152 "noncharacter", 1153 "lead surrogate", 1154 "trail surrogate"}; 1155 1158 private static final String UNKNOWN_TYPE_NAME_ = "unknown"; 1159 1162 private static final int NON_CHARACTER_ 1163 = UCharacterCategory.CHAR_CATEGORY_COUNT; 1164 1167 private static final int LEAD_SURROGATE_ 1168 = UCharacterCategory.CHAR_CATEGORY_COUNT + 1; 1169 1172 private static final int TRAIL_SURROGATE_ 1173 = UCharacterCategory.CHAR_CATEGORY_COUNT + 2; 1174 1177 static final int EXTENDED_CATEGORY_ 1178 = UCharacterCategory.CHAR_CATEGORY_COUNT + 3; 1179 1180 1182 1186 private UCharacterName() throws IOException 1187 { 1188 InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_); 1189 BufferedInputStream b = new BufferedInputStream (is, NAME_BUFFER_SIZE_); 1190 UCharacterNameReader reader = new UCharacterNameReader(b); 1191 reader.read(this); 1192 b.close(); 1193 } 1194 1195 1197 1203 private String getAlgName(int ch, int choice) 1204 { 1205 if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) { 1210 synchronized (m_utilStringBuffer_) { 1212 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 1213 1214 for (int index = m_algorithm_.length - 1; index >= 0; index --) 1215 { 1216 if (m_algorithm_[index].contains(ch)) { 1217 m_algorithm_[index].appendName(ch, m_utilStringBuffer_); 1218 return m_utilStringBuffer_.toString(); 1219 } 1220 } 1221 } 1222 } 1223 return null; 1224 } 1225 1226 1232 private synchronized int getGroupChar(String name, int choice) 1233 { 1234 for (int i = 0; i < m_groupcount_; i ++) { 1235 1237 int startgpstrindex = getGroupLengths(i, m_groupoffsets_, 1238 m_grouplengths_); 1239 1240 int result = getGroupChar(startgpstrindex, m_grouplengths_, name, 1242 choice); 1243 if (result != -1) { 1244 return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_) 1245 | result; 1246 } 1247 } 1248 return -1; 1249 } 1250 1251 1261 private int getGroupChar(int index, char length[], String name, 1262 int choice) 1263 { 1264 byte b = 0; 1265 char token; 1266 int len; 1267 int namelen = name.length(); 1268 int nindex; 1269 int count; 1270 1271 for (int result = 0; result <= LINES_PER_GROUP_; result ++) { 1272 nindex = 0; 1273 len = length[result]; 1274 1275 if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME) { 1276 int oldindex = index; 1277 index += UCharacterUtility.skipByteSubString(m_groupstring_, 1278 index, len, (byte)';'); 1279 len -= (index - oldindex); 1280 } 1281 1282 for (count = 0; count < len && nindex != -1 && nindex < namelen; 1285 ) { 1286 b = m_groupstring_[index + count]; 1287 count ++; 1288 1289 if (b >= m_tokentable_.length) { 1290 if (name.charAt(nindex ++) != (b & 0xFF)) { 1291 nindex = -1; 1292 } 1293 } 1294 else { 1295 token = m_tokentable_[b & 0xFF]; 1296 if (token == 0xFFFE) { 1297 token = m_tokentable_[b << 8 | 1299 (m_groupstring_[index + count] & 0x00ff)]; 1300 count ++; 1301 } 1302 if (token == 0xFFFF) { 1303 if (name.charAt(nindex ++) != (b & 0xFF)) { 1304 nindex = -1; 1305 } 1306 } 1307 else { 1308 nindex = UCharacterUtility.compareNullTermByteSubString( 1310 name, m_tokenstring_, nindex, token); 1311 } 1312 } 1313 } 1314 1315 if (namelen == nindex && 1316 (count == len || m_groupstring_[index + count] == ';')) { 1317 return result; 1318 } 1319 1320 index += len; 1321 } 1322 return -1; 1323 } 1324 1325 1330 private static int getType(int ch) 1331 { 1332 if (UCharacterUtility.isNonCharacter(ch)) { 1333 return NON_CHARACTER_; 1335 } 1336 int result = UCharacter.getType(ch); 1337 if (result == UCharacterCategory.SURROGATE) { 1338 if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { 1339 result = LEAD_SURROGATE_; 1340 } 1341 else { 1342 result = TRAIL_SURROGATE_; 1343 } 1344 } 1345 return result; 1346 } 1347 1348 1355 private static int getExtendedChar(String name, int choice) 1356 { 1357 if (name.charAt(0) == '<') { 1358 if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 1359 int endIndex = name.length() - 1; 1360 if (name.charAt(endIndex) == '>') { 1361 int startIndex = name.lastIndexOf('-'); 1362 if (startIndex >= 0) { startIndex ++; 1364 int result = -1; 1365 try { 1366 result = Integer.parseInt( 1367 name.substring(startIndex, endIndex), 1368 16); 1369 } 1370 catch (NumberFormatException e) { 1371 return -1; 1372 } 1373 String type = name.substring(1, startIndex - 1); 1376 int length = TYPE_NAMES_.length; 1377 for (int i = 0; i < length; ++ i) { 1378 if (type.compareTo(TYPE_NAMES_[i]) == 0) { 1379 if (getType(result) == i) { 1380 return result; 1381 } 1382 break; 1383 } 1384 } 1385 } 1386 } 1387 } 1388 return -1; 1389 } 1390 return -2; 1391 } 1392 1393 1395 1401 private static void add(int set[], char ch) 1402 { 1403 set[ch >>> 5] |= 1 << (ch & 0x1f); 1404 } 1405 1406 1413 private static boolean contains(int set[], char ch) 1414 { 1415 return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0; 1416 } 1417 1418 1424 private static int add(int set[], String str) 1425 { 1426 int result = str.length(); 1427 1428 for (int i = result - 1; i >= 0; i --) { 1429 add(set, str.charAt(i)); 1430 } 1431 return result; 1432 } 1433 1434 1440 private static int add(int set[], StringBuffer str) 1441 { 1442 int result = str.length(); 1443 1444 for (int i = result - 1; i >= 0; i --) { 1445 add(set, str.charAt(i)); 1446 } 1447 return result; 1448 } 1449 1450 1457 private int addAlgorithmName(int maxlength) 1458 { 1459 int result = 0; 1460 for (int i = m_algorithm_.length - 1; i >= 0; i --) { 1461 result = m_algorithm_[i].add(m_nameSet_, maxlength); 1462 if (result > maxlength) { 1463 maxlength = result; 1464 } 1465 } 1466 return maxlength; 1467 } 1468 1469 1475 private int addExtendedName(int maxlength) 1476 { 1477 for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) { 1478 int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]); 1484 if (length > maxlength) { 1485 maxlength = length; 1486 } 1487 } 1488 return maxlength; 1489 } 1490 1491 1501 private int[] addGroupName(int offset, int length, byte tokenlength[], 1502 int set[]) 1503 { 1504 int resultnlength = 0; 1505 int resultplength = 0; 1506 while (resultplength < length) { 1507 char b = (char)(m_groupstring_[offset + resultplength] & 0xff); 1508 resultplength ++; 1509 if (b == ';') { 1510 break; 1511 } 1512 1513 if (b >= m_tokentable_.length) { 1514 add(set, b); resultnlength ++; 1516 } 1517 else { 1518 char token = m_tokentable_[b & 0x00ff]; 1519 if (token == 0xFFFE) { 1520 b = (char)(b << 8 | (m_groupstring_[offset + resultplength] 1522 & 0x00ff)); 1523 token = m_tokentable_[b]; 1524 resultplength ++; 1525 } 1526 if (token == 0xFFFF) { 1527 add(set, b); 1528 resultnlength ++; 1529 } 1530 else { 1531 byte tlength = tokenlength[b]; 1534 if (tlength == 0) { 1535 synchronized (m_utilStringBuffer_) { 1536 m_utilStringBuffer_.delete(0, 1537 m_utilStringBuffer_.length()); 1538 UCharacterUtility.getNullTermByteSubString( 1539 m_utilStringBuffer_, m_tokenstring_, 1540 token); 1541 tlength = (byte)add(set, m_utilStringBuffer_); 1542 } 1543 tokenlength[b] = tlength; 1544 } 1545 resultnlength += tlength; 1546 } 1547 } 1548 } 1549 m_utilIntBuffer_[0] = resultnlength; 1550 m_utilIntBuffer_[1] = resultplength; 1551 return m_utilIntBuffer_; 1552 } 1553 1554 1561 private void addGroupName(int maxlength) 1562 { 1563 int maxisolength = 0; 1564 char offsets[] = new char[LINES_PER_GROUP_ + 2]; 1565 char lengths[] = new char[LINES_PER_GROUP_ + 2]; 1566 byte tokenlengths[] = new byte[m_tokentable_.length]; 1567 1568 for (int i = 0; i < m_groupcount_ ; i ++) { 1571 int offset = getGroupLengths(i, offsets, lengths); 1572 for (int linenumber = 0; linenumber < LINES_PER_GROUP_; 1576 linenumber ++) { 1577 int lineoffset = offset + offsets[linenumber]; 1578 int length = lengths[linenumber]; 1579 if (length == 0) { 1580 continue; 1581 } 1582 1583 int parsed[] = addGroupName(lineoffset, length, tokenlengths, 1585 m_nameSet_); 1586 if (parsed[0] > maxlength) { 1587 maxlength = parsed[0]; 1589 } 1590 lineoffset += parsed[1]; 1591 if (parsed[1] >= length) { 1592 continue; 1594 } 1595 length -= parsed[1]; 1596 parsed = addGroupName(lineoffset, length, tokenlengths, 1598 m_nameSet_); 1599 if (parsed[0] > maxlength) { 1600 maxlength = parsed[0]; 1602 } 1603 lineoffset += parsed[1]; 1604 if (parsed[1] >= length) { 1605 continue; 1607 } 1608 length -= parsed[1]; 1609 parsed = addGroupName(lineoffset, length, tokenlengths, 1611 m_ISOCommentSet_); 1612 if (parsed[1] > maxisolength) { 1613 maxisolength = length; 1614 } 1615 } 1616 } 1617 1618 m_maxISOCommentLength_ = maxisolength; 1620 m_maxNameLength_ = maxlength; 1621 } 1622 1623 1627 private boolean initNameSetsLengths() 1628 { 1629 if (m_maxNameLength_ > 0) { 1630 return true; 1631 } 1632 1633 String extra = "0123456789ABCDEF<>-"; 1634 for (int i = extra.length() - 1; i >= 0; i --) { 1637 add(m_nameSet_, extra.charAt(i)); 1638 } 1639 1640 m_maxNameLength_ = addAlgorithmName(0); 1642 m_maxNameLength_ = addExtendedName(m_maxNameLength_); 1644 addGroupName(m_maxNameLength_); 1646 return true; 1647 } 1648 1649 1655 private void convert(int set[], UnicodeSet uset) 1656 { 1657 uset.clear(); 1658 if (!initNameSetsLengths()) { 1659 return; 1660 } 1661 1662 for (char c = 255; c > 0; c --) { 1664 if (contains(set, c)) { 1665 uset.add(c); 1666 } 1667 } 1668 } 1669} 1670 | Popular Tags |