1 8 9 package com.ibm.icu.text; 10 11 import com.ibm.icu.impl.UCharacterProperty; 12 import com.ibm.icu.impl.NormalizerImpl; 13 14 93 94 public final class UTF16 95 { 96 98 106 public static final int SINGLE_CHAR_BOUNDARY = 1, 107 LEAD_SURROGATE_BOUNDARY = 2, 108 TRAIL_SURROGATE_BOUNDARY = 5; 109 113 public static final int CODEPOINT_MIN_VALUE = 0; 114 119 public static final int CODEPOINT_MAX_VALUE = 0x10ffff; 120 124 public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000; 125 129 public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800; 130 134 public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00; 135 139 public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF; 140 144 public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF; 145 149 public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE; 150 154 public static final int SURROGATE_MAX_VALUE = TRAIL_SURROGATE_MAX_VALUE; 155 156 158 162 private UTF16() 163 { 164 } 165 168 187 public static int charAt(String source, int offset16) 188 { 189 char single = source.charAt(offset16); 190 if (single < LEAD_SURROGATE_MIN_VALUE) { 191 return single; 192 } 193 return _charAt(source, offset16, single); 194 } 195 196 private static int _charAt(String source, int offset16, char single) 197 { 198 if (single > TRAIL_SURROGATE_MAX_VALUE) { 199 return single; 200 } 201 202 206 if (single <= LEAD_SURROGATE_MAX_VALUE) { 207 ++ offset16; 208 if (source.length() != offset16) { 209 char trail = source.charAt(offset16); 210 if (trail >= TRAIL_SURROGATE_MIN_VALUE && 211 trail <= TRAIL_SURROGATE_MAX_VALUE) { 212 return UCharacterProperty.getRawSupplementary(single, 213 trail); 214 } 215 } 216 } 217 else 218 { 219 -- offset16; 220 if (offset16 >= 0) { 221 char lead = source.charAt(offset16); 223 if (lead >= LEAD_SURROGATE_MIN_VALUE && 224 lead <= LEAD_SURROGATE_MAX_VALUE) { 225 return UCharacterProperty.getRawSupplementary(lead, 226 single); 227 } 228 } 229 } 230 return single; } 232 233 300 319 public static int charAt(StringBuffer source, int offset16) 320 { 321 if (offset16 < 0 || offset16 >= source.length()) { 322 throw new StringIndexOutOfBoundsException (offset16); 323 } 324 325 char single = source.charAt(offset16); 326 if (!isSurrogate(single)) { 327 return single; 328 } 329 330 334 if (single <= LEAD_SURROGATE_MAX_VALUE) 335 { 336 ++ offset16; 337 if (source.length() != offset16) 338 { 339 char trail = source.charAt(offset16); 340 if (isTrailSurrogate(trail)) 341 return UCharacterProperty.getRawSupplementary(single, trail); 342 } 343 } 344 else 345 { 346 -- offset16; 347 if (offset16 >= 0) 348 { 349 char lead = source.charAt(offset16); 351 if (isLeadSurrogate(lead)) { 352 return UCharacterProperty.getRawSupplementary(lead, single); 353 } 354 } 355 } 356 return single; } 358 359 380 public static int charAt(char source[], int start, int limit, 381 int offset16) 382 { 383 offset16 += start; 384 if (offset16 < start || offset16 >= limit) { 385 throw new ArrayIndexOutOfBoundsException (offset16); 386 } 387 388 char single = source[offset16]; 389 if (!isSurrogate(single)) { 390 return single; 391 } 392 393 if (single <= LEAD_SURROGATE_MAX_VALUE) { 397 offset16 ++; 398 if (offset16 >= limit) { 399 return single; 400 } 401 char trail = source[offset16]; 402 if (isTrailSurrogate(trail)) { 403 return UCharacterProperty.getRawSupplementary(single, trail); 404 } 405 } 406 else { if (offset16 == start) { 408 return single; 409 } 410 offset16 --; 411 char lead = source[offset16]; 412 if (isLeadSurrogate(lead)) 413 return UCharacterProperty.getRawSupplementary(lead, single); 414 } 415 return single; } 417 418 437 public static int charAt(Replaceable source, int offset16) 438 { 439 if (offset16 < 0 || offset16 >= source.length()) { 440 throw new StringIndexOutOfBoundsException (offset16); 441 } 442 443 char single = source.charAt(offset16); 444 if (!isSurrogate(single)) { 445 return single; 446 } 447 448 452 if (single <= LEAD_SURROGATE_MAX_VALUE) 453 { 454 ++ offset16; 455 if (source.length() != offset16) 456 { 457 char trail = source.charAt(offset16); 458 if (isTrailSurrogate(trail)) 459 return UCharacterProperty.getRawSupplementary(single, trail); 460 } 461 } 462 else 463 { 464 -- offset16; 465 if (offset16 >= 0) 466 { 467 char lead = source.charAt(offset16); 469 if (isLeadSurrogate(lead)) { 470 return UCharacterProperty.getRawSupplementary(lead, single); 471 } 472 } 473 } 474 return single; } 476 477 486 public static int getCharCount(int char32) 487 { 488 if (char32 < SUPPLEMENTARY_MIN_VALUE) { 489 return 1; 490 } 491 return 2; 492 } 493 494 516 public static int bounds(String source, int offset16) 517 { 518 char ch = source.charAt(offset16); 519 if (isSurrogate(ch)) { 520 if (isLeadSurrogate(ch)) 521 { 522 if (++ offset16 < source.length() && 523 isTrailSurrogate(source.charAt(offset16))) { 524 return LEAD_SURROGATE_BOUNDARY; 525 } 526 } 527 else { 528 -- offset16; 530 if (offset16 >= 0 && isLeadSurrogate(source.charAt(offset16))) { 531 return TRAIL_SURROGATE_BOUNDARY; 532 } 533 } 534 } 535 return SINGLE_CHAR_BOUNDARY; 536 } 537 538 560 public static int bounds(StringBuffer source, int offset16) 561 { 562 char ch = source.charAt(offset16); 563 if (isSurrogate(ch)) { 564 if (isLeadSurrogate(ch)) 565 { 566 if (++ offset16 < source.length() && 567 isTrailSurrogate(source.charAt(offset16))) { 568 return LEAD_SURROGATE_BOUNDARY; 569 } 570 } 571 else { 572 -- offset16; 574 if (offset16 >= 0 && 575 isLeadSurrogate(source.charAt(offset16))) { 576 return TRAIL_SURROGATE_BOUNDARY; 577 } 578 } 579 } 580 return SINGLE_CHAR_BOUNDARY; 581 } 582 583 607 public static int bounds(char source[], int start, int limit, 608 int offset16) 609 { 610 offset16 += start; 611 if (offset16 < start || offset16 >= limit) { 612 throw new ArrayIndexOutOfBoundsException (offset16); 613 } 614 char ch = source[offset16]; 615 if (isSurrogate(ch)) { 616 if (isLeadSurrogate(ch)) { 617 ++ offset16; 618 if (offset16 < limit && isTrailSurrogate(source[offset16])) { 619 return LEAD_SURROGATE_BOUNDARY; 620 } 621 } 622 else { -- offset16; 624 if (offset16 >= start && isLeadSurrogate(source[offset16])) { 625 return TRAIL_SURROGATE_BOUNDARY; 626 } 627 } 628 } 629 return SINGLE_CHAR_BOUNDARY; 630 } 631 632 638 public static boolean isSurrogate(char char16) 639 { 640 return LEAD_SURROGATE_MIN_VALUE <= char16 && 641 char16 <= TRAIL_SURROGATE_MAX_VALUE; 642 } 643 644 650 public static boolean isTrailSurrogate(char char16) 651 { 652 return (TRAIL_SURROGATE_MIN_VALUE <= char16 && 653 char16 <= TRAIL_SURROGATE_MAX_VALUE); 654 } 655 656 662 public static boolean isLeadSurrogate(char char16) 663 { 664 return LEAD_SURROGATE_MIN_VALUE <= char16 && 665 char16 <= LEAD_SURROGATE_MAX_VALUE; 666 } 667 668 678 public static char getLeadSurrogate(int char32) 679 { 680 if (char32 >= SUPPLEMENTARY_MIN_VALUE) { 681 return (char)(LEAD_SURROGATE_OFFSET_ + 682 (char32 >> LEAD_SURROGATE_SHIFT_)); 683 } 684 685 return 0; 686 } 687 688 698 public static char getTrailSurrogate(int char32) 699 { 700 if (char32 >= SUPPLEMENTARY_MIN_VALUE) { 701 return (char)(TRAIL_SURROGATE_MIN_VALUE + 702 (char32 & TRAIL_SURROGATE_MASK_)); 703 } 704 705 return (char)char32; 706 } 707 708 720 public static String valueOf(int char32) 721 { 722 if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) { 723 throw new IllegalArgumentException ("Illegal codepoint"); 724 } 725 return toString(char32); 726 } 727 728 744 public static String valueOf(String source, int offset16) 745 { 746 switch (bounds(source, offset16)) { 747 case LEAD_SURROGATE_BOUNDARY: 748 return source.substring(offset16, offset16 + 2); 749 case TRAIL_SURROGATE_BOUNDARY: 750 return source.substring(offset16 - 1, offset16 + 1); 751 default: return source.substring(offset16, offset16 + 1); 752 } 753 } 754 755 771 public static String valueOf(StringBuffer source, int offset16) 772 { 773 switch (bounds(source, offset16)) { 774 case LEAD_SURROGATE_BOUNDARY: 775 return source.substring(offset16, offset16 + 2); 776 case TRAIL_SURROGATE_BOUNDARY: 777 return source.substring(offset16 - 1, offset16 + 1); 778 default: return source.substring(offset16, offset16 + 1); 779 } 780 } 781 782 803 public static String valueOf(char source[], int start, int limit, 804 int offset16) 805 { 806 switch (bounds(source, start, limit, offset16)) { 807 case LEAD_SURROGATE_BOUNDARY: 808 return new String (source, start + offset16, 2); 809 case TRAIL_SURROGATE_BOUNDARY: 810 return new String (source, start + offset16 - 1, 2); 811 } 812 return new String (source, start + offset16, 1); 813 } 814 815 825 public static int findOffsetFromCodePoint(String source, int offset32) 826 { 827 char ch; 828 int size = source.length(), 829 result = 0, 830 count = offset32; 831 if (offset32 < 0 || offset32 > size) { 832 throw new StringIndexOutOfBoundsException (offset32); 833 } 834 while (result < size && count > 0) 835 { 836 ch = source.charAt(result); 837 if (isLeadSurrogate(ch) && ((result + 1) < size) && 838 isTrailSurrogate(source.charAt(result + 1))) { 839 result ++; 840 } 841 842 count --; 843 result ++; 844 } 845 if (count != 0) { 846 throw new StringIndexOutOfBoundsException (offset32); 847 } 848 return result; 849 } 850 851 861 public static int findOffsetFromCodePoint(StringBuffer source, 862 int offset32) 863 { 864 char ch; 865 int size = source.length(), 866 result = 0, 867 count = offset32; 868 if (offset32 < 0 || offset32 > size) { 869 throw new StringIndexOutOfBoundsException (offset32); 870 } 871 while (result < size && count > 0) 872 { 873 ch = source.charAt(result); 874 if (isLeadSurrogate(ch) && ((result + 1) < size) && 875 isTrailSurrogate(source.charAt(result + 1))) { 876 result ++; 877 } 878 879 count --; 880 result ++; 881 } 882 if (count != 0) { 883 throw new StringIndexOutOfBoundsException (offset32); 884 } 885 return result; 886 } 887 888 900 public static int findOffsetFromCodePoint(char source[], int start, 901 int limit, int offset32) 902 { 903 char ch; 904 int result = start, 905 count = offset32; 906 if (offset32 > limit - start) { 907 throw new ArrayIndexOutOfBoundsException (offset32); 908 } 909 while (result < limit && count > 0) 910 { 911 ch = source[result]; 912 if (isLeadSurrogate(ch) && ((result + 1) < limit) && 913 isTrailSurrogate(source[result + 1])) { 914 result ++; 915 } 916 917 count --; 918 result ++; 919 } 920 if (count != 0) { 921 throw new ArrayIndexOutOfBoundsException (offset32); 922 } 923 return result - start; 924 } 925 926 947 public static int findCodePointOffset(String source, int offset16) 948 { 949 if (offset16 < 0 || offset16 > source.length()) { 950 throw new StringIndexOutOfBoundsException (offset16); 951 } 952 953 int result = 0; 954 char ch; 955 boolean hadLeadSurrogate = false; 956 957 for (int i = 0; i < offset16; ++ i) 958 { 959 ch = source.charAt(i); 960 if (hadLeadSurrogate && isTrailSurrogate(ch)) { 961 hadLeadSurrogate = false; } 963 else 964 { 965 hadLeadSurrogate = isLeadSurrogate(ch); 966 ++ result; } 968 } 969 970 if (offset16 == source.length()) { 971 return result; 972 } 973 974 if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) { 977 result --; 978 } 979 980 return result; 981 } 982 983 1004 public static int findCodePointOffset(StringBuffer source, int offset16) 1005 { 1006 if (offset16 < 0 || offset16 > source.length()) { 1007 throw new StringIndexOutOfBoundsException (offset16); 1008 } 1009 1010 int result = 0; 1011 char ch; 1012 boolean hadLeadSurrogate = false; 1013 1014 for (int i = 0; i < offset16; ++ i) 1015 { 1016 ch = source.charAt(i); 1017 if (hadLeadSurrogate && isTrailSurrogate(ch)) { 1018 hadLeadSurrogate = false; } 1020 else 1021 { 1022 hadLeadSurrogate = isLeadSurrogate(ch); 1023 ++ result; } 1025 } 1026 1027 if (offset16 == source.length()) { 1028 return result; 1029 } 1030 1031 if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) 1034 { 1035 result --; 1036 } 1037 1038 return result; 1039 } 1040 1041 1065 public static int findCodePointOffset(char source[], int start, int limit, 1066 int offset16) 1067 { 1068 offset16 += start; 1069 if (offset16 > limit) { 1070 throw new StringIndexOutOfBoundsException (offset16); 1071 } 1072 1073 int result = 0; 1074 char ch; 1075 boolean hadLeadSurrogate = false; 1076 1077 for (int i = start; i < offset16; ++ i) 1078 { 1079 ch = source[i]; 1080 if (hadLeadSurrogate && isTrailSurrogate(ch)) { 1081 hadLeadSurrogate = false; } 1083 else 1084 { 1085 hadLeadSurrogate = isLeadSurrogate(ch); 1086 ++ result; } 1088 } 1089 1090 if (offset16 == limit) { 1091 return result; 1092 } 1093 1094 if (hadLeadSurrogate && (isTrailSurrogate(source[offset16]))) { 1097 result --; 1098 } 1099 1100 return result; 1101 } 1102 1103 1115 public static StringBuffer append(StringBuffer target, int char32) 1116 { 1117 if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) { 1119 throw new IllegalArgumentException ("Illegal codepoint: " + Integer.toHexString(char32)); 1120 } 1121 1122 if (char32 >= SUPPLEMENTARY_MIN_VALUE) 1124 { 1125 target.append(getLeadSurrogate(char32)); 1126 target.append(getTrailSurrogate(char32)); 1127 } 1128 else { 1129 target.append((char)char32); 1130 } 1131 return target; 1132 } 1133 1134 1144 public static StringBuffer appendCodePoint(StringBuffer target, int cp) { 1145 return append(target, cp); 1146 } 1147 1148 1159 public static int append(char[] target, int limit, int char32) 1160 { 1161 if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) { 1163 throw new IllegalArgumentException ("Illegal codepoint"); 1164 } 1165 if (char32 >= SUPPLEMENTARY_MIN_VALUE) 1167 { 1168 target[limit ++] = getLeadSurrogate(char32); 1169 target[limit ++] = getTrailSurrogate(char32); 1170 } 1171 else { 1172 target[limit ++] = (char)char32; 1173 } 1174 return limit; 1175 } 1176 1177 1183 public static int countCodePoint(String source) 1184 { 1185 if (source == null || source.length() == 0) { 1186 return 0; 1187 } 1188 return findCodePointOffset(source, source.length()); 1189 } 1190 1191 1197 public static int countCodePoint(StringBuffer source) 1198 { 1199 if (source == null || source.length() == 0) { 1200 return 0; 1201 } 1202 return findCodePointOffset(source, source.length()); 1203 } 1204 1205 1214 public static int countCodePoint(char source[], int start, int limit) 1215 { 1216 if (source == null || source.length == 0) { 1217 return 0; 1218 } 1219 return findCodePointOffset(source, start, limit, limit - start); 1220 } 1221 1222 1231 public static void setCharAt(StringBuffer target, int offset16, 1232 int char32) 1233 { 1234 int count = 1; 1235 char single = target.charAt(offset16); 1236 1237 if (isSurrogate(single)) 1238 { 1239 if (isLeadSurrogate(single) && (target.length() > offset16 + 1) 1241 && isTrailSurrogate(target.charAt(offset16 + 1))) { 1242 count ++; 1243 } 1244 else { 1245 if (isTrailSurrogate(single) && (offset16 > 0) && 1248 isLeadSurrogate(target.charAt(offset16 -1))) 1249 { 1250 offset16 --; 1251 count ++; 1252 } 1253 } 1254 } 1255 target.replace(offset16, offset16 + count, valueOf(char32)); 1256 } 1257 1258 1272 public static int setCharAt(char target[], int limit, 1273 int offset16, int char32) 1274 { 1275 if (offset16 >= limit) { 1276 throw new ArrayIndexOutOfBoundsException (offset16); 1277 } 1278 int count = 1; 1279 char single = target[offset16]; 1280 1281 if (isSurrogate(single)) 1282 { 1283 if (isLeadSurrogate(single) && (target.length > offset16 + 1) && 1285 isTrailSurrogate(target[offset16 + 1])) { 1286 count ++; 1287 } 1288 else { 1289 if (isTrailSurrogate(single) && (offset16 > 0) && 1292 isLeadSurrogate(target[offset16 -1])) 1293 { 1294 offset16 --; 1295 count ++; 1296 } 1297 } 1298 } 1299 1300 String str = valueOf(char32); 1301 int result = limit; 1302 int strlength = str.length(); 1303 target[offset16] = str.charAt(0); 1304 if (count == strlength) { 1305 if (count == 2) { 1306 target[offset16 + 1] = str.charAt(1); 1307 } 1308 } 1309 else { 1310 System.arraycopy(target, offset16 + count, target, 1313 offset16 + strlength, limit - (offset16 + count)); 1314 if (count < strlength) { 1315 target[offset16 + 1] = str.charAt(1); 1318 result ++; 1319 if (result < target.length) { 1320 target[result] = 0; 1321 } 1322 } 1323 else { 1324 result --; 1327 target[result] = 0; 1328 } 1329 } 1330 return result; 1331 } 1332 1333 1343 public static int moveCodePointOffset(String source, int offset16, 1344 int shift32) 1345 { 1346 int result = offset16; 1347 int size = source.length(); 1348 int count; 1349 char ch; 1350 if (offset16<0 || offset16>size) { 1351 throw new StringIndexOutOfBoundsException (offset16); 1352 } 1353 if (shift32 > 0 ) { 1354 if (shift32 + offset16 > size) { 1355 throw new StringIndexOutOfBoundsException (offset16); 1356 } 1357 count = shift32; 1358 while (result < size && count > 0) 1359 { 1360 ch = source.charAt(result); 1361 if (isLeadSurrogate(ch) && ((result + 1) < size) && 1362 isTrailSurrogate(source.charAt(result + 1))) { 1363 result ++; 1364 } 1365 count --; 1366 result ++; 1367 } 1368 } else { 1369 if (offset16 + shift32 < 0) { 1370 throw new StringIndexOutOfBoundsException (offset16); 1371 } 1372 for (count=-shift32; count>0; count--) { 1373 result--; 1374 if (result<0) { 1375 break; 1376 } 1377 ch = source.charAt(result); 1378 if (isTrailSurrogate(ch) && result>0 && isLeadSurrogate(source.charAt(result-1))) { 1379 result--; 1380 } 1381 } 1382 } 1383 if (count != 0) { 1384 throw new StringIndexOutOfBoundsException (shift32); 1385 } 1386 return result; 1387 } 1388 1389 1399 public static int moveCodePointOffset(StringBuffer source, int offset16, 1400 int shift32) 1401 { 1402 int result = offset16; 1403 int size = source.length(); 1404 int count; 1405 char ch; 1406 if (offset16<0 || offset16>size) { 1407 throw new StringIndexOutOfBoundsException (offset16); 1408 } 1409 if (shift32 > 0 ) { 1410 if (shift32 + offset16 > size) { 1411 throw new StringIndexOutOfBoundsException (offset16); 1412 } 1413 count = shift32; 1414 while (result < size && count > 0) 1415 { 1416 ch = source.charAt(result); 1417 if (isLeadSurrogate(ch) && ((result + 1) < size) && 1418 isTrailSurrogate(source.charAt(result + 1))) { 1419 result ++; 1420 } 1421 count --; 1422 result ++; 1423 } 1424 } else { 1425 if (offset16 + shift32 < 0) { 1426 throw new StringIndexOutOfBoundsException (offset16); 1427 } 1428 for (count=-shift32; count>0; count--) { 1429 result--; 1430 if (result<0) { 1431 break; 1432 } 1433 ch = source.charAt(result); 1434 if (isTrailSurrogate(ch) && result>0 && isLeadSurrogate(source.charAt(result-1))) { 1435 result--; 1436 } 1437 } 1438 } 1439 if (count != 0) { 1440 throw new StringIndexOutOfBoundsException (shift32); 1441 } 1442 return result; 1443 } 1444 1445 1458 public static int moveCodePointOffset(char source[], int start, int limit, 1459 int offset16, int shift32) 1460 { 1461 int size = source.length; 1462 int count; 1463 char ch; 1464 int result = offset16 + start; 1465 if (start<0 || limit<start) { 1466 throw new StringIndexOutOfBoundsException (start); 1467 } 1468 if (limit>size) { 1469 throw new StringIndexOutOfBoundsException (limit); 1470 } 1471 if (offset16<0 || result>limit) { 1472 throw new StringIndexOutOfBoundsException (offset16); 1473 } 1474 if (shift32 > 0 ) { 1475 if (shift32 + result > size) { 1476 throw new StringIndexOutOfBoundsException (result); 1477 } 1478 count = shift32; 1479 while (result < limit && count > 0) 1480 { 1481 ch = source[result]; 1482 if (isLeadSurrogate(ch) && (result+1 < limit) && 1483 isTrailSurrogate(source[result+1])) { 1484 result ++; 1485 } 1486 count --; 1487 result ++; 1488 } 1489 } else { 1490 if (result + shift32 < start) { 1491 throw new StringIndexOutOfBoundsException (result); 1492 } 1493 for (count=-shift32; count>0; count--) { 1494 result--; 1495 if (result<start) { 1496 break; 1497 } 1498 ch = source[result]; 1499 if (isTrailSurrogate(ch) && result>start && isLeadSurrogate(source[result-1])) { 1500 result--; 1501 } 1502 } 1503 } 1504 if (count != 0) { 1505 throw new StringIndexOutOfBoundsException (shift32); 1506 } 1507 result -= start; 1508 return result; 1509 } 1510 1511 1532 public static StringBuffer insert(StringBuffer target, int offset16, 1533 int char32) 1534 { 1535 String str = valueOf(char32); 1536 if (offset16 != target.length() && 1537 bounds(target, offset16) == TRAIL_SURROGATE_BOUNDARY) { 1538 offset16 ++; 1539 } 1540 target.insert(offset16, str); 1541 return target; 1542 } 1543 1544 1565 public static int insert(char target[], int limit, int offset16, 1566 int char32) 1567 { 1568 String str = valueOf(char32); 1569 if (offset16 != limit && 1570 bounds(target, 0, limit, offset16) == TRAIL_SURROGATE_BOUNDARY) { 1571 offset16 ++; 1572 } 1573 int size = str.length(); 1574 if (limit + size > target.length) { 1575 throw new ArrayIndexOutOfBoundsException (offset16 + size); 1576 } 1577 System.arraycopy(target, offset16, target, offset16 + size, 1578 limit - offset16); 1579 target[offset16] = str.charAt(0); 1580 if (size == 2) { 1581 target[offset16 + 1] = str.charAt(1); 1582 } 1583 return limit + size; 1584 } 1585 1586 1596 public static StringBuffer delete(StringBuffer target, int offset16) 1597 { 1598 int count = 1; 1599 switch (bounds(target, offset16)) { 1600 case LEAD_SURROGATE_BOUNDARY: 1601 count ++; 1602 break; 1603 case TRAIL_SURROGATE_BOUNDARY: 1604 count ++; 1605 offset16 --; 1606 break; 1607 } 1608 target.delete(offset16, offset16 + count); 1609 return target; 1610 } 1611 1612 1623 public static int delete(char target[], int limit, int offset16) 1624 { 1625 int count = 1; 1626 switch (bounds(target, 0, limit, offset16)) { 1627 case LEAD_SURROGATE_BOUNDARY: 1628 count ++; 1629 break; 1630 case TRAIL_SURROGATE_BOUNDARY: 1631 count ++; 1632 offset16 --; 1633 break; 1634 } 1635 System.arraycopy(target, offset16 + count, target, offset16, 1636 limit - (offset16 + count)); 1637 target[limit - count] = 0; 1638 return limit - count; 1639 } 1640 1641 1661 public static int indexOf(String source, int char32) 1662 { 1663 if (char32 < CODEPOINT_MIN_VALUE || 1664 char32 > CODEPOINT_MAX_VALUE) { 1665 throw new IllegalArgumentException ( 1666 "Argument char32 is not a valid codepoint"); 1667 } 1668 if (char32 < LEAD_SURROGATE_MIN_VALUE || 1670 (char32 > TRAIL_SURROGATE_MAX_VALUE && 1671 char32 < SUPPLEMENTARY_MIN_VALUE)) { 1672 return source.indexOf((char)char32); 1673 } 1674 if (char32 < SUPPLEMENTARY_MIN_VALUE) { 1676 int result = source.indexOf((char)char32); 1677 if (result >= 0) { 1678 if (isLeadSurrogate((char)char32) && 1679 (result < source.length() - 1) && 1680 isTrailSurrogate(source.charAt(result + 1))) { 1681 return indexOf(source, char32, result + 1); 1682 } 1683 if (result > 0 && 1685 isLeadSurrogate(source.charAt(result - 1))) { 1686 return indexOf(source, char32, result + 1); 1687 } 1688 } 1689 return result; 1690 } 1691 String char32str = toString(char32); 1693 return source.indexOf(char32str); 1694 } 1695 1696 1721 public static int indexOf(String source, String str) 1722 { 1723 int strLength = str.length(); 1724 if (!isTrailSurrogate(str.charAt(0)) && 1726 !isLeadSurrogate(str.charAt(strLength - 1))) { 1727 return source.indexOf(str); 1728 } 1729 1730 int result = source.indexOf(str); 1731 int resultEnd = result + strLength; 1732 if (result >= 0) { 1733 if (isLeadSurrogate(str.charAt(strLength - 1)) && 1735 (result < source.length() - 1) && 1736 isTrailSurrogate(source.charAt(resultEnd + 1))) { 1737 return indexOf(source, str, resultEnd + 1); 1738 } 1739 if (isTrailSurrogate(str.charAt(0)) && result > 0 && 1741 isLeadSurrogate(source.charAt(result - 1))) { 1742 return indexOf(source, str, resultEnd + 1); 1743 } 1744 } 1745 return result; 1746 } 1747 1748 1770 public static int indexOf(String source, int char32, int fromIndex) 1771 { 1772 if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) { 1773 throw new IllegalArgumentException ( 1774 "Argument char32 is not a valid codepoint"); 1775 } 1776 if (char32 < LEAD_SURROGATE_MIN_VALUE || 1778 (char32 > TRAIL_SURROGATE_MAX_VALUE && 1779 char32 < SUPPLEMENTARY_MIN_VALUE)) { 1780 return source.indexOf((char)char32, fromIndex); 1781 } 1782 if (char32 < SUPPLEMENTARY_MIN_VALUE) { 1784 int result = source.indexOf((char)char32, fromIndex); 1785 if (result >= 0) { 1786 if (isLeadSurrogate((char)char32) && 1787 (result < source.length() - 1) && 1788 isTrailSurrogate(source.charAt(result + 1))) { 1789 return indexOf(source, char32, result + 1); 1790 } 1791 if (result > 0 && 1793 isLeadSurrogate(source.charAt(result - 1))) { 1794 return indexOf(source, char32, result + 1); 1795 } 1796 } 1797 return result; 1798 } 1799 String char32str = toString(char32); 1801 return source.indexOf(char32str, fromIndex); 1802 } 1803 1804 1831 public static int indexOf(String source, String str, int fromIndex) 1832 { 1833 int strLength = str.length(); 1834 if (!isTrailSurrogate(str.charAt(0)) && 1836 !isLeadSurrogate(str.charAt(strLength - 1))) { 1837 return source.indexOf(str, fromIndex); 1838 } 1839 1840 int result = source.indexOf(str, fromIndex); 1841 int resultEnd = result + strLength; 1842 if (result >= 0) { 1843 if (isLeadSurrogate(str.charAt(strLength - 1)) && 1845 (result < source.length() - 1) && 1846 isTrailSurrogate(source.charAt(resultEnd))) { 1847 return indexOf(source, str, resultEnd + 1); 1848 } 1849 if (isTrailSurrogate(str.charAt(0)) && result > 0 && 1851 isLeadSurrogate(source.charAt(result - 1))) { 1852 return indexOf(source, str, resultEnd + 1); 1853 } 1854 } 1855 return result; 1856 } 1857 1858 1878 public static int lastIndexOf(String source, int char32) 1879 { 1880 if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) { 1881 throw new IllegalArgumentException ( 1882 "Argument char32 is not a valid codepoint"); 1883 } 1884 if (char32 < LEAD_SURROGATE_MIN_VALUE || 1886 (char32 > TRAIL_SURROGATE_MAX_VALUE && 1887 char32 < SUPPLEMENTARY_MIN_VALUE)) { 1888 return source.lastIndexOf((char)char32); 1889 } 1890 if (char32 < SUPPLEMENTARY_MIN_VALUE) { 1892 int result = source.lastIndexOf((char)char32); 1893 if (result >= 0) { 1894 if (isLeadSurrogate((char)char32) && 1895 (result < source.length() - 1) && 1896 isTrailSurrogate(source.charAt(result + 1))) { 1897 return lastIndexOf(source, char32, result - 1); 1898 } 1899 if (result > 0 && 1901 isLeadSurrogate(source.charAt(result - 1))) { 1902 return lastIndexOf(source, char32, result - 1); 1903 } 1904 } 1905 return result; 1906 } 1907 String char32str = toString(char32); 1909 return source.lastIndexOf(char32str); 1910 } 1911 1912 1937 public static int lastIndexOf(String source, String str) 1938 { 1939 int strLength = str.length(); 1940 if (!isTrailSurrogate(str.charAt(0)) && 1942 !isLeadSurrogate(str.charAt(strLength - 1))) { 1943 return source.lastIndexOf(str); 1944 } 1945 1946 int result = source.lastIndexOf(str); 1947 if (result >= 0) { 1948 if (isLeadSurrogate(str.charAt(strLength - 1)) && 1950 (result < source.length() - 1) && 1951 isTrailSurrogate(source.charAt(result + strLength + 1))) { 1952 return lastIndexOf(source, str, result - 1); 1953 } 1954 if (isTrailSurrogate(str.charAt(0)) && result > 0 && 1956 isLeadSurrogate(source.charAt(result - 1))) { 1957 return lastIndexOf(source, str, result - 1); 1958 } 1959 } 1960 return result; 1961 } 1962 1963 1994 public static int lastIndexOf(String source, int char32, int fromIndex) 1995 { 1996 if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) { 1997 throw new IllegalArgumentException ( 1998 "Argument char32 is not a valid codepoint"); 1999 } 2000 if (char32 < LEAD_SURROGATE_MIN_VALUE || 2002 (char32 > TRAIL_SURROGATE_MAX_VALUE && 2003 char32 < SUPPLEMENTARY_MIN_VALUE)) { 2004 return source.lastIndexOf((char)char32, fromIndex); 2005 } 2006 if (char32 < SUPPLEMENTARY_MIN_VALUE) { 2008 int result = source.lastIndexOf((char)char32, fromIndex); 2009 if (result >= 0) { 2010 if (isLeadSurrogate((char)char32) && 2011 (result < source.length() - 1) && 2012 isTrailSurrogate(source.charAt(result + 1))) { 2013 return lastIndexOf(source, char32, result - 1); 2014 } 2015 if (result > 0 && 2017 isLeadSurrogate(source.charAt(result - 1))) { 2018 return lastIndexOf(source, char32, result - 1); 2019 } 2020 } 2021 return result; 2022 } 2023 String char32str = toString(char32); 2025 return source.lastIndexOf(char32str, fromIndex); 2026 } 2027 2028 2065 public static int lastIndexOf(String source, String str, int fromIndex) 2066 { 2067 int strLength = str.length(); 2068 if (!isTrailSurrogate(str.charAt(0)) && 2070 !isLeadSurrogate(str.charAt(strLength - 1))) { 2071 return source.lastIndexOf(str, fromIndex); 2072 } 2073 2074 int result = source.lastIndexOf(str, fromIndex); 2075 if (result >= 0) { 2076 if (isLeadSurrogate(str.charAt(strLength - 1)) && 2078 (result < source.length() - 1) && 2079 isTrailSurrogate(source.charAt(result + strLength))) { 2080 return lastIndexOf(source, str, result - 1); 2081 } 2082 if (isTrailSurrogate(str.charAt(0)) && result > 0 && 2084 isLeadSurrogate(source.charAt(result - 1))) { 2085 return lastIndexOf(source, str, result - 1); 2086 } 2087 } 2088 return result; 2089 } 2090 2091 2121 public static String replace(String source, int oldChar32, 2122 int newChar32) 2123 { 2124 if (oldChar32 <= 0 || oldChar32 > CODEPOINT_MAX_VALUE) { 2125 throw new IllegalArgumentException ( 2126 "Argument oldChar32 is not a valid codepoint"); 2127 } 2128 if (newChar32 <= 0 || newChar32 > CODEPOINT_MAX_VALUE) { 2129 throw new IllegalArgumentException ( 2130 "Argument newChar32 is not a valid codepoint"); 2131 } 2132 2133 int index = indexOf(source, oldChar32); 2134 if (index == -1) { 2135 return source; 2136 } 2137 String newChar32Str = toString(newChar32); 2138 int oldChar32Size = 1; 2139 int newChar32Size = newChar32Str.length(); 2140 StringBuffer result = new StringBuffer (source); 2141 int resultIndex = index; 2142 2143 if (oldChar32 >= SUPPLEMENTARY_MIN_VALUE) { 2144 oldChar32Size = 2; 2145 } 2146 2147 while (index != -1) { 2148 int endResultIndex = resultIndex + oldChar32Size; 2149 result.replace(resultIndex, endResultIndex, newChar32Str); 2150 int lastEndIndex = index + oldChar32Size; 2151 index = indexOf(source, oldChar32, lastEndIndex); 2152 resultIndex += newChar32Size + index - lastEndIndex; 2153 } 2154 return result.toString(); 2155 } 2156 2157 2190 public static String replace(String source, String oldStr, 2191 String newStr) 2192 { 2193 int index = indexOf(source, oldStr); 2194 if (index == -1) { 2195 return source; 2196 } 2197 int oldStrSize = oldStr.length(); 2198 int newStrSize = newStr.length(); 2199 StringBuffer result = new StringBuffer (source); 2200 int resultIndex = index; 2201 2202 while (index != -1) { 2203 int endResultIndex = resultIndex + oldStrSize; 2204 result.replace(resultIndex, endResultIndex, newStr); 2205 int lastEndIndex = index + oldStrSize; 2206 index = indexOf(source, oldStr, lastEndIndex); 2207 resultIndex += newStrSize + index - lastEndIndex; 2208 } 2209 return result.toString(); 2210 } 2211 2212 2227 public static StringBuffer reverse(StringBuffer source) 2228 { 2229 int length = source.length(); 2230 StringBuffer result = new StringBuffer (length); 2231 for (int i = length; i-- > 0;) { 2232 char ch = source.charAt(i); 2233 if (isTrailSurrogate(ch) && i > 0) { 2234 char ch2 = source.charAt(i-1); 2235 if (isLeadSurrogate(ch2)) { 2236 result.append(ch2); 2237 result.append(ch); 2238 --i; 2239 continue; 2240 } 2241 } 2242 result.append(ch); 2243 } 2244 return result; 2245 } 2246 2247 2262 public static boolean hasMoreCodePointsThan(String source, int number) 2263 { 2264 if (number < 0) { 2265 return true; 2266 } 2267 if (source == null) { 2268 return false; 2269 } 2270 int length = source.length(); 2271 2272 if (((length + 1) >> 1) > number) { 2276 return true; 2277 } 2278 2279 int maxsupplementary = length - number; 2281 if (maxsupplementary <= 0) { 2282 return false; 2283 } 2284 2285 2288 int start = 0; 2291 while (true) { 2292 if (length == 0) { 2293 return false; 2294 } 2295 if (number == 0) { 2296 return true; 2297 } 2298 if (isLeadSurrogate(source.charAt(start ++)) && start != length 2299 && isTrailSurrogate(source.charAt(start))) { 2300 start ++; 2301 if (-- maxsupplementary <= 0) { 2302 return false; 2304 } 2305 } 2306 -- number; 2307 } 2308 } 2309 2310 2330 public static boolean hasMoreCodePointsThan(char source[], int start, 2331 int limit, int number) 2332 { 2333 int length = limit - start; 2334 if (length < 0 || start < 0 || limit < 0) { 2335 throw new IndexOutOfBoundsException ( 2336 "Start and limit indexes should be non-negative and start <= limit"); 2337 } 2338 if (number < 0) { 2339 return true; 2340 } 2341 if (source == null) { 2342 return false; 2343 } 2344 2345 if (((length + 1) >> 1) > number) { 2349 return true; 2350 } 2351 2352 int maxsupplementary = length - number; 2354 if (maxsupplementary <= 0) { 2355 return false; 2356 } 2357 2358 2361 while (true) { 2364 if (length == 0) { 2365 return false; 2366 } 2367 if (number == 0) { 2368 return true; 2369 } 2370 if (isLeadSurrogate(source[start ++]) && start != limit 2371 && isTrailSurrogate(source[start])) { 2372 start ++; 2373 if (-- maxsupplementary <= 0) { 2374 return false; 2376 } 2377 } 2378 -- number; 2379 } 2380 } 2381 2382 2398 public static boolean hasMoreCodePointsThan(StringBuffer source, int number) 2399 { 2400 if (number < 0) { 2401 return true; 2402 } 2403 if (source == null) { 2404 return false; 2405 } 2406 int length = source.length(); 2407 2408 if (((length + 1) >> 1) > number) { 2412 return true; 2413 } 2414 2415 int maxsupplementary = length - number; 2417 if (maxsupplementary <= 0) { 2418 return false; 2419 } 2420 2421 2424 int start = 0; 2427 while (true) { 2428 if (length == 0) { 2429 return false; 2430 } 2431 if (number == 0) { 2432 return true; 2433 } 2434 if (isLeadSurrogate(source.charAt(start ++)) && start != length 2435 && isTrailSurrogate(source.charAt(start))) { 2436 start ++; 2437 if (-- maxsupplementary <= 0) { 2438 return false; 2440 } 2441 } 2442 -- number; 2443 } 2444 } 2445 2446 2456 public static String newString(int[] codePoints, int offset, int count) { 2457 if (count < 0) { 2458 throw new IllegalArgumentException (); 2459 } 2460 char[] chars = new char[count]; 2461 int w = 0; 2462 for (int r = offset, e = offset + count; r < e; ++r) { 2463 int cp = codePoints[r]; 2464 if (cp < 0 || cp > 0x10ffff) { 2465 throw new IllegalArgumentException (); 2466 } 2467 while (true) { 2468 try { 2469 if (cp < 0x010000) { 2470 chars[w] = (char)cp; 2471 w++; 2472 } else { 2473 chars[w] = (char)(LEAD_SURROGATE_OFFSET_ + 2474 (cp >> LEAD_SURROGATE_SHIFT_)); 2475 chars[w+1] = (char)(TRAIL_SURROGATE_MIN_VALUE + 2476 (cp & TRAIL_SURROGATE_MASK_)); 2477 w += 2; 2478 } 2479 break; 2480 } 2481 catch (IndexOutOfBoundsException ex) { 2482 int newlen = (int)(Math.ceil((double)codePoints.length * (w+2) / (r-offset+1))); 2483 char[] temp = new char[newlen]; 2484 System.arraycopy(chars, 0, temp, 0, w); 2485 chars = temp; 2486 } 2487 } 2488 } 2489 return new String (chars, 0, w); 2490 } 2491 2492 2510 public static final class StringComparator implements java.util.Comparator 2511 { 2512 2514 2519 public StringComparator() 2520 { 2521 this(false, false, FOLD_CASE_DEFAULT); 2522 } 2523 2524 2539 public StringComparator(boolean codepointcompare, 2540 boolean ignorecase, 2541 int foldcaseoption) 2542 { 2543 setCodePointCompare(codepointcompare); 2544 m_ignoreCase_ = ignorecase; 2545 if (foldcaseoption < FOLD_CASE_DEFAULT 2546 || foldcaseoption > FOLD_CASE_EXCLUDE_SPECIAL_I) { 2547 throw new IllegalArgumentException ("Invalid fold case option"); 2548 } 2549 m_foldCase_ = foldcaseoption; 2550 } 2551 2552 2554 2562 public static final int FOLD_CASE_DEFAULT = 0; 2563 2580 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 1; 2581 2582 2584 2586 2592 public void setCodePointCompare(boolean flag) 2593 { 2594 if (flag) { 2595 m_codePointCompare_ = Normalizer.COMPARE_CODE_POINT_ORDER; 2596 } 2597 else { 2598 m_codePointCompare_ = 0; 2599 } 2600 } 2601 2602 2615 public void setIgnoreCase(boolean ignorecase, int foldcaseoption) 2616 { 2617 m_ignoreCase_ = ignorecase; 2618 if (foldcaseoption < FOLD_CASE_DEFAULT 2619 || foldcaseoption > FOLD_CASE_EXCLUDE_SPECIAL_I) { 2620 throw new IllegalArgumentException ("Invalid fold case option"); 2621 } 2622 m_foldCase_ = foldcaseoption; 2623 } 2624 2625 2627 2632 public boolean getCodePointCompare() 2633 { 2634 return m_codePointCompare_ == Normalizer.COMPARE_CODE_POINT_ORDER; 2635 } 2636 2637 2643 public boolean getIgnoreCase() 2644 { 2645 return m_ignoreCase_; 2646 } 2647 2648 2656 public int getIgnoreCaseOption() 2657 { 2658 return m_foldCase_; 2659 } 2660 2661 2663 2674 public int compare(Object a, Object b) 2675 { 2676 String str1 = (String )a; 2677 String str2 = (String )b; 2678 2679 if (str1 == str2) { 2680 return 0; 2681 } 2682 if (str1 == null) { 2683 return -1; 2684 } 2685 if (str2 == null) { 2686 return 1; 2687 } 2688 2689 if (m_ignoreCase_) { 2690 return compareCaseInsensitive(str1, str2); 2691 } 2692 return compareCaseSensitive(str1, str2); 2693 } 2694 2695 2697 2701 private int m_codePointCompare_; 2702 2703 2706 private int m_foldCase_; 2707 2708 2711 private boolean m_ignoreCase_; 2712 2713 2716 private static final int CODE_POINT_COMPARE_SURROGATE_OFFSET_ = 0x2800; 2717 2718 2720 2727 private int compareCaseInsensitive(String s1, String s2) 2728 { 2729 return NormalizerImpl.cmpEquivFold(s1, s2, 2730 m_foldCase_ | m_codePointCompare_ 2731 | Normalizer.COMPARE_IGNORE_CASE); 2732 } 2733 2734 2741 private int compareCaseSensitive(String s1, String s2) 2742 { 2743 int length1 = s1.length(); 2746 int length2 = s2.length(); 2747 int minlength = length1; 2748 int result = 0; 2749 if (length1 < length2) { 2750 result = -1; 2751 } 2752 else if (length1 > length2) { 2753 result = 1; 2754 minlength = length2; 2755 } 2756 2757 char c1 = 0; 2758 char c2 = 0; 2759 int index = 0; 2760 for (; index < minlength; index ++) { 2761 c1 = s1.charAt(index); 2762 c2 = s2.charAt(index); 2763 if (c1 != c2) { 2765 break; 2766 } 2767 } 2768 2769 if (index == minlength) { 2770 return result; 2771 } 2772 2773 boolean codepointcompare 2774 = m_codePointCompare_ == Normalizer.COMPARE_CODE_POINT_ORDER; 2775 if (c1 >= LEAD_SURROGATE_MIN_VALUE 2777 && c2 >= LEAD_SURROGATE_MIN_VALUE && codepointcompare) { 2778 if ((c1 <= LEAD_SURROGATE_MAX_VALUE && (index + 1) != length1 2781 && isTrailSurrogate(s1.charAt(index + 1))) 2782 || (isTrailSurrogate(c1) && index != 0 2783 && isLeadSurrogate(s1.charAt(index - 1)))) { 2784 } 2786 else { 2787 c1 -= CODE_POINT_COMPARE_SURROGATE_OFFSET_; 2790 } 2791 2792 if ((c2 <= LEAD_SURROGATE_MAX_VALUE 2793 && (index + 1) != length2 2794 && isTrailSurrogate(s2.charAt(index + 1))) || 2795 (isTrailSurrogate(c2) && index != 0 2796 && isLeadSurrogate(s2.charAt(index - 1)))) { 2797 } 2799 else { 2800 c2 -= CODE_POINT_COMPARE_SURROGATE_OFFSET_; 2802 } 2803 } 2804 2805 return c1 - c2; 2807 } 2808 } 2809 2810 2812 2815 private static final int LEAD_SURROGATE_SHIFT_ = 10; 2816 2817 2820 private static final int TRAIL_SURROGATE_MASK_ = 0x3FF; 2821 2822 2825 private static final int LEAD_SURROGATE_OFFSET_ = 2826 LEAD_SURROGATE_MIN_VALUE - 2827 (SUPPLEMENTARY_MIN_VALUE 2828 >> LEAD_SURROGATE_SHIFT_); 2829 2830 2832 2843 private static String toString(int ch) 2844 { 2845 if (ch < SUPPLEMENTARY_MIN_VALUE) { 2846 return String.valueOf((char)ch); 2847 } 2848 2849 StringBuffer result = new StringBuffer (); 2850 result.append(getLeadSurrogate(ch)); 2851 result.append(getTrailSurrogate(ch)); 2852 return result.toString(); 2853 } 2854} 2855 | Popular Tags |