1 8 package com.ibm.icu.text; 9 10 import java.text.*; 11 import com.ibm.icu.lang.*; 12 13 import java.io.IOException ; 14 15 import com.ibm.icu.impl.CollectionUtilities; 16 import com.ibm.icu.impl.NormalizerImpl; 17 import com.ibm.icu.impl.Utility; 18 import com.ibm.icu.impl.UCharacterProperty; 19 import com.ibm.icu.impl.UBiDiProps; 20 import com.ibm.icu.impl.UCaseProps; 21 import com.ibm.icu.impl.UPropertyAliases; 22 import com.ibm.icu.impl.SortedSetRelation; 23 import com.ibm.icu.impl.RuleCharacterIterator; 24 25 import com.ibm.icu.util.Freezable; 26 import com.ibm.icu.util.ULocale; 27 import com.ibm.icu.util.VersionInfo; 28 29 import com.ibm.icu.text.BreakIterator; 30 31 import java.util.Map ; 32 import java.util.HashMap ; 33 import java.util.MissingResourceException ; 34 import java.util.TreeSet ; 35 import java.util.Iterator ; 36 import java.util.Collection ; 37 38 271 public class UnicodeSet extends UnicodeFilter implements Freezable { 272 273 private static final int LOW = 0x000000; private static final int HIGH = 0x110000; 277 281 public static final int MIN_VALUE = LOW; 282 283 287 public static final int MAX_VALUE = HIGH - 1; 288 289 private int len; private int[] list; private int[] rangeList; private int[] buffer; 294 TreeSet strings = new TreeSet (); 297 298 307 private String pat = null; 308 309 private static final int START_EXTRA = 16; private static final int GROW_EXTRA = START_EXTRA; 312 private static final String ANY_ID = "ANY"; private static final String ASCII_ID = "ASCII"; private static final String ASSIGNED = "Assigned"; 317 323 private static UnicodeSet INCLUSIONS[] = null; 324 325 329 333 public UnicodeSet() { 334 list = new int[1 + START_EXTRA]; 335 list[len++] = HIGH; 336 } 337 338 342 public UnicodeSet(UnicodeSet other) { 343 set(other); 344 } 345 346 354 public UnicodeSet(int start, int end) { 355 this(); 356 complement(start, end); 357 } 358 359 367 public UnicodeSet(String pattern) { 368 this(); 369 applyPattern(pattern, null, 
null, IGNORE_SPACE); 370 } 371 372 382 public UnicodeSet(String pattern, boolean ignoreWhitespace) { 383 this(); 384 applyPattern(pattern, null, null, ignoreWhitespace ? IGNORE_SPACE : 0); 385 } 386 387 398 public UnicodeSet(String pattern, int options) { 399 this(); 400 applyPattern(pattern, null, null, options); 401 } 402 403 415 public UnicodeSet(String pattern, ParsePosition pos, SymbolTable symbols) { 416 this(); 417 applyPattern(pattern, pos, symbols, IGNORE_SPACE); 418 } 419 420 435 public UnicodeSet(String pattern, ParsePosition pos, SymbolTable symbols, int options) { 436 this(); 437 applyPattern(pattern, pos, symbols, options); 438 } 439 440 441 445 public Object clone() { 446 UnicodeSet result = new UnicodeSet(this); 447 result.frozen = this.frozen; 448 return result; 449 } 450 451 460 public UnicodeSet set(int start, int end) { 461 checkFrozen(); 462 clear(); 463 complement(start, end); 464 return this; 465 } 466 467 473 public UnicodeSet set(UnicodeSet other) { 474 checkFrozen(); 475 list = (int[]) other.list.clone(); 476 len = other.len; 477 pat = other.pat; 478 strings = (TreeSet )other.strings.clone(); 479 return this; 480 } 481 482 491 public final UnicodeSet applyPattern(String pattern) { 492 checkFrozen(); 493 return applyPattern(pattern, null, null, IGNORE_SPACE); 494 } 495 496 507 public UnicodeSet applyPattern(String pattern, boolean ignoreWhitespace) { 508 checkFrozen(); 509 return applyPattern(pattern, null, null, ignoreWhitespace ? 
IGNORE_SPACE : 0); 510 } 511 512 524 public UnicodeSet applyPattern(String pattern, int options) { 525 checkFrozen(); 526 return applyPattern(pattern, null, null, options); 527 } 528 529 534 public static boolean resemblesPattern(String pattern, int pos) { 535 return ((pos+1) < pattern.length() && 536 pattern.charAt(pos) == '[') || 537 resemblesPropertyPattern(pattern, pos); 538 } 539 540 544 private static void _appendToPat(StringBuffer buf, String s, boolean escapeUnprintable) { 545 for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) { 546 _appendToPat(buf, UTF16.charAt(s, i), escapeUnprintable); 547 } 548 } 549 550 554 private static void _appendToPat(StringBuffer buf, int c, boolean escapeUnprintable) { 555 if (escapeUnprintable && Utility.isUnprintable(c)) { 556 if (Utility.escapeUnprintable(buf, c)) { 559 return; 560 } 561 } 562 switch (c) { 564 case '[': case ']': case '-': case '^': case '&': case '\\': case '{': 571 case '}': 572 case '$': 573 case ':': 574 buf.append('\\'); 575 break; 576 default: 577 if (UCharacterProperty.isRuleWhiteSpace(c)) { 579 buf.append('\\'); 580 } 581 break; 582 } 583 UTF16.append(buf, c); 584 } 585 586 592 public String toPattern(boolean escapeUnprintable) { 593 StringBuffer result = new StringBuffer (); 594 return _toPattern(result, escapeUnprintable).toString(); 595 } 596 597 602 private StringBuffer _toPattern(StringBuffer result, 603 boolean escapeUnprintable) { 604 if (pat != null) { 605 int i; 606 int backslashCount = 0; 607 for (i=0; i<pat.length(); ) { 608 int c = UTF16.charAt(pat, i); 609 i += UTF16.getCharCount(c); 610 if (escapeUnprintable && Utility.isUnprintable(c)) { 611 if ((backslashCount % 2) == 1) { 616 result.setLength(result.length() - 1); 617 } 618 Utility.escapeUnprintable(result, c); 619 backslashCount = 0; 620 } else { 621 UTF16.append(result, c); 622 if (c == '\\') { 623 ++backslashCount; 624 } else { 625 backslashCount = 0; 626 } 627 } 628 } 629 return result; 630 } 631 632 return 
_generatePattern(result, escapeUnprintable, true); 633 } 634 635 643 public StringBuffer _generatePattern(StringBuffer result, boolean escapeUnprintable) { 644 return _generatePattern(result, escapeUnprintable, true); 645 } 646 647 655 public StringBuffer _generatePattern(StringBuffer result, 656 boolean escapeUnprintable, boolean includeStrings) { 657 result.append('['); 658 659 669 int count = getRangeCount(); 670 671 if (count > 1 && 675 getRangeStart(0) == MIN_VALUE && 676 getRangeEnd(count-1) == MAX_VALUE) { 677 678 result.append('^'); 680 681 for (int i = 1; i < count; ++i) { 682 int start = getRangeEnd(i-1)+1; 683 int end = getRangeStart(i)-1; 684 _appendToPat(result, start, escapeUnprintable); 685 if (start != end) { 686 if ((start+1) != end) { 687 result.append('-'); 688 } 689 _appendToPat(result, end, escapeUnprintable); 690 } 691 } 692 } 693 694 else { 696 for (int i = 0; i < count; ++i) { 697 int start = getRangeStart(i); 698 int end = getRangeEnd(i); 699 _appendToPat(result, start, escapeUnprintable); 700 if (start != end) { 701 if ((start+1) != end) { 702 result.append('-'); 703 } 704 _appendToPat(result, end, escapeUnprintable); 705 } 706 } 707 } 708 709 if (includeStrings && strings.size() > 0) { 710 Iterator it = strings.iterator(); 711 while (it.hasNext()) { 712 result.append('{'); 713 _appendToPat(result, (String ) it.next(), escapeUnprintable); 714 result.append('}'); 715 } 716 } 717 return result.append(']'); 718 } 719 720 728 public int size() { 729 int n = 0; 730 int count = getRangeCount(); 731 for (int i = 0; i < count; ++i) { 732 n += getRangeEnd(i) - getRangeStart(i) + 1; 733 } 734 return n + strings.size(); 735 } 736 737 743 public boolean isEmpty() { 744 return len == 1 && strings.size() == 0; 745 } 746 747 754 public boolean matchesIndexValue(int v) { 755 763 for (int i=0; i<getRangeCount(); ++i) { 764 int low = getRangeStart(i); 765 int high = getRangeEnd(i); 766 if ((low & ~0xFF) == (high & ~0xFF)) { 767 if ((low & 0xFF) <= v && v <= 
(high & 0xFF)) { 768 return true; 769 } 770 } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) { 771 return true; 772 } 773 } 774 if (strings.size() != 0) { 775 Iterator it = strings.iterator(); 776 while (it.hasNext()) { 777 String s = (String ) it.next(); 778 int c = UTF16.charAt(s, 0); 784 if ((c & 0xFF) == v) { 785 return true; 786 } 787 } 788 } 789 return false; 790 } 791 792 797 public int matches(Replaceable text, 798 int[] offset, 799 int limit, 800 boolean incremental) { 801 802 if (offset[0] == limit) { 803 if (contains(UnicodeMatcher.ETHER)) { 807 return incremental ? U_PARTIAL_MATCH : U_MATCH; 808 } else { 809 return U_MISMATCH; 810 } 811 } else { 812 if (strings.size() != 0) { 814 817 821 Iterator it = strings.iterator(); 822 boolean forward = offset[0] < limit; 823 824 char firstChar = text.charAt(offset[0]); 828 829 int highWaterLength = 0; 832 833 while (it.hasNext()) { 834 String trial = (String ) it.next(); 835 836 841 char c = trial.charAt(forward ? 0 : trial.length() - 1); 842 843 if (forward && c > firstChar) break; 846 if (c != firstChar) continue; 847 848 int len = matchRest(text, offset[0], limit, trial); 849 850 if (incremental) { 851 int maxLen = forward ? limit-offset[0] : offset[0]-limit; 852 if (len == maxLen) { 853 return U_PARTIAL_MATCH; 855 } 856 } 857 858 if (len == trial.length()) { 859 if (len > highWaterLength) { 861 highWaterLength = len; 862 } 863 if (forward && len < highWaterLength) { 866 break; 867 } 868 continue; 869 } 870 } 871 872 if (highWaterLength != 0) { 875 offset[0] += forward ? 
highWaterLength : -highWaterLength; 876 return U_MATCH; 877 } 878 } 879 return super.matches(text, offset, limit, incremental); 880 } 881 } 882 883 904 private static int matchRest (Replaceable text, int start, int limit, String s) { 905 int maxLen; 906 int slen = s.length(); 907 if (start < limit) { 908 maxLen = limit - start; 909 if (maxLen > slen) maxLen = slen; 910 for (int i = 1; i < maxLen; ++i) { 911 if (text.charAt(start + i) != s.charAt(i)) return 0; 912 } 913 } else { 914 maxLen = start - limit; 915 if (maxLen > slen) maxLen = slen; 916 --slen; for (int i = 1; i < maxLen; ++i) { 918 if (text.charAt(start - i) != s.charAt(slen - i)) return 0; 919 } 920 } 921 return maxLen; 922 } 923 924 963 970 public void addMatchSetTo(UnicodeSet toUnionTo) { 971 toUnionTo.addAll(this); 972 } 973 974 982 public int indexOf(int c) { 983 if (c < MIN_VALUE || c > MAX_VALUE) { 984 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(c, 6)); 985 } 986 int i = 0; 987 int n = 0; 988 for (;;) { 989 int start = list[i++]; 990 if (c < start) { 991 return -1; 992 } 993 int limit = list[i++]; 994 if (c < limit) { 995 return n + c - start; 996 } 997 n += limit - start; 998 } 999 } 1000 1001 1010 public int charAt(int index) { 1011 if (index >= 0) { 1012 int len2 = len & ~1; 1016 for (int i=0; i < len2;) { 1017 int start = list[i++]; 1018 int count = list[i++] - start; 1019 if (index < count) { 1020 return start + index; 1021 } 1022 index -= count; 1023 } 1024 } 1025 return -1; 1026 } 1027 1028 1040 public UnicodeSet add(int start, int end) { 1041 checkFrozen(); 1042 return add_unchecked(start, end); 1043 } 1044 1045 private UnicodeSet add_unchecked(int start, int end) { 1047 if (start < MIN_VALUE || start > MAX_VALUE) { 1048 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(start, 6)); 1049 } 1050 if (end < MIN_VALUE || end > MAX_VALUE) { 1051 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(end, 6)); 1052 } 1053 if 
(start < end) { 1054 add(range(start, end), 2, 0); 1055 } else if (start == end) { 1056 add(start); 1057 } 1058 return this; 1059 } 1060 1061 1080 1086 public final UnicodeSet add(int c) { 1087 checkFrozen(); 1088 return add_unchecked(c); 1089 } 1090 1091 private final UnicodeSet add_unchecked(int c) { 1093 if (c < MIN_VALUE || c > MAX_VALUE) { 1094 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(c, 6)); 1095 } 1096 1097 int i = findCodePoint(c); 1101 1102 if ((i & 1) != 0) return this; 1104 1105 1108 1111 1115 1117 if (c == list[i]-1) { 1118 list[i] = c; 1120 if (c == MAX_VALUE) { 1122 ensureCapacity(len+1); 1123 list[len++] = HIGH; 1124 } 1125 if (i > 0 && c == list[i-1]) { 1126 1128 System.arraycopy(list, i+1, list, i-1, len-i-1); 1132 len -= 2; 1133 } 1134 } 1135 1136 else if (i > 0 && c == list[i-1]) { 1137 list[i-1]++; 1139 } 1141 1142 else { 1143 1146 1147 1151 1155 if (len+2 > list.length) { 1159 int[] temp = new int[len + 2 + GROW_EXTRA]; 1160 if (i != 0) System.arraycopy(list, 0, temp, 0, i); 1161 System.arraycopy(list, i, temp, i+2, len-i); 1162 list = temp; 1163 } else { 1164 System.arraycopy(list, i, list, i+2, len-i); 1165 } 1166 1167 list[i] = c; 1168 list[i+1] = c+1; 1169 len += 2; 1170 } 1171 1172 pat = null; 1173 return this; 1174 } 1175 1176 1186 public final UnicodeSet add(String s) { 1187 checkFrozen(); 1188 int cp = getSingleCP(s); 1189 if (cp < 0) { 1190 strings.add(s); 1191 pat = null; 1192 } else { 1193 add_unchecked(cp, cp); 1194 } 1195 return this; 1196 } 1197 1198 1203 private static int getSingleCP(String s) { 1204 if (s.length() < 1) { 1205 throw new IllegalArgumentException ("Can't use zero-length strings in UnicodeSet"); 1206 } 1207 if (s.length() > 2) return -1; 1208 if (s.length() == 1) return s.charAt(0); 1209 1210 int cp = UTF16.charAt(s, 0); 1212 if (cp > 0xFFFF) { return cp; 1214 } 1215 return -1; 1216 } 1217 1218 1225 public final UnicodeSet addAll(String s) { 1226 checkFrozen(); 1227 int cp; 1228 for 
(int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 1229 cp = UTF16.charAt(s, i); 1230 add_unchecked(cp, cp); 1231 } 1232 return this; 1233 } 1234 1235 1242 public final UnicodeSet retainAll(String s) { 1243 return retainAll(fromAll(s)); 1244 } 1245 1246 1253 public final UnicodeSet complementAll(String s) { 1254 return complementAll(fromAll(s)); 1255 } 1256 1257 1264 public final UnicodeSet removeAll(String s) { 1265 return removeAll(fromAll(s)); 1266 } 1267 1268 1275 public static UnicodeSet from(String s) { 1276 return new UnicodeSet().add(s); 1277 } 1278 1279 1280 1286 public static UnicodeSet fromAll(String s) { 1287 return new UnicodeSet().addAll(s); 1288 } 1289 1290 1291 1302 public UnicodeSet retain(int start, int end) { 1303 checkFrozen(); 1304 if (start < MIN_VALUE || start > MAX_VALUE) { 1305 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(start, 6)); 1306 } 1307 if (end < MIN_VALUE || end > MAX_VALUE) { 1308 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(end, 6)); 1309 } 1310 if (start <= end) { 1311 retain(range(start, end), 2, 0); 1312 } else { 1313 clear(); 1314 } 1315 return this; 1316 } 1317 1318 1326 public final UnicodeSet retain(int c) { 1327 return retain(c, c); 1328 } 1329 1330 1338 public final UnicodeSet retain(String s) { 1339 int cp = getSingleCP(s); 1340 if (cp < 0) { 1341 boolean isIn = strings.contains(s); 1342 if (isIn && size() == 1) { 1343 return this; 1344 } 1345 clear(); 1346 strings.add(s); 1347 pat = null; 1348 } else { 1349 retain(cp, cp); 1350 } 1351 return this; 1352 } 1353 1354 1366 public UnicodeSet remove(int start, int end) { 1367 checkFrozen(); 1368 if (start < MIN_VALUE || start > MAX_VALUE) { 1369 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(start, 6)); 1370 } 1371 if (end < MIN_VALUE || end > MAX_VALUE) { 1372 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(end, 6)); 1373 } 1374 if (start <= end) { 1375 
retain(range(start, end), 2, 2); 1376 } 1377 return this; 1378 } 1379 1380 1388 public final UnicodeSet remove(int c) { 1389 return remove(c, c); 1390 } 1391 1392 1400 public final UnicodeSet remove(String s) { 1401 int cp = getSingleCP(s); 1402 if (cp < 0) { 1403 strings.remove(s); 1404 pat = null; 1405 } else { 1406 remove(cp, cp); 1407 } 1408 return this; 1409 } 1410 1411 1423 public UnicodeSet complement(int start, int end) { 1424 checkFrozen(); 1425 if (start < MIN_VALUE || start > MAX_VALUE) { 1426 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(start, 6)); 1427 } 1428 if (end < MIN_VALUE || end > MAX_VALUE) { 1429 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(end, 6)); 1430 } 1431 if (start <= end) { 1432 xor(range(start, end), 2, 0); 1433 } 1434 pat = null; 1435 return this; 1436 } 1437 1438 1444 public final UnicodeSet complement(int c) { 1445 return complement(c, c); 1446 } 1447 1448 1453 public UnicodeSet complement() { 1454 checkFrozen(); 1455 if (list[0] == LOW) { 1456 System.arraycopy(list, 1, list, 0, len-1); 1457 --len; 1458 } else { 1459 ensureCapacity(len+1); 1460 System.arraycopy(list, 0, list, 1, len); 1461 list[0] = LOW; 1462 ++len; 1463 } 1464 pat = null; 1465 return this; 1466 } 1467 1468 1477 public final UnicodeSet complement(String s) { 1478 checkFrozen(); 1479 int cp = getSingleCP(s); 1480 if (cp < 0) { 1481 if (strings.contains(s)) strings.remove(s); 1482 else strings.add(s); 1483 pat = null; 1484 } else { 1485 complement(cp, cp); 1486 } 1487 return this; 1488 } 1489 1490 1496 public boolean contains(int c) { 1497 if (c < MIN_VALUE || c > MAX_VALUE) { 1498 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(c, 6)); 1499 } 1500 1501 1509 1510 int i = findCodePoint(c); 1511 1512 return ((i & 1) != 0); } 1514 1515 1524 private final int findCodePoint(int c) { 1525 1534 1535 if (c < list[0]) return 0; 1538 if (len >= 2 && c >= list[len-2]) return len-1; 1541 int lo = 
0; 1542 int hi = len - 1; 1543 for (;;) { 1546 int i = (lo + hi) >>> 1; 1547 if (i == lo) return hi; 1548 if (c < list[i]) { 1549 hi = i; 1550 } else { 1551 lo = i; 1552 } 1553 } 1554 } 1555 1556 1671 1679 public boolean contains(int start, int end) { 1680 if (start < MIN_VALUE || start > MAX_VALUE) { 1681 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(start, 6)); 1682 } 1683 if (end < MIN_VALUE || end > MAX_VALUE) { 1684 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(end, 6)); 1685 } 1686 int i = findCodePoint(start); 1691 return ((i & 1) != 0 && end < list[i]); 1692 } 1693 1694 1701 public final boolean contains(String s) { 1702 1703 int cp = getSingleCP(s); 1704 if (cp < 0) { 1705 return strings.contains(s); 1706 } else { 1707 return contains(cp); 1708 } 1709 } 1710 1711 1718 public boolean containsAll(UnicodeSet c) { 1719 int n = c.getRangeCount(); 1723 for (int i=0; i<n; ++i) { 1724 if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) { 1725 return false; 1726 } 1727 } 1728 if (!strings.containsAll(c.strings)) return false; 1729 return true; 1730 } 1731 1732 1741 public boolean containsAll(String s) { 1742 int cp; 1743 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 1744 cp = UTF16.charAt(s, i); 1745 if (!contains(cp)) { 1746 if (strings.size() == 0) { 1747 return false; 1748 } 1749 return containsAll(s, 0); 1750 } 1751 } 1752 return true; 1753 } 1754 1755 1761 private boolean containsAll(String s, int i) { 1762 if (i >= s.length()) { 1763 return true; 1764 } 1765 int cp= UTF16.charAt(s, i); 1766 if (contains(cp) && containsAll(s, i+UTF16.getCharCount(cp))) { 1767 return true; 1768 } 1769 1770 Iterator it = strings.iterator(); 1771 while (it.hasNext()) { 1772 String setStr = (String )it.next(); 1773 if (s.startsWith(setStr, i) && containsAll(s, i+setStr.length())) { 1774 return true; 1775 } 1776 } 1777 return false; 1778 1779 } 1780 1781 1786 public String getRegexEquivalent() { 1787 if 
(strings.size() == 0) return toString(); 1788 StringBuffer result = new StringBuffer ("(?:"); 1789 _generatePattern(result, true, false); 1790 Iterator it = strings.iterator(); 1791 while (it.hasNext()) { 1792 result.append('|'); 1793 _appendToPat(result, (String ) it.next(), true); 1794 } 1795 return result.append(")").toString(); 1796 } 1797 1798 1806 public boolean containsNone(int start, int end) { 1807 if (start < MIN_VALUE || start > MAX_VALUE) { 1808 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(start, 6)); 1809 } 1810 if (end < MIN_VALUE || end > MAX_VALUE) { 1811 throw new IllegalArgumentException ("Invalid code point U+" + Utility.hex(end, 6)); 1812 } 1813 int i = -1; 1814 while (true) { 1815 if (start < list[++i]) break; 1816 } 1817 return ((i & 1) == 0 && end < list[i]); 1818 } 1819 1820 1829 public boolean containsNone(UnicodeSet c) { 1830 int n = c.getRangeCount(); 1834 for (int i=0; i<n; ++i) { 1835 if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) { 1836 return false; 1837 } 1838 } 1839 if (!SortedSetRelation.hasRelation(strings, SortedSetRelation.DISJOINT, c.strings)) return false; 1840 return true; 1841 } 1842 1843 1850 public boolean containsNone(String s) { 1851 int cp; 1852 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 1853 cp = UTF16.charAt(s, i); 1854 if (contains(cp)) return false; 1855 } 1856 if (strings.size() == 0) return true; 1857 for (Iterator it = strings.iterator(); it.hasNext();) { 1859 String item = (String )it.next(); 1860 if (s.indexOf(item) >= 0) return false; 1861 } 1862 return true; 1863 } 1864 1865 1873 public final boolean containsSome(int start, int end) { 1874 return !containsNone(start, end); 1875 } 1876 1877 1884 public final boolean containsSome(UnicodeSet s) { 1885 return !containsNone(s); 1886 } 1887 1888 1895 public final boolean containsSome(String s) { 1896 return !containsNone(s); 1897 } 1898 1899 1900 1910 public UnicodeSet addAll(UnicodeSet c) { 1911 
checkFrozen(); 1912 add(c.list, c.len, 0); 1913 strings.addAll(c.strings); 1914 return this; 1915 } 1916 1917 1927 public UnicodeSet retainAll(UnicodeSet c) { 1928 checkFrozen(); 1929 retain(c.list, c.len, 0); 1930 strings.retainAll(c.strings); 1931 return this; 1932 } 1933 1934 1944 public UnicodeSet removeAll(UnicodeSet c) { 1945 checkFrozen(); 1946 retain(c.list, c.len, 2); 1947 strings.removeAll(c.strings); 1948 return this; 1949 } 1950 1951 1960 public UnicodeSet complementAll(UnicodeSet c) { 1961 checkFrozen(); 1962 xor(c.list, c.len, 0); 1963 SortedSetRelation.doOperation(strings, SortedSetRelation.COMPLEMENTALL, c.strings); 1964 return this; 1965 } 1966 1967 1972 public UnicodeSet clear() { 1973 checkFrozen(); 1974 list[0] = HIGH; 1975 len = 1; 1976 pat = null; 1977 strings.clear(); 1978 return this; 1979 } 1980 1981 1988 public int getRangeCount() { 1989 return len/2; 1990 } 1991 1992 2001 public int getRangeStart(int index) { 2002 return list[index*2]; 2003 } 2004 2005 2014 public int getRangeEnd(int index) { 2015 return (list[index*2 + 1] - 1); 2016 } 2017 2018 2023 public UnicodeSet compact() { 2024 checkFrozen(); 2025 if (len != list.length) { 2026 int[] temp = new int[len]; 2027 System.arraycopy(list, 0, temp, 0, len); 2028 list = temp; 2029 } 2030 rangeList = null; 2031 buffer = null; 2032 return this; 2033 } 2034 2035 2046 public boolean equals(Object o) { 2047 try { 2048 UnicodeSet that = (UnicodeSet) o; 2049 if (len != that.len) return false; 2050 for (int i = 0; i < len; ++i) { 2051 if (list[i] != that.list[i]) return false; 2052 } 2053 if (!strings.equals(that.strings)) return false; 2054 } catch (Exception e) { 2055 return false; 2056 } 2057 return true; 2058 } 2059 2060 2067 public int hashCode() { 2068 int result = len; 2069 for (int i = 0; i < len; ++i) { 2070 result *= 1000003; 2071 result += list[i]; 2072 } 2073 return result; 2074 } 2075 2076 2080 public String toString() { 2081 return toPattern(true); 2082 } 2083 2084 2088 2110 
UnicodeSet applyPattern(String pattern, 2111 ParsePosition pos, 2112 SymbolTable symbols, 2113 int options) { 2114 2115 boolean parsePositionWasNull = pos == null; 2118 if (parsePositionWasNull) { 2119 pos = new ParsePosition(0); 2120 } 2121 2122 StringBuffer rebuiltPat = new StringBuffer (); 2123 RuleCharacterIterator chars = 2124 new RuleCharacterIterator(pattern, symbols, pos); 2125 applyPattern(chars, symbols, rebuiltPat, options); 2126 if (chars.inVariable()) { 2127 syntaxError(chars, "Extra chars in variable value"); 2128 } 2129 pat = rebuiltPat.toString(); 2130 if (parsePositionWasNull) { 2131 int i = pos.getIndex(); 2132 2133 if ((options & IGNORE_SPACE) != 0) { 2135 i = Utility.skipWhitespace(pattern, i); 2136 } 2137 2138 if (i != pattern.length()) { 2139 throw new IllegalArgumentException ("Parse of \"" + pattern + 2140 "\" failed at " + i); 2141 } 2142 } 2143 return this; 2144 } 2145 2146 2160 void applyPattern(RuleCharacterIterator chars, SymbolTable symbols, 2161 StringBuffer rebuiltPat, int options) { 2162 2163 2165 2167 int opts = RuleCharacterIterator.PARSE_VARIABLES | 2168 RuleCharacterIterator.PARSE_ESCAPES; 2169 if ((options & IGNORE_SPACE) != 0) { 2170 opts |= RuleCharacterIterator.SKIP_WHITESPACE; 2171 } 2172 2173 StringBuffer pat = new StringBuffer (), buf = null; 2174 boolean usePat = false; 2175 UnicodeSet scratch = null; 2176 Object backup = null; 2177 2178 int lastItem = 0, lastChar = 0, mode = 0; 2181 char op = 0; 2182 2183 boolean invert = false; 2184 2185 clear(); 2186 2187 while (mode != 2 && !chars.atEnd()) { 2188 if (false) { 2189 if (!((lastItem == 0 && op == 0) || 2191 (lastItem == 1 && (op == 0 || op == '-')) || 2192 (lastItem == 2 && (op == 0 || op == '-' || op == '&')))) { 2193 throw new IllegalArgumentException (); 2194 } 2195 } 2196 2197 int c = 0; 2198 boolean literal = false; 2199 UnicodeSet nested = null; 2200 2201 2203 int setMode = 0; 2205 if (resemblesPropertyPattern(chars, opts)) { 2206 setMode = 2; 2207 } 2208 2209 
2217 else { 2218 backup = chars.getPos(backup); 2220 c = chars.next(opts); 2221 literal = chars.isEscaped(); 2222 2223 if (c == '[' && !literal) { 2224 if (mode == 1) { 2225 chars.setPos(backup); setMode = 1; 2227 } else { 2228 mode = 1; 2230 pat.append('['); 2231 backup = chars.getPos(backup); c = chars.next(opts); 2233 literal = chars.isEscaped(); 2234 if (c == '^' && !literal) { 2235 invert = true; 2236 pat.append('^'); 2237 backup = chars.getPos(backup); c = chars.next(opts); 2239 literal = chars.isEscaped(); 2240 } 2241 if (c == '-') { 2244 literal = true; 2245 } else { 2247 chars.setPos(backup); continue; 2249 } 2250 } 2251 } else if (symbols != null) { 2252 UnicodeMatcher m = symbols.lookupMatcher(c); if (m != null) { 2254 try { 2255 nested = (UnicodeSet) m; 2256 setMode = 3; 2257 } catch (ClassCastException e) { 2258 syntaxError(chars, "Syntax error"); 2259 } 2260 } 2261 } 2262 } 2263 2264 2269 if (setMode != 0) { 2270 if (lastItem == 1) { 2271 if (op != 0) { 2272 syntaxError(chars, "Char expected after operator"); 2273 } 2274 add_unchecked(lastChar, lastChar); 2275 _appendToPat(pat, lastChar, false); 2276 lastItem = op = 0; 2277 } 2278 2279 if (op == '-' || op == '&') { 2280 pat.append(op); 2281 } 2282 2283 if (nested == null) { 2284 if (scratch == null) scratch = new UnicodeSet(); 2285 nested = scratch; 2286 } 2287 switch (setMode) { 2288 case 1: 2289 nested.applyPattern(chars, symbols, pat, options); 2290 break; 2291 case 2: 2292 chars.skipIgnored(opts); 2293 nested.applyPropertyPattern(chars, pat, symbols); 2294 break; 2295 case 3: nested._toPattern(pat, false); 2297 break; 2298 } 2299 2300 usePat = true; 2301 2302 if (mode == 0) { 2303 set(nested); 2305 mode = 2; 2306 break; 2307 } 2308 2309 switch (op) { 2310 case '-': 2311 removeAll(nested); 2312 break; 2313 case '&': 2314 retainAll(nested); 2315 break; 2316 case 0: 2317 addAll(nested); 2318 break; 2319 } 2320 2321 op = 0; 2322 lastItem = 2; 2323 2324 continue; 2325 } 2326 2327 if (mode == 0) { 2328 
syntaxError(chars, "Missing '['"); 2329 } 2330 2331 2335 if (!literal) { 2336 switch (c) { 2337 case ']': 2338 if (lastItem == 1) { 2339 add_unchecked(lastChar, lastChar); 2340 _appendToPat(pat, lastChar, false); 2341 } 2342 if (op == '-') { 2344 add_unchecked(op, op); 2345 pat.append(op); 2346 } else if (op == '&') { 2347 syntaxError(chars, "Trailing '&'"); 2348 } 2349 pat.append(']'); 2350 mode = 2; 2351 continue; 2352 case '-': 2353 if (op == 0) { 2354 if (lastItem != 0) { 2355 op = (char) c; 2356 continue; 2357 } else { 2358 add_unchecked(c, c); 2360 c = chars.next(opts); 2361 literal = chars.isEscaped(); 2362 if (c == ']' && !literal) { 2363 pat.append("-]"); 2364 mode = 2; 2365 continue; 2366 } 2367 } 2368 } 2369 syntaxError(chars, "'-' not after char or set"); 2370 case '&': 2371 if (lastItem == 2 && op == 0) { 2372 op = (char) c; 2373 continue; 2374 } 2375 syntaxError(chars, "'&' not after set"); 2376 case '^': 2377 syntaxError(chars, "'^' not after '['"); 2378 case '{': 2379 if (op != 0) { 2380 syntaxError(chars, "Missing operand after operator"); 2381 } 2382 if (lastItem == 1) { 2383 add_unchecked(lastChar, lastChar); 2384 _appendToPat(pat, lastChar, false); 2385 } 2386 lastItem = 0; 2387 if (buf == null) { 2388 buf = new StringBuffer (); 2389 } else { 2390 buf.setLength(0); 2391 } 2392 boolean ok = false; 2393 while (!chars.atEnd()) { 2394 c = chars.next(opts); 2395 literal = chars.isEscaped(); 2396 if (c == '}' && !literal) { 2397 ok = true; 2398 break; 2399 } 2400 UTF16.append(buf, c); 2401 } 2402 if (buf.length() < 1 || !ok) { 2403 syntaxError(chars, "Invalid multicharacter string"); 2404 } 2405 add(buf.toString()); 2409 pat.append('{'); 2410 _appendToPat(pat, buf.toString(), false); 2411 pat.append('}'); 2412 continue; 2413 case SymbolTable.SYMBOL_REF: 2414 backup = chars.getPos(backup); 2421 c = chars.next(opts); 2422 literal = chars.isEscaped(); 2423 boolean anchor = (c == ']' && !literal); 2424 if (symbols == null && !anchor) { 2425 c = 
SymbolTable.SYMBOL_REF;
                        // NOTE(review): everything down to the first closing brace at
                        // column 0 is the tail of a pattern-parsing method whose
                        // beginning lies outside this excerpt. The code is reproduced
                        // unchanged; comments describe only what is visible here.
                        chars.setPos(backup);
                        break;
                    }
                    if (anchor && op == 0) {
                        // Flush a pending single character before recording the anchor.
                        if (lastItem == 1) {
                            add_unchecked(lastChar, lastChar);
                            _appendToPat(pat, lastChar, false);
                        }
                        add_unchecked(UnicodeMatcher.ETHER);
                        usePat = true;
                        pat.append(SymbolTable.SYMBOL_REF).append(']');
                        mode = 2;
                        continue;
                    }
                    // syntaxError() always throws, so control never falls through
                    // into the default label below.
                    syntaxError(chars, "Unquoted '$'");
                default:
                    break;
                }
            }

            // Handle the character c according to what was seen last.
            // From the branches below: 0 = nothing pending, 1 = a single character
            // is pending in lastChar; 2 presumably means the previous item was a
            // nested set (see the "Set expected after operator" message) -- TODO confirm.
            switch (lastItem) {
            case 0:
                lastItem = 1;
                lastChar = c;
                break;
            case 1:
                if (op == '-') {
                    // A range lastChar-c must be strictly increasing.
                    if (lastChar >= c) {
                        syntaxError(chars, "Invalid range");
                    }
                    add_unchecked(lastChar, c);
                    _appendToPat(pat, lastChar, false);
                    pat.append(op);
                    _appendToPat(pat, c, false);
                    lastItem = op = 0;
                } else {
                    // No operator: emit the pending character, c becomes pending.
                    add_unchecked(lastChar, lastChar);
                    _appendToPat(pat, lastChar, false);
                    lastChar = c;
                }
                break;
            case 2:
                if (op != 0) {
                    syntaxError(chars, "Set expected after operator");
                }
                lastChar = c;
                lastItem = 1;
                break;
            }
        }

        // The loop above must have ended in mode 2 (set when the closing
        // bracket was appended); anything else is reported as a missing ']'.
        if (mode != 2) {
            syntaxError(chars, "Missing ']'");
        }

        chars.skipIgnored(opts);

        // Case closure is applied BEFORE complementing, so the inversion
        // operates on the case-closed set.
        if ((options & CASE) != 0) {
            closeOver(CASE);
        }
        if (invert) {
            complement();
        }

        // Emit the pattern text collected while parsing only when usePat was
        // set; otherwise generate the pattern from the set contents.
        if (usePat) {
            rebuiltPat.append(pat.toString());
        } else {
            _generatePattern(rebuiltPat, false, true);
        }
    }

/**
 * Reports a pattern syntax error. This method never returns normally.
 *
 * @param chars the iterator being parsed; its toString() value is escaped
 *              with Utility.escape and included in the message
 * @param msg   short description of the problem
 * @throws IllegalArgumentException always, with the text
 *         {@code Error: <msg> at "<escaped chars>"}
 */
private static void syntaxError(RuleCharacterIterator chars, String msg) {
    throw new IllegalArgumentException("Error: " + msg + " at \"" +
                                       Utility.escape(chars.toString()) +
                                       '"');
}

/**
 * Adds every item of this set to the given collection as a String.
 * The items are those produced by a UnicodeSetIterator over this set,
 * each converted with getString().
 *
 * @param target the collection that receives the strings
 */
public void addAllTo(Collection target) {
    UnicodeSetIterator it = new UnicodeSetIterator(this);
    while (it.next()) {
        target.add(it.getString());
    }
}

/**
 * Adds the toString() value of every element of the given collection
 * to this set.
 *
 * @param source the collection whose elements are added
 * @throws UnsupportedOperationException if this set is frozen
 */
public void addAll(Collection source) {
    checkFrozen();
    Iterator it = source.iterator();
    while (it.hasNext()) {
        // A null element would raise a NullPointerException here.
        add(it.next().toString());
    }
}

/**
 * Ensures that list can hold at least newLen elements. When it has to
 * grow, the new array has GROW_EXTRA spare slots and the first len
 * elements are copied over.
 */
private void ensureCapacity(int newLen) {
    if (newLen <= list.length) return;
    int[] temp = new int[newLen + GROW_EXTRA];
    System.arraycopy(list, 0, temp, 0, len);
    list = temp;
}

/**
 * Ensures that the scratch buffer exists and can hold at least newLen
 * elements. Unlike ensureCapacity, the old buffer contents are NOT
 * preserved when a new array is allocated.
 */
private void ensureBufferCapacity(int newLen) {
    if (buffer != null && newLen <= buffer.length) return;
    buffer = new int[newLen + GROW_EXTRA];
}

/**
 * Returns the three-element array { start, end+1, HIGH } describing the
 * single range start..end.
 * The array is the shared rangeList field, allocated on first use and
 * overwritten by every later call, so the result must be consumed before
 * range() is called again.
 */
private int[] range(int start, int end) {
    if (rangeList == null) {
        rangeList = new int[] { start, end+1, HIGH };
    } else {
        // Slot 2 still holds HIGH from the initial allocation.
        rangeList[0] = start;
        rangeList[1] = end+1;
    }
    return rangeList;
}

/**
 * Replaces the contents of list with the merge of list and other in which
 * boundary values present in both inputs cancel out (symmetric difference
 * of the two boundary sequences). Both arrays are read until the HIGH
 * terminator.
 *
 * @param other    boundary array to merge in, terminated by HIGH
 * @param otherLen number of used elements in other (used only to size
 *                 the scratch buffer)
 * @param polarity for 1 or 2 the merge starts with b = LOW (consuming
 *                 other's first element only when it is already LOW);
 *                 presumably this treats other as complemented --
 *                 TODO confirm against callers
 * @return this set
 */
private UnicodeSet xor(int[] other, int otherLen, int polarity) {
    ensureBufferCapacity(len + otherLen);
    int i = 0, j = 0, k = 0;
    int a = list[i++];
    int b;
    if (polarity == 1 || polarity == 2) {
        b = LOW;
        if (other[j] == LOW) {
            // other already starts at LOW: skip it and take the next value
            // (note j is not advanced past that next value here).
            ++j;
            b = other[j];
        }
    } else {
        b = other[j++];
    }
    // Merge the two sorted sequences; equal values are dropped from the
    // output, and the shared HIGH terminator ends the loop.
    while (true) {
        if (a < b) {
            buffer[k++] = a;
            a = list[i++];
        } else if (b < a) {
            buffer[k++] = b;
            b = other[j++];
        } else if (a != HIGH) {
            // a == b: the two boundaries cancel.
            a = list[i++];
            b = other[j++];
        } else {
            // a == b == HIGH: both inputs are exhausted.
            buffer[k++] = HIGH;
            len = k;
            break;
        }
    }
    // Swap list and buffer so the old list becomes the next scratch buffer.
    int[] temp = list;
    list = buffer;
    buffer = temp;
    // The cached pattern string no longer describes the contents.
    pat = null;
    return this;
}

/**
 * Merges the boundary array other into list, writing the result through
 * the scratch buffer and then swapping it in as the new list.
 *
 * Bit 0 of polarity is toggled every time an element of list is consumed
 * and bit 1 every time an element of other is consumed, so inside the
 * loop polarity records whether the current a / b is a range start or a
 * range limit. NOTE(review): the initial polarity presumably selects
 * whether this set and/or other is taken as complemented (0 = plain
 * union) -- confirm against the callers, which are outside this excerpt.
 *
 * @param other    boundary array to merge in, terminated by HIGH
 * @param otherLen number of used elements in other (used only to size
 *                 the scratch buffer)
 * @param polarity initial two-bit state described above
 * @return this set
 */
private UnicodeSet add(int[] other, int otherLen, int polarity) {
    ensureBufferCapacity(len + otherLen);
    int i = 0, j = 0, k = 0;
    int a = list[i++];
    int b = other[j++];
    main:
    while (true) {
        switch (polarity) {
          case 0: // both bits clear
            if (a < b) {
                // If a does not exceed the last value written, pop that value
                // and continue from the larger of it and the next list
                // element; otherwise emit a.
                if (k > 0 && a <= buffer[k-1]) {
                    a = max(list[i], buffer[--k]);
                } else {
                    buffer[k++] = a;
                    a = list[i];
                }
                i++;
                polarity ^= 1;
            } else if (b < a) {
                // Mirror image of the branch above for other.
                if (k > 0 && b <= buffer[k-1]) {
                    b = max(other[j], buffer[--k]);
                } else {
                    buffer[k++] = b;
                    b = other[j];
                }
                j++;
                polarity ^= 2;
            } else {
                // a == b: finished when both are at the terminator.
                if (a == HIGH) break main;
                if (k > 0 && a <= buffer[k-1]) {
                    a = max(list[i], buffer[--k]);
                } else {
                    buffer[k++] = a;
                    a = list[i];
                }
                i++;
                polarity ^= 1;
                b = other[j++]; polarity ^= 2;
            }
            break;
          case 3: // both bits set: emit the larger value and advance both
            if (b <= a) {
                if (a == HIGH) break main;
                buffer[k++] = a;
            } else {
                if (b == HIGH) break main;
                buffer[k++] = b;
            }
            a = list[i++]; polarity ^= 1;
            b = other[j++]; polarity ^= 2;
            break;
          case 1: // bit 0 set, bit 1 clear
            if (a < b) {
                buffer[k++] = a; a = list[i++]; polarity ^= 1;
            } else if (b < a) {
                // b is consumed without being emitted.
                b = other[j++]; polarity ^= 2;
            } else {
                if (a == HIGH) break main;
                a = list[i++]; polarity ^= 1;
                b = other[j++]; polarity ^= 2;
            }
            break;
          case 2: // bit 1 set, bit 0 clear
            if (b < a) {
                buffer[k++] = b; b = other[j++]; polarity ^= 2;
            } else if (a < b) {
                // a is consumed without being emitted.
                a = list[i++]; polarity ^= 1;
            } else {
                if (a == HIGH) break main;
                a = list[i++]; polarity ^= 1;
                b = other[j++]; polarity ^= 2;
            }
            break;
        }
    }
    // Terminate the result and record its length.
    buffer[k++] = HIGH;
    len = k;
    // Swap list and buffer so the old list becomes the next scratch buffer.
    int[] temp = list;
    list = buffer;
    buffer = temp;
    // The cached pattern string no longer describes the contents.
    pat = null;
    return this;
}

/**
 * Merges the boundary array other with list, keeping only the boundaries
 * selected by the polarity state machine below, and installs the result
 * as the new list.
 *
 * As in add(), bit 0 of polarity is toggled whenever an element of list
 * is consumed and bit 1 whenever an element of other is consumed.
 * NOTE(review): the initial polarity presumably selects whether this set
 * and/or other is taken as complemented (0 = plain intersection) --
 * confirm against the callers, which are outside this excerpt.
 *
 * @param other    boundary array to merge with, terminated by HIGH
 * @param otherLen number of used elements in other (used only to size
 *                 the scratch buffer)
 * @param polarity initial two-bit state described above
 * @return this set
 */
private UnicodeSet retain(int[] other, int otherLen, int polarity) {
    ensureBufferCapacity(len + otherLen);
    int i = 0, j = 0, k = 0;
    int a = list[i++];
    int b = other[j++];
    main:
    while (true) {
        switch (polarity) {
          case 0: // both bits clear: only a shared boundary is emitted
            if (a < b) {
                a = list[i++]; polarity ^= 1;
            } else if (b < a) {
                b = other[j++]; polarity ^= 2;
            } else {
                if (a == HIGH) break main;
                buffer[k++] = a; a = list[i++]; polarity ^= 1;
                b = other[j++]; polarity ^= 2;
            }
            break;
          case 3: // both bits set: the smaller boundary is emitted
            if (a < b) {
                buffer[k++] = a; a = list[i++]; polarity ^= 1;
            } else if (b < a) {
                buffer[k++] = b; b = other[j++]; polarity ^= 2;
            } else {
                if (a == HIGH) break main;
                buffer[k++] = a; a = list[i++]; polarity ^= 1;
                b = other[j++]; polarity ^= 2;
            }
            break;
          case 1: // bit 0 set, bit 1 clear: only b is ever emitted
            if (a < b) {
                a = list[i++]; polarity ^= 1;
            } else if (b < a) {
                buffer[k++] = b; b = other[j++]; polarity ^= 2;
            } else {
                if (a == HIGH) break main;
                a = list[i++]; polarity ^= 1;
                b = other[j++]; polarity ^= 2;
            }
            break;
          case 2: // bit 1 set, bit 0 clear: only a is ever emitted
            if (b < a) {
                b = other[j++]; polarity ^= 2;
            } else if (a < b) {
                buffer[k++] = a; a = list[i++]; polarity ^= 1;
            } else {
                if (a == HIGH) break main;
                a = list[i++]; polarity ^= 1;
                b = other[j++]; polarity ^= 2;
            }
            break;
        }
    }
    // Terminate the result and record its length.
    buffer[k++] = HIGH;
    len = k;
    // Swap list and buffer so the old list becomes the next scratch buffer.
    int[] temp = list;
    list = buffer;
    buffer = temp;
    // The cached pattern string no longer describes the contents.
    pat = null;
    return this;
}

/**
 * Returns the larger of the two ints.
 */
private static final int max(int a, int b) {
    return (a > b) ? a : b;
}

/**
 * Predicate over code points, used by applyFilter to decide which code
 * points end up in the set.
 */
private static interface Filter {
    boolean contains(int codePoint);
}

/**
 * Accepts code points whose UCharacter.getUnicodeNumericValue equals the
 * configured value (exact double comparison).
 */
private static class NumericValueFilter implements Filter {
    double value;
    NumericValueFilter(double value) { this.value = value; }
    public boolean contains(int ch) {
        return UCharacter.getUnicodeNumericValue(ch) == value;
    }
}

/**
 * Accepts code points whose general category, as returned by
 * UCharacter.getType, corresponds to a bit that is set in the mask.
 */
private static class GeneralCategoryMaskFilter implements Filter {
    int mask;
    GeneralCategoryMaskFilter(int mask) { this.mask = mask; }
    public boolean contains(int ch) {
        return ((1 << UCharacter.getType(ch)) & mask) != 0;
    }
}

/**
 * Accepts code points for which UCharacter.getIntPropertyValue(ch, prop)
 * equals the configured value.
 */
private static class IntPropertyFilter implements Filter {
    int prop;
    int value;
    IntPropertyFilter(int prop, int value) {
        this.prop = prop;
        this.value = value;
    }
    public boolean contains(int ch) {
        return UCharacter.getIntPropertyValue(ch, prop) == value;
    }
}

/**
 * The version 0.0.0.0; VersionFilter rejects code points whose age is
 * this instance.
 */
static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);

/**
 * Accepts code points whose age (UCharacter.getAge) is not NO_VERSION and
 * is less than or equal to the configured version.
 */
private static class VersionFilter implements Filter {
    VersionInfo version;
    VersionFilter(VersionInfo version) { this.version = version; }
    public boolean contains(int ch) {
        VersionInfo v = UCharacter.getAge(ch);
        // NOTE(review): reference comparison -- this is only correct if
        // VersionInfo.getInstance hands out one shared instance per
        // version; confirm against VersionInfo.
        return v != NO_VERSION &&
               v.compareTo(version) <= 0;
    }
}

/**
 * Returns the cached inclusion set for the given property source, building
 * it on first request. The method is synchronized on the class, and both
 * the INCLUSIONS array and its entries are created lazily.
 *
 * @param src one of the UCharacterProperty.SRC_* constants handled below
 * @return the shared set for that source (the cached instance itself,
 *         not a copy)
 * @throws MissingResourceException if loading the case or bidi data
 *         fails with an IOException
 * @throws IllegalStateException if src is not one of the handled sources
 */
private static synchronized UnicodeSet getInclusions(int src) {
    if (INCLUSIONS == null) {
        INCLUSIONS = new UnicodeSet[UCharacterProperty.SRC_COUNT];
    }
    if(INCLUSIONS[src] == null) {
        UnicodeSet incl = new UnicodeSet();
        switch(src) {
        case UCharacterProperty.SRC_CHAR:
            UCharacterProperty.getInstance().addPropertyStarts(incl);
            break;
        case UCharacterProperty.SRC_PROPSVEC:
            UCharacterProperty.getInstance().upropsvec_addPropertyStarts(incl);
            break;
        case UCharacterProperty.SRC_CHAR_AND_PROPSVEC:
            UCharacterProperty.getInstance().addPropertyStarts(incl);
            UCharacterProperty.getInstance().upropsvec_addPropertyStarts(incl);
            break;
        case UCharacterProperty.SRC_HST:
            UCharacterProperty.getInstance().uhst_addPropertyStarts(incl);
            break;
        case UCharacterProperty.SRC_NORM:
            NormalizerImpl.addPropertyStarts(incl);
            break;
        case UCharacterProperty.SRC_CASE:
            try {
                UCaseProps.getSingleton().addPropertyStarts(incl);
            } catch(IOException e) {
                // NOTE(review): only the message is carried over; the
                // IOException itself is not attached as the cause.
                throw new MissingResourceException(e.getMessage(),"","");
            }
            break;
        case UCharacterProperty.SRC_BIDI:
            try {
                UBiDiProps.getSingleton().addPropertyStarts(incl);
            } catch(IOException e) {
                // NOTE(review): same as above, the cause is dropped.
                throw new MissingResourceException(e.getMessage(),"","");
            }
            break;
        default:
            throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")");
        }
        // Cache only after the set was built without an exception.
        INCLUSIONS[src] = incl;
    }
    return INCLUSIONS[src];
}

/**
 * Replaces the contents of this set with the code points accepted by the
 * filter. Only code points inside the ranges of getInclusions(src) are
 * tested.
 *
 * @param filter predicate deciding membership
 * @param src    property source passed to getInclusions
 * @return this set
 */
private UnicodeSet applyFilter(Filter filter, int src) {
    clear();

    // Start of the currently open run of accepted code points, or -1.
    int startHasProperty = -1;
    UnicodeSet inclusions = getInclusions(src);
    int limitRange = inclusions.getRangeCount();

    // startHasProperty is NOT reset between inclusion ranges: a run that is
    // still open at the end of one range continues into the next, so the
    // untested code points in between are added with it. Presumably the
    // inclusion set lists every code point at which the property value can
    // change -- TODO confirm.
    for (int j=0; j<limitRange; ++j) {
        int start = inclusions.getRangeStart(j);
        int end = inclusions.getRangeEnd(j);

        for (int ch = start; ch <= end; ++ch) {
            if (filter.contains(ch)) {
                if (startHasProperty < 0) {
                    startHasProperty = ch;
                }
            } else if (startHasProperty >= 0) {
                // First rejected code point closes the open run.
                add_unchecked(startHasProperty, ch-1);
                startHasProperty = -1;
            }
        }
    }
    // A run still open after the last range extends to U+10FFFF.
    if (startHasProperty >= 0) {
        add_unchecked(startHasProperty, 0x10FFFF);
    }

    return this;
}

/**
 * Normalizes white space in a character name: leading and trailing rule
 * white space (UCharacterProperty.isRuleWhiteSpace) is removed and each
 * interior run is replaced by a single space. All other code points are
 * copied unchanged.
 *
 * @param source the name to normalize
 * @return the normalized name
 */
private static String mungeCharName(String source) {
    StringBuffer buf = new StringBuffer();
    for (int i=0; i<source.length(); ) {
        // Step by code point, not by UTF-16 code unit.
        int ch = UTF16.charAt(source, i);
        i += UTF16.getCharCount(ch);
        if (UCharacterProperty.isRuleWhiteSpace(ch)) {
            // Drop white space at the start or directly after a space.
            if (buf.length() == 0 ||
                buf.charAt(buf.length() - 1) == ' ') {
                continue;
            }
            ch = ' ';
        }
        UTF16.append(buf, ch);
    }
    // At most one trailing space can remain; remove it.
    if (buf.length() != 0 &&
        buf.charAt(buf.length() - 1) == ' ') {
        buf.setLength(buf.length() - 1);
    }
    return buf.toString();
}

/**
 * Replaces the contents of this set with the code points that have the
 * given value for the given property.
 *
 * For UProperty.GENERAL_CATEGORY_MASK the value is used as a mask over
 * general categories (see GeneralCategoryMaskFilter); for every other
 * property the code points whose UCharacter.getIntPropertyValue equals
 * value are selected.
 *
 * @param prop  a UProperty selector
 * @param value the property value, or a category mask for
 *              GENERAL_CATEGORY_MASK
 * @return this set
 * @throws UnsupportedOperationException if this set is frozen
 */
public UnicodeSet applyIntPropertyValue(int prop, int value) {
    checkFrozen();
    if (prop == UProperty.GENERAL_CATEGORY_MASK) {
        applyFilter(new GeneralCategoryMaskFilter(value), UCharacterProperty.SRC_CHAR);
    } else {
        applyFilter(new IntPropertyFilter(prop, value), UCharacterProperty.getInstance().getSource(prop));
    }
    return this;
}

/**
 * Equivalent to applyPropertyAlias(propertyAlias, valueAlias, null).
 *
 * @param propertyAlias a property name, or a bare value name when
 *                      valueAlias is empty
 * @param valueAlias    a property value name, or the empty string
 * @return this set
 */
public UnicodeSet applyPropertyAlias(String propertyAlias, String valueAlias) {
    return applyPropertyAlias(propertyAlias, valueAlias, null);
}

/**
 * Replaces the contents of this set with the code points selected by a
 * property name / value name pair.
 *
 * If symbols is an XSymbolTable it is asked first; when its
 * applyPropertyAlias returns true nothing else is done.
 *
 * With a non-empty valueAlias, propertyAlias is resolved with
 * UCharacter.getPropertyEnum. Binary, int and mask properties are
 * resolved through UCharacter.getPropertyValueEnum (the three combining
 * class properties also accept a plain integer); NUMERIC_VALUE, NAME,
 * UNICODE_1_NAME and AGE are handled individually.
 *
 * With an empty valueAlias, propertyAlias is tried in this order: as a
 * general category value, as a script value, as a binary property
 * (value 1), and finally as one of the special names ANY, ASCII and
 * Assigned.
 *
 * @param propertyAlias a property name, or a bare value name when
 *                      valueAlias is empty
 * @param valueAlias    a property value name, or the empty string
 * @param symbols       optional symbol table; may be null
 * @return this set
 * @throws IllegalArgumentException if the aliases cannot be resolved, the
 *         property is not supported, a value is missing, or a numeric
 *         combining class selects no code points
 * @throws UnsupportedOperationException if this set is frozen
 */
public UnicodeSet applyPropertyAlias(String propertyAlias,
                                     String valueAlias, SymbolTable symbols) {
    checkFrozen();
    int p;
    int v;
    boolean mustNotBeEmpty = false, invert = false;

    // Give an extended symbol table the first chance to handle the alias.
    if (symbols != null
          && (symbols instanceof XSymbolTable)
          && ((XSymbolTable)symbols).applyPropertyAlias(propertyAlias, valueAlias, this)) {
        return this;
    }

    if (valueAlias.length() > 0) {
        p = UCharacter.getPropertyEnum(propertyAlias);

        // General category is handled through its mask form.
        if (p == UProperty.GENERAL_CATEGORY) {
            p = UProperty.GENERAL_CATEGORY_MASK;
        }

        if ((p >= UProperty.BINARY_START && p < UProperty.BINARY_LIMIT) ||
            (p >= UProperty.INT_START && p < UProperty.INT_LIMIT) ||
            (p >= UProperty.MASK_START && p < UProperty.MASK_LIMIT)) {
            try {
                v = UCharacter.getPropertyValueEnum(p, valueAlias);
            } catch (IllegalArgumentException e) {
                // The combining class properties also accept a number in
                // place of a value name.
                if (p == UProperty.CANONICAL_COMBINING_CLASS ||
                    p == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
                    p == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) {
                    v = Integer.parseInt(Utility.deleteRuleWhiteSpace(valueAlias));
                    // The number is not validated here; an empty result is
                    // rejected at the end of the method instead.
                    mustNotBeEmpty = true;
                } else {
                    throw e;
                }
            }
        }

        else {

            switch (p) {
            case UProperty.NUMERIC_VALUE:
                {
                    double value = Double.parseDouble(Utility.deleteRuleWhiteSpace(valueAlias));
                    applyFilter(new NumericValueFilter(value), UCharacterProperty.SRC_CHAR);
                    return this;
                }
            case UProperty.NAME:
            case UProperty.UNICODE_1_NAME:
                {
                    // Character names are looked up after white space
                    // normalization; the result is a single code point.
                    String buf = mungeCharName(valueAlias);
                    int ch =
                        (p == UProperty.NAME) ?
                        UCharacter.getCharFromExtendedName(buf) :
                        UCharacter.getCharFromName1_0(buf);
                    if (ch == -1) {
                        throw new IllegalArgumentException("Invalid character name");
                    }
                    clear();
                    add_unchecked(ch);
                    return this;
                }
            case UProperty.AGE:
                {
                    VersionInfo version = VersionInfo.getInstance(mungeCharName(valueAlias));
                    applyFilter(new VersionFilter(version), UCharacterProperty.SRC_PROPSVEC);
                    return this;
                }
            }

            // Reached for every property the switch does not handle.
            throw new IllegalArgumentException("Unsupported property");
        }
    }

    else {
        // No value given: propertyAlias is itself a value name or a
        // binary property name. Try the interpretations in order.
        try {
            p = UProperty.GENERAL_CATEGORY_MASK;
            v = UCharacter.getPropertyValueEnum(p, propertyAlias);
        } catch (IllegalArgumentException e) {
            try {
                p = UProperty.SCRIPT;
                v = UCharacter.getPropertyValueEnum(p, propertyAlias);
            } catch (IllegalArgumentException e2) {
                try {
                    p = UCharacter.getPropertyEnum(propertyAlias);
                } catch (IllegalArgumentException e3) {
                    p = -1;
                }
                if (p >= UProperty.BINARY_START && p < UProperty.BINARY_LIMIT) {
                    // A bare binary property name means "property is true".
                    v = 1;
                } else if (p == -1) {
                    // Not a property at all: check the special names.
                    if (0 == UPropertyAliases.compare(ANY_ID, propertyAlias)) {
                        set(MIN_VALUE, MAX_VALUE);
                        return this;
                    } else if (0 == UPropertyAliases.compare(ASCII_ID, propertyAlias)) {
                        set(0, 0x7F);
                        return this;
                    } else if (0 == UPropertyAliases.compare(ASSIGNED, propertyAlias)) {
                        // "Assigned" is computed as the complement of the
                        // UNASSIGNED general category.
                        p = UProperty.GENERAL_CATEGORY_MASK;
                        v = (1<<UCharacter.UNASSIGNED);
                        invert = true;
                    } else {
                        throw new IllegalArgumentException("Invalid property alias: " + propertyAlias + "=" + valueAlias);
                    }
                } else {
                    // A non-binary property was named without a value.
                    throw new IllegalArgumentException("Missing property value");
                }
            }
        }
    }

    applyIntPropertyValue(p, v);
    if(invert) {
        complement();
    }

    if (mustNotBeEmpty && isEmpty()) {
        throw new IllegalArgumentException("Invalid property value");
    }

    return this;
}

private static boolean resemblesPropertyPattern(String pattern, int pos) { 3200 if ((pos+5) > pattern.length()) { 3202 return false; 3203 } 3204 3205 return pattern.regionMatches(pos, "[:", 0, 2) || 3207 pattern.regionMatches(true, pos, "\\p", 0, 2) || 3208 pattern.regionMatches(pos, "\\N", 0, 2); 3209 } 3210 3211 3219 private static boolean resemblesPropertyPattern(RuleCharacterIterator chars, 3220 int iterOpts) { 3221 boolean result = false; 3222 iterOpts &= ~RuleCharacterIterator.PARSE_ESCAPES; 3223 Object pos = chars.getPos(null); 3224 int c = chars.next(iterOpts); 3225 if (c == '[' || c == '\\') { 3226 int d = chars.next(iterOpts & ~RuleCharacterIterator.SKIP_WHITESPACE); 3227 result = (c == '[') ? (d == ':') : 3228 (d == 'N' || d == 'p' || d == 'P'); 3229 } 3230 chars.setPos(pos); 3231 return result; 3232 } 3233 3234 3238 private UnicodeSet applyPropertyPattern(String pattern, ParsePosition ppos, SymbolTable symbols) { 3239 int pos = ppos.getIndex(); 3240 3241 3243 if ((pos+5) > pattern.length()) { 3245 return null; 3246 } 3247 3248 boolean posix = false; boolean isName = false; boolean invert = false; 3251 3252 if (pattern.regionMatches(pos, "[:", 0, 2)) { 3254 posix = true; 3255 pos = Utility.skipWhitespace(pattern, pos+2); 3256 if (pos < pattern.length() && pattern.charAt(pos) == '^') { 3257 ++pos; 3258 invert = true; 3259 } 3260 } else if (pattern.regionMatches(true, pos, "\\p", 0, 2) || 3261 pattern.regionMatches(pos, "\\N", 0, 2)) { 3262 char c = pattern.charAt(pos+1); 3263 invert = (c == 'P'); 3264 isName = (c == 'N'); 3265 pos = Utility.skipWhitespace(pattern, pos+2); 3266 if (pos == pattern.length() || pattern.charAt(pos++) != '{') { 3267 return null; 3269 } 3270 } else { 3271 return null; 3273 } 3274 3275 int close = pattern.indexOf(posix ? 
":]" : "}", pos); 3277 if (close < 0) { 3278 return null; 3280 } 3281 3282 int equals = pattern.indexOf('=', pos); 3286 String propName, valueName; 3287 if (equals >= 0 && equals < close && !isName) { 3288 propName = pattern.substring(pos, equals); 3290 valueName = pattern.substring(equals+1, close); 3291 } 3292 3293 else { 3294 propName = pattern.substring(pos, close); 3296 valueName = ""; 3297 3298 if (isName) { 3300 valueName = propName; 3306 propName = "na"; 3307 } 3308 } 3309 3310 applyPropertyAlias(propName, valueName, symbols); 3311 3312 if (invert) { 3313 complement(); 3314 } 3315 3316 ppos.setIndex(close + (posix ? 2 : 1)); 3318 3319 return this; 3320 } 3321 3322 3332 private void applyPropertyPattern(RuleCharacterIterator chars, 3333 StringBuffer rebuiltPat, SymbolTable symbols) { 3334 String pat = chars.lookahead(); 3335 ParsePosition pos = new ParsePosition(0); 3336 applyPropertyPattern(pat, pos, symbols); 3337 if (pos.getIndex() == 0) { 3338 syntaxError(chars, "Invalid property pattern"); 3339 } 3340 chars.jumpahead(pos.getIndex()); 3341 rebuiltPat.append(pat.substring(0, pos.getIndex())); 3342 } 3343 3344 3348 3357 public static final int IGNORE_SPACE = 1; 3358 3359 3390 public static final int CASE = 2; 3391 3392 3399 public static final int CASE_INSENSITIVE = 2; 3400 3401 3414 public static final int ADD_CASE_MAPPINGS = 4; 3415 3416 private static final void addCaseMapping(UnicodeSet set, int result, StringBuffer full) { 3419 if(result >= 0) { 3420 if(result > UCaseProps.MAX_STRING_LENGTH) { 3421 set.add(result); 3423 } else { 3424 set.add(full.toString()); 3426 full.setLength(0); 3427 } 3428 } 3429 } 3432 3433 3458 public UnicodeSet closeOver(int attribute) { 3459 checkFrozen(); 3460 if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) { 3461 UCaseProps csp; 3462 try { 3463 csp = UCaseProps.getSingleton(); 3464 } catch(IOException e) { 3465 return this; 3466 } 3467 UnicodeSet foldSet = new UnicodeSet(this); 3468 ULocale root = ULocale.ROOT; 3469 3470 
if((attribute & CASE) != 0) { 3474 foldSet.strings.clear(); 3475 } 3476 3477 int n = getRangeCount(); 3478 int result; 3479 StringBuffer full = new StringBuffer (); 3480 int locCache[] = new int[1]; 3481 3482 for (int i=0; i<n; ++i) { 3483 int start = getRangeStart(i); 3484 int end = getRangeEnd(i); 3485 3486 if((attribute & CASE) != 0) { 3487 for (int cp=start; cp<=end; ++cp) { 3489 csp.addCaseClosure(cp, foldSet); 3490 } 3491 } else { 3492 for (int cp=start; cp<=end; ++cp) { 3495 result = csp.toFullLower(cp, null, full, root, locCache); 3496 addCaseMapping(foldSet, result, full); 3497 3498 result = csp.toFullTitle(cp, null, full, root, locCache); 3499 addCaseMapping(foldSet, result, full); 3500 3501 result = csp.toFullUpper(cp, null, full, root, locCache); 3502 addCaseMapping(foldSet, result, full); 3503 3504 result = csp.toFullFolding(cp, full, 0); 3505 addCaseMapping(foldSet, result, full); 3506 } 3507 } 3508 } 3509 if (!strings.isEmpty()) { 3510 String str; 3511 if ((attribute & CASE) != 0) { 3512 Iterator it = strings.iterator(); 3513 while (it.hasNext()) { 3514 str = UCharacter.foldCase((String )it.next(), 0); 3515 if(!csp.addStringCaseClosure(str, foldSet)) { 3516 foldSet.add(str); } 3518 } 3519 } else { 3520 BreakIterator bi = BreakIterator.getWordInstance(root); 3521 Iterator it = strings.iterator(); 3522 while (it.hasNext()) { 3523 str = (String )it.next(); 3524 foldSet.add(UCharacter.toLowerCase(root, str)); 3525 foldSet.add(UCharacter.toTitleCase(root, str, bi)); 3526 foldSet.add(UCharacter.toUpperCase(root, str)); 3527 foldSet.add(UCharacter.foldCase(str, 0)); 3528 } 3529 } 3530 } 3531 set(foldSet); 3532 } 3533 return this; 3534 } 3535 3536 3543 abstract public static class XSymbolTable implements SymbolTable { 3544 3549 public XSymbolTable(){} 3550 3554 public UnicodeMatcher lookupMatcher(int i) { 3555 return null; 3556 } 3557 3561 public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) { 3562 return false; 
3563 } 3564 3568 public char[] lookup(String s) { 3569 return null; 3570 } 3571 3575 public String parseReference(String text, ParsePosition pos, int limit) { 3576 return null; 3577 } 3578 } 3579 3580 private boolean frozen; 3581 3582 3588 public boolean isFrozen() { 3589 return frozen; 3590 } 3591 3592 3598 public Object freeze() { 3599 frozen = true; 3600 return this; 3601 } 3602 3603 3609 public Object cloneAsThawed() { 3610 UnicodeSet result = (UnicodeSet) clone(); 3611 result.frozen = false; 3612 return result; 3613 } 3614 3615 private void checkFrozen() { 3617 if (frozen) { 3618 throw new UnsupportedOperationException ("Attempt to modify frozen object"); 3619 } 3620 } 3621} 3622 | Popular Tags |