| 1 7 8 package java.util.regex; 9 10 import java.security.AccessController ; 11 import java.security.PrivilegedAction ; 12 import java.text.CharacterIterator ; 13 import sun.text.Normalizer; 14 import java.util.ArrayList ; 15 import java.util.HashMap ; 16 17 18 594 595 public final class Pattern 596 implements java.io.Serializable  597 { 598 599 611 612 621 public static final int UNIX_LINES = 0x01; 622 623 636 public static final int CASE_INSENSITIVE = 0x02; 637 638 647 public static final int COMMENTS = 0x04; 648 649 660 public static final int MULTILINE = 0x08; 661 662 676 public static final int LITERAL = 0x10; 677 678 689 public static final int DOTALL = 0x20; 690 691 705 public static final int UNICODE_CASE = 0x40; 706 707 721 public static final int CANON_EQ = 0x80; 722 723 727 728 729 private static final long serialVersionUID = 5073258162644648461L; 730 731 736 private String pattern; 737 738 743 private int flags; 744 745 749 private transient volatile boolean compiled = false; 750 751 754 private transient String normalizedPattern; 755 756 760 transient Node root; 761 762 767 transient Node matchRoot; 768 769 772 transient int[] buffer; 773 774 777 transient GroupHead[] groupNodes; 778 779 782 private transient int[] temp; 783 784 788 transient int capturingGroupCount; 789 790 794 transient int localCount; 795 796 800 private transient int cursor; 801 802 805 private transient int patternLength; 806 807 816 public static Pattern compile(String regex) { 817 return new Pattern (regex, 0); 818 } 819 820 839 public static Pattern compile(String regex, int flags) { 840 return new Pattern (regex, flags); 841 } 842 843 849 public String pattern() { 850 return pattern; 851 } 852 853 861 public String toString() { 862 return pattern; 863 } 864 865 874 public Matcher matcher(CharSequence input) { 875 synchronized(this) { 876 if (!compiled) 877 compile(); 878 } 879 Matcher m = new Matcher (this, input); 880 return m; 881 } 882 883 888 public int flags() { 889 return flags; 890 } 891 892 918 public static boolean matches(String regex, CharSequence input) { 919 Pattern p = Pattern.compile(regex); 920 Matcher m = p.matcher(input); 921 return m.matches(); 922 } 923 924 984 public String [] split(CharSequence input, int limit) { 985 int index = 0; 986 boolean matchLimited = limit > 0; 987 ArrayList matchList = new ArrayList (); 988 Matcher m = matcher(input); 989 990 while(m.find()) { 992 if (!matchLimited || matchList.size() < limit - 1) { 993 String match = input.subSequence(index, m.start()).toString(); 994 matchList.add(match); 995 index = m.end(); 996 } else if (matchList.size() == limit - 1) { String match = input.subSequence(index, 998 input.length()).toString(); 999 matchList.add(match); 1000 index = m.end(); 1001 } 1002 } 1003 1004 if (index == 0) 1006 return new String [] {input.toString()}; 1007 1008 if (!matchLimited || matchList.size() < limit) 1010 matchList.add(input.subSequence(index, input.length()).toString()); 1011 1012 int resultSize = matchList.size(); 1014 if (limit == 0) 1015 while (resultSize > 0 && matchList.get(resultSize-1).equals("")) 1016 resultSize--; 1017 String [] result = new String [resultSize]; 1018 return (String [])matchList.subList(0, resultSize).toArray(result); 1019 } 1020 1021 1049 public String [] split(CharSequence input) { 1050 return split(input, 0); 1051 } 1052 1053 1067 public static String quote(String s) { 1068 int slashEIndex = s.indexOf("\\E"); 1069 if (slashEIndex == -1) 1070 return "\\Q" + s + "\\E"; 1071 1072 StringBuilder sb = new StringBuilder (s.length() * 2); 1073 sb.append("\\Q"); 1074 slashEIndex = 0; 1075 int current = 0; 1076 while ((slashEIndex = s.indexOf("\\E", current)) != -1) { 1077 sb.append(s.substring(current, slashEIndex)); 1078 current = slashEIndex + 2; 1079 sb.append("\\E\\\\E\\Q"); 1080 } 1081 sb.append(s.substring(current, s.length())); 1082 sb.append("\\E"); 1083 return sb.toString(); 1084 } 1085 1086 1090 private void readObject(java.io.ObjectInputStream s) 1091 throws java.io.IOException , ClassNotFoundException { 1092 1093 s.defaultReadObject(); 1095 1096 capturingGroupCount = 1; 1098 localCount = 0; 1099 1100 compiled = false; 1102 if (pattern.length() == 0) { 1103 root = new Start(lastAccept); 1104 matchRoot = lastAccept; 1105 compiled = true; 1106 } 1107 } 1108 1109 1115 private Pattern(String p, int f) { 1116 pattern = p; 1117 flags = f; 1118 1119 capturingGroupCount = 1; 1121 localCount = 0; 1122 1123 if (pattern.length() > 0) { 1124 compile(); 1125 } else { 1126 root = new Start(lastAccept); 1127 matchRoot = lastAccept; 1128 } 1129 } 1130 1131 1135 private void normalize() { 1136 boolean inCharClass = false; 1137 int lastCodePoint = -1; 1138 1139 normalizedPattern = Normalizer.decompose(pattern, false, 0); 1141 patternLength = normalizedPattern.length(); 1142 1143 StringBuilder newPattern = new StringBuilder (patternLength); 1145 for(int i=0; i<patternLength; ) { 1146 int c = normalizedPattern.codePointAt(i); 1147 StringBuilder sequenceBuffer; 1148 if ((Character.getType(c) == Character.NON_SPACING_MARK) 1149 && (lastCodePoint != -1)) { 1150 sequenceBuffer = new StringBuilder (); 1151 sequenceBuffer.appendCodePoint(lastCodePoint); 1152 sequenceBuffer.appendCodePoint(c); 1153 while(Character.getType(c) == Character.NON_SPACING_MARK) { 1154 i += Character.charCount(c); 1155 if (i >= patternLength) 1156 break; 1157 c = normalizedPattern.codePointAt(i); 1158 sequenceBuffer.appendCodePoint(c); 1159 } 1160 String ea = produceEquivalentAlternation( 1161 sequenceBuffer.toString()); 1162 newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint)); 1163 newPattern.append("(?:").append(ea).append(")"); 1164 } else if (c == '[' && lastCodePoint != '\\') { 1165 i = normalizeCharClass(newPattern, i); 1166 } else { 1167 newPattern.appendCodePoint(c); 1168 } 1169 lastCodePoint = c; 1170 i += Character.charCount(c); 1171 } 1172 normalizedPattern = newPattern.toString(); 1173 } 1174 1175 1180 private int normalizeCharClass(StringBuilder newPattern, int i) { 1181 StringBuilder charClass = new StringBuilder (); 1182 StringBuilder eq = null; 1183 int lastCodePoint = -1; 1184 String result; 1185 1186 i++; 1187 charClass.append("["); 1188 while(true) { 1189 int c = normalizedPattern.codePointAt(i); 1190 StringBuilder sequenceBuffer; 1191 1192 if (c == ']' && lastCodePoint != '\\') { 1193 charClass.append((char)c); 1194 break; 1195 } else if (Character.getType(c) == Character.NON_SPACING_MARK) { 1196 sequenceBuffer = new StringBuilder (); 1197 sequenceBuffer.appendCodePoint(lastCodePoint); 1198 while(Character.getType(c) == Character.NON_SPACING_MARK) { 1199 sequenceBuffer.appendCodePoint(c); 1200 i += Character.charCount(c); 1201 if (i >= normalizedPattern.length()) 1202 break; 1203 c = normalizedPattern.codePointAt(i); 1204 } 1205 String ea = produceEquivalentAlternation( 1206 sequenceBuffer.toString()); 1207 1208 charClass.setLength(charClass.length()-Character.charCount(lastCodePoint)); 1209 if (eq == null) 1210 eq = new StringBuilder (); 1211 eq.append('|'); 1212 eq.append(ea); 1213 } else { 1214 charClass.appendCodePoint(c); 1215 i++; 1216 } 1217 if (i == normalizedPattern.length()) 1218 error("Unclosed character class"); 1219 lastCodePoint = c; 1220 } 1221 1222 if (eq != null) { 1223 result = new String ("(?:"+charClass.toString()+ 1224 eq.toString()+")"); 1225 } else { 1226 result = charClass.toString(); 1227 } 1228 1229 newPattern.append(result); 1230 return i; 1231 } 1232 1233 1238 private String produceEquivalentAlternation(String source) { 1239 int len = countChars(source, 0, 1); 1240 if (source.length() == len) 1241 return new String (source); 1243 1244 String base = source.substring(0,len); 1245 String combiningMarks = source.substring(len); 1246 1247 String [] perms = producePermutations(combiningMarks); 1248 StringBuilder result = new StringBuilder (source); 1249 1250 for(int x=0; x<perms.length; x++) { 1252 String next = base + perms[x]; 1253 if (x>0) 1254 result.append("|"+next); 1255 next = composeOneStep(next); 1256 if (next != null) 1257 result.append("|"+produceEquivalentAlternation(next)); 1258 } 1259 return result.toString(); 1260 } 1261 1262 1271 private String [] producePermutations(String input) { 1272 if (input.length() == countChars(input, 0, 1)) 1273 return new String [] {input}; 1274 1275 if (input.length() == countChars(input, 0, 2)) { 1276 int c0 = Character.codePointAt(input, 0); 1277 int c1 = Character.codePointAt(input, Character.charCount(c0)); 1278 if (getClass(c1) == getClass(c0)) { 1279 return new String [] {input}; 1280 } 1281 String [] result = new String [2]; 1282 result[0] = input; 1283 StringBuilder sb = new StringBuilder (2); 1284 sb.appendCodePoint(c1); 1285 sb.appendCodePoint(c0); 1286 result[1] = sb.toString(); 1287 return result; 1288 } 1289 1290 int length = 1; 1291 int nCodePoints = countCodePoints(input); 1292 for(int x=1; x<nCodePoints; x++) 1293 length = length * (x+1); 1294 1295 String [] temp = new String [length]; 1296 1297 int combClass[] = new int[nCodePoints]; 1298 for(int x=0, i=0; x<nCodePoints; x++) { 1299 int c = Character.codePointAt(input, i); 1300 combClass[x] = getClass(c); 1301 i += Character.charCount(c); 1302 } 1303 1304 int index = 0; 1307 int len; 1308 loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) { 1310 len = countChars(input, offset, 1); 1311 boolean skip = false; 1312 for(int y=x-1; y>=0; y--) { 1313 if (combClass[y] == combClass[x]) { 1314 continue loop; 1315 } 1316 } 1317 StringBuilder sb = new StringBuilder (input); 1318 String otherChars = sb.delete(offset, offset+len).toString(); 1319 String [] subResult = producePermutations(otherChars); 1320 1321 String prefix = input.substring(offset, offset+len); 1322 for(int y=0; y<subResult.length; y++) 1323 temp[index++] = prefix + subResult[y]; 1324 } 1325 String [] result = new String [index]; 1326 for (int x=0; x<index; x++) 1327 result[x] = temp[x]; 1328 return result; 1329 } 1330 1331 private int getClass(int c) { 1332 return Normalizer.getClass(c); 1333 } 1334 1335 1342 private String composeOneStep(String input) { 1343 int len = countChars(input, 0, 2); 1344 String firstTwoCharacters = input.substring(0, len); 1345 String result = Normalizer.compose(firstTwoCharacters, false, 0); 1346 1347 if (result.equals(firstTwoCharacters)) 1348 return null; 1349 else { 1350 String remainder = input.substring(len); 1351 return result + remainder; 1352 } 1353 } 1354 1355 1359 private void compile() { 1360 if (has(CANON_EQ) && !has(LITERAL)) { 1362 normalize(); 1363 } else { 1364 normalizedPattern = pattern; 1365 } 1366 1367 patternLength = normalizedPattern.length(); 1369 temp = new int[patternLength + 2]; 1370 1371 boolean hasSupplementary = false; 1372 int c, count = 0; 1374 for (int x = 0; x < patternLength; x += Character.charCount(c)) { 1376 c = normalizedPattern.codePointAt(x); 1377 if (isSupplementary(c)) { 1378 hasSupplementary = true; 1379 } 1380 temp[count++] = c; 1381 } 1382 1383 patternLength = count; temp[patternLength] = 0; 1385 temp[patternLength + 1] = 0; 1386 1387 buffer = new int[32]; 1389 groupNodes = new GroupHead[10]; 1390 1391 if (has(LITERAL)) { 1392 matchRoot = newSlice(temp, patternLength, hasSupplementary); 1394 matchRoot.next = lastAccept; 1395 } else { 1396 matchRoot = expr(lastAccept); 1398 if (patternLength != cursor) { 1400 if (peek() == ')') { 1401 error("Unmatched closing ')'"); 1402 } else { 1403 error("Unexpected internal error"); 1404 } 1405 } 1406 } 1407 1408 if (matchRoot instanceof Slice) { 1410 root = BnM.optimize(matchRoot); 1411 if (root == matchRoot) { 1412 root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot); 1413 } 1414 } else if (matchRoot instanceof Begin || matchRoot instanceof First) { 1415 root = matchRoot; 1416 } else { 1417 root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot); 1418 } 1419 1420 temp = null; 1422 buffer = null; 1423 groupNodes = null; 1424 patternLength = 0; 1425 compiled = true; 1426 } 1427 1428 1431 private static void printObjectTree(Node node) { 1432 while(node != null) { 1433 if (node instanceof Prolog) { 1434 System.out.println(node); 1435 printObjectTree(((Prolog)node).loop); 1436 System.out.println("**** end contents prolog loop"); 1437 } else if (node instanceof Loop) { 1438 System.out.println(node); 1439 printObjectTree(((Loop)node).body); 1440 System.out.println("**** end contents Loop body"); 1441 } else if (node instanceof Curly) { 1442 System.out.println(node); 1443 printObjectTree(((Curly)node).atom); 1444 System.out.println("**** end contents Curly body"); 1445 } else if (node instanceof GroupCurly) { 1446 System.out.println(node); 1447 printObjectTree(((GroupCurly)node).atom); 1448 System.out.println("**** end contents GroupCurly body"); 1449 } else if (node instanceof GroupTail) { 1450 System.out.println(node); 1451 System.out.println("Tail next is "+node.next); 1452 return; 1453 } else { 1454 System.out.println(node); 1455 } 1456 node = node.next; 1457 if (node != null) 1458 System.out.println("->next:"); 1459 if (node == Pattern.accept) { 1460 System.out.println("Accept Node"); 1461 node = null; 1462 } 1463 } 1464 } 1465 1466 1470 static final class TreeInfo { 1471 int minLength; 1472 int maxLength; 1473 boolean maxValid; 1474 boolean deterministic; 1475 1476 TreeInfo() { 1477 reset(); 1478 } 1479 void reset() { 1480 minLength = 0; 1481 maxLength = 0; 1482 maxValid = true; 1483 deterministic = true; 1484 } 1485 } 1486 1487 1492 1493 1496 private boolean has(int f) { 1497 return (flags & f) > 0; 1498 } 1499 1500 1503 private void accept(int ch, String s) { 1504 int testChar = temp[cursor++]; 1505 if (has(COMMENTS)) 1506 testChar = parsePastWhitespace(testChar); 1507 if (ch != testChar) { 1508 error(s); 1509 } 1510 } 1511 1512 1515 private void mark(int c) { 1516 temp[patternLength] = c; 1517 } 1518 1519 1522 private int peek() { 1523 int ch = temp[cursor]; 1524 if (has(COMMENTS)) 1525 ch = peekPastWhitespace(ch); 1526 return ch; 1527 } 1528 1529 1532 private int read() { 1533 int ch = temp[cursor++]; 1534 if (has(COMMENTS)) 1535 ch = parsePastWhitespace(ch); 1536 return ch; 1537 } 1538 1539 1543 private int readEscaped() { 1544 int ch = temp[cursor++]; 1545 return ch; 1546 } 1547 1548 1551 private int next() { 1552 int ch = temp[++cursor]; 1553 if (has(COMMENTS)) 1554 ch = peekPastWhitespace(ch); 1555 return ch; 1556 } 1557 1558 1562 private int nextEscaped() { 1563 int ch = temp[++cursor]; 1564 return ch; 1565 } 1566 1567 1570 private int peekPastWhitespace(int ch) { 1571 while (ASCII.isSpace(ch) || ch == '#') { 1572 while (ASCII.isSpace(ch)) 1573 ch = temp[++cursor]; 1574 if (ch == '#') { 1575 ch = peekPastLine(); 1576 } 1577 } 1578 return ch; 1579 } 1580 1581 1584 private int parsePastWhitespace(int ch) { 1585 while (ASCII.isSpace(ch) || ch == '#') { 1586 while (ASCII.isSpace(ch)) 1587 ch = temp[cursor++]; 1588 if (ch == '#') 1589 ch = parsePastLine(); 1590 } 1591 return ch; 1592 } 1593 1594 1597 private int parsePastLine() { 1598 int ch = temp[cursor++]; 1599 while (ch != 0 && !isLineSeparator(ch)) 1600 ch = temp[cursor++]; 1601 return ch; 1602 } 1603 1604 1607 private int peekPastLine() { 1608 int ch = temp[++cursor]; 1609 while (ch != 0 && !isLineSeparator(ch)) 1610 ch = temp[++cursor]; 1611 return ch; 1612 } 1613 1614 1617 private boolean isLineSeparator(int ch) { 1618 if (has(UNIX_LINES)) { 1619 return ch == '\n'; 1620 } else { 1621 return (ch == '\n' || 1622 ch == '\r' || 1623 (ch|1) == '\u2029' || 1624 ch == '\u0085'); 1625 } 1626 } 1627 1628 1631 private int skip() { 1632 int i = cursor; 1633 int ch = temp[i+1]; 1634 cursor = i + 2; 1635 return ch; 1636 } 1637 1638 1641 private void unread() { 1642 cursor--; 1643 } 1644 1645 1649 private Node error(String s) { 1650 throw new PatternSyntaxException (s, normalizedPattern, 1651 cursor - 1); 1652 } 1653 1654 1658 private boolean findSupplementary(int start, int end) { 1659 for (int i = start; i < end; i++) { 1660 if (isSupplementary(temp[i])) 1661 return true; 1662 } 1663 return false; 1664 } 1665 1666 1670 private static final boolean isSupplementary(int ch) { 1671 return ch >= Character.MIN_SUPPLEMENTARY_CODE_POINT || isSurrogate(ch); 1672 } 1673 1674 1678 1679 1684 private Node expr(Node end) { 1685 Node prev = null; 1686 for (;;) { 1687 Node node = sequence(end); 1688 if (prev == null) { 1689 prev = node; 1690 } else { 1691 prev = new Branch(prev, node); 1692 } 1693 if (peek() != '|') { 1694 return prev; 1695 } 1696 next(); 1697 } 1698 } 1699 1700 1703 private Node sequence(Node end) { 1704 Node head = null; 1705 Node tail = null; 1706 Node node = null; 1707 int i, j, ch; 1708 LOOP: 1709 for (;;) { 1710 ch = peek(); 1711 switch (ch) { 1712 case '(': 1713 node = group0(); 1716 if (node == null) 1718 continue; 1719 if (head == null) 1720 head = node; 1721 else 1722 tail.next = node; 1723 tail = root; 1725 continue; 1726 case '[': 1727 node = clazz(true); 1728 break; 1729 case '\\': 1730 ch = nextEscaped(); 1731 if (ch == 'p' || ch == 'P') { 1732 boolean comp = (ch == 'P'); 1733 boolean oneLetter = true; 1734 ch = next(); if (ch != '{') { 1736 unread(); 1737 } else { 1738 oneLetter = false; 1739 } 1740 node = family(comp, oneLetter); 1741 } else { 1742 unread(); 1743 node = atom(); 1744 } 1745 break; 1746 case '^': 1747 next(); 1748 if (has(MULTILINE)) { 1749 if (has(UNIX_LINES)) 1750 node = new UnixCaret(); 1751 else 1752 node = new Caret(); 1753 } else { 1754 node = new Begin(); 1755 } 1756 break; 1757 case '$': 1758 next(); 1759 if (has(UNIX_LINES)) 1760 node = new UnixDollar(has(MULTILINE)); 1761 else 1762 node = new Dollar(has(MULTILINE)); 1763 break; 1764 case '.': 1765 next(); 1766 if (has(DOTALL)) { 1767 node = new All(); 1768 } else { 1769 if (has(UNIX_LINES)) 1770 node = new UnixDot(); 1771 else { 1772 node = new Dot(); 1773 } 1774 } 1775 break; 1776 case '|': 1777 case ')': 1778 break LOOP; 1779 case ']': case '}': 1781 node = atom(); 1782 break; 1783 case '?': 1784 case '*': 1785 case '+': 1786 next(); 1787 return error("Dangling meta character '" + ((char)ch) + "'"); 1788 case 0: 1789 if (cursor >= patternLength) { 1790 break LOOP; 1791 } 1792 default: 1794 node = atom(); 1795 break; 1796 } 1797 1798 node = closure(node); 1799 1800 if (head == null) { 1801 head = tail = node; 1802 } else { 1803 tail.next = node; 1804 tail = node; 1805 } 1806 } 1807 if (head == null) { 1808 return end; 1809 } 1810 tail.next = end; 1811 return head; 1812 } 1813 1814 1817 private Node atom() { 1818 int first = 0; 1819 int prev = -1; 1820 boolean hasSupplementary = false; 1821 int ch = peek(); 1822 for (;;) { 1823 switch (ch) { 1824 case '*': 1825 case '+': 1826 case '?': 1827 case '{': 1828 if (first > 1) { 1829 cursor = prev; first--; 1831 } 1832 break; 1833 case '$': 1834 case '.': 1835 case '^': 1836 case '(': 1837 case '[': 1838 case '|': 1839 case ')': 1840 break; 1841 case '\\': 1842 ch = nextEscaped(); 1843 if (ch == 'p' || ch == 'P') { if (first > 0) { unread(); 1846 break; 1847 } else { if (ch == 'p' || ch == 'P') { 1849 boolean comp = (ch == 'P'); 1850 boolean oneLetter = true; 1851 ch = next(); if (ch != '{') 1853 unread(); 1854 else 1855 oneLetter = false; 1856 return family(comp, oneLetter); 1857 } 1858 } 1859 break; 1860 } 1861 unread(); 1862 prev = cursor; 1863 ch = escape(false, first == 0); 1864 if (ch >= 0) { 1865 append(ch, first); 1866 first++; 1867 if (isSupplementary(ch)) { 1868 hasSupplementary = true; 1869 } 1870 ch = peek(); 1871 continue; 1872 } else if (first == 0) { 1873 return root; 1874 } 1875 cursor = prev; 1877 break; 1878 case 0: 1879 if (cursor >= patternLength) { 1880 break; 1881 } 1882 default: 1884 prev = cursor; 1885 append(ch, first); 1886 first++; 1887 if (isSupplementary(ch)) { 1888 hasSupplementary = true; 1889 } 1890 ch = next(); 1891 continue; 1892 } 1893 break; 1894 } 1895 if (first == 1) { 1896 return newSingle(buffer[0]); 1897 } else { 1898 return newSlice(buffer, first, hasSupplementary); 1899 } 1900 } 1901 1902 private void append(int ch, int len) { 1903 if (len >= buffer.length) { 1904 int[] tmp = new int[len+len]; 1905 System.arraycopy(buffer, 0, tmp, 0, len); 1906 buffer = tmp; 1907 } 1908 buffer[len] = ch; 1909 } 1910 1911 1917 private Node ref(int refNum) { 1918 boolean done = false; 1919 while(!done) { 1920 int ch = peek(); 1921 switch(ch) { 1922 case '0': 1923 case '1': 1924 case '2': 1925 case '3': 1926 case '4': 1927 case '5': 1928 case '6': 1929 case '7': 1930 case '8': 1931 case '9': 1932 int newRefNum = (refNum * 10) + (ch - '0'); 1933 if (capturingGroupCount - 1 < newRefNum) { 1936 done = true; 1937 break; 1938 } 1939 refNum = newRefNum; 1940 read(); 1941 break; 1942 default: 1943 done = true; 1944 break; 1945 } 1946 } 1947 if (has(CASE_INSENSITIVE) || has(UNICODE_CASE)) 1948 return new CIBackRef(refNum); 1949 else 1950 return new BackRef(refNum); 1951 } 1952 1953 1961 private int escape(boolean inclass, boolean create) { 1962 int ch = skip(); 1963 switch (ch) { 1964 case '0': 1965 return o(); 1966 case '1': 1967 case '2': 1968 case '3': 1969 case '4': 1970 case '5': 1971 case '6': 1972 case '7': 1973 case '8': 1974 case '9': 1975 if (inclass) break; 1976 if (capturingGroupCount < (ch - '0')) 1977 error("No such group yet exists at this point in the pattern"); 1978 if (create) { 1979 root = ref((ch - '0')); 1980 } 1981 return -1; 1982 case 'A': 1983 if (inclass) break; 1984 if (create) root = new Begin(); 1985 return -1; 1986 case 'B': 1987 if (inclass) break; 1988 if (create) root = new Bound(Bound.NONE); 1989 return -1; 1990 case 'C': 1991 break; 1992 case 'D': 1993 if (create) root = new NotCtype(ASCII.DIGIT); 1994 return -1; 1995 case 'E': 1996 case 'F': 1997 break; 1998 case 'G': 1999 if (inclass) break; 2000 if (create) root = new LastMatch(); 2001 return -1; 2002 case 'H': 2003 case 'I': 2004 case 'J': 2005 case 'K': 2006 case 'L': 2007 case 'M': 2008 case 'N': 2009 case 'O': 2010 case 'P': 2011 break; 2012 case 'Q': 2013 if (create) { 2014 int i = cursor; 2017 int c; 2018 while ((c = readEscaped()) != 0) { 2019 if (c == '\\') { 2020 c = readEscaped(); 2021 if (c == 'E' || c == 0) 2022 break; 2023 else 2024 unread(); 2025 } 2026 } 2027 int j = cursor-1; 2028 if (c == 'E') 2029 j--; 2030 else 2031 unread(); 2032 boolean hasSupplementary = false; 2033 for (int x = i; x<j; x++) { 2034 c = temp[x]; 2035 append(c, x-i); 2036 if (isSupplementary(c)) { 2037 hasSupplementary = true; 2038 } 2039 } 2040 root = newSlice(buffer, j-i, hasSupplementary); 2041 } 2042 return -1; 2043 case 'R': 2044 break; 2045 case 'S': 2046 if (create) root = new NotCtype(ASCII.SPACE); 2047 return -1; 2048 case 'T': 2049 case 'U': 2050 case 'V': 2051 break; 2052 case 'W': 2053 if (create) root = new NotCtype(ASCII.WORD); 2054 return -1; 2055 case 'X': 2056 case 'Y': 2057 break; 2058 case 'Z': 2059 if (inclass) break; 2060 if (create) { 2061 if (has(UNIX_LINES)) 2062 root = new UnixDollar(false); 2063 else 2064 root = new Dollar(false); 2065 } 2066 return -1; 2067 case 'a': 2068 return '\007'; 2069 case 'b': 2070 if (inclass) break; 2071 if (create) root = new Bound(Bound.BOTH); 2072 return -1; 2073 case 'c': 2074 return c(); 2075 case 'd': 2076 if (create) root = new Ctype(ASCII.DIGIT); 2077 return -1; 2078 case 'e': 2079 return '\033'; 2080 case 'f': 2081 return '\f'; 2082 case 'g': 2083 case 'h': 2084 case 'i': 2085 case 'j': 2086 case 'k': 2087 case 'l': 2088 case 'm': 2089 break; 2090 case 'n': 2091 return '\n'; 2092 case 'o': 2093 case 'p': 2094 case 'q': 2095 break; 2096 case 'r': 2097 return '\r'; 2098 case 's': 2099 if (create) root = new Ctype(ASCII.SPACE); 2100 return -1; 2101 case 't': 2102 return '\t'; 2103 case 'u': 2104 return u(); 2105 case 'v': 2106 return '\013'; 2107 case 'w': 2108 if (create) root = new Ctype(ASCII.WORD); 2109 return -1; 2110 case 'x': 2111 return x(); 2112 case 'y': 2113 break; 2114 case 'z': 2115 if (inclass) break; 2116 if (create) root = new End(); 2117 return -1; 2118 default: 2119 return ch; 2120 } 2121 error("Illegal/unsupported escape squence"); 2122 return -2; 2123 } 2124 2125 2132 private Node clazz(boolean consume) { 2133 Node prev = null; 2134 Node node = null; 2135 BitClass bits = new BitClass(false); 2136 boolean include = true; 2137 boolean firstInClass = true; 2138 int ch = next(); 2139 for (;;) { 2140 switch (ch) { 2141 case '^': 2142 if (firstInClass) { 2144 if (temp[cursor-1] != '[') 2145 break; 2146 ch = next(); 2147 include = !include; 2148 continue; 2149 } else { 2150 break; 2152 } 2153 case '[': 2154 firstInClass = false; 2155 node = clazz(true); 2156 if (prev == null) 2157 prev = node; 2158 else 2159 prev = new Add(prev, node); 2160 ch = peek(); 2161 continue; 2162 case '&': 2163 firstInClass = false; 2164 ch = next(); 2165 if (ch == '&') { 2166 ch = next(); 2167 Node rightNode = null; 2168 while (ch != ']' && ch != '&') { 2169 if (ch == '[') { 2170 if (rightNode == null) 2171 rightNode = clazz(true); 2172 else 2173 rightNode = new Add(rightNode, clazz(true)); 2174 } else { unread(); 2176 rightNode = clazz(false); 2177 } 2178 ch = peek(); 2179 } 2180 if (rightNode != null) 2181 node = rightNode; 2182 if (prev == null) { 2183 if (rightNode == null) 2184 return error("Bad class syntax"); 2185 else 2186 prev = rightNode; 2187 } else { 2188 prev = new Both(prev, node); 2189 } 2190 } else { 2191 unread(); 2193 break; 2194 } 2195 continue; 2196 case 0: 2197 firstInClass = false; 2198 if (cursor >= patternLength) 2199 return error("Unclosed character class"); 2200 break; 2201 case ']': 2202 firstInClass = false; 2203 if (prev != null) { 2204 if (consume) 2205 next(); 2206 return prev; 2207 } 2208 break; 2209 default: 2210 firstInClass = false; 2211 break; 2212 } 2213 node = range(bits); 2214 if (include) { 2215 if (prev == null) { 2216 prev = node; 2217 } else { 2218 if (prev != node) 2219 prev = new Add(prev, node); 2220 } 2221 } else { 2222 if (prev == null) { 2223 prev = node.dup(true); } else { 2225 if (prev != node) 2226 prev = new Sub(prev, node); 2227 } 2228 } 2229 ch = peek(); 2230 } 2231 } 2232 2233 2237 private Node range(BitClass bits) { 2238 int ch = peek(); 2239 if (ch == '\\') { 2240 ch = nextEscaped(); 2241 if (ch == 'p' || ch == 'P') { boolean comp = (ch == 'P'); 2243 boolean oneLetter = true; 2244 ch = next(); 2246 if (ch != '{') 2247 unread(); 2248 else 2249 oneLetter = false; 2250 return family(comp, oneLetter); 2251 } else { unread(); 2253 ch = escape(true, true); 2254 if (ch == -1) 2255 return root; 2256 } 2257 } else { 2258 ch = single(); 2259 } 2260 if (ch >= 0) { 2261 if (peek() == '-') { 2262 int endRange = temp[cursor+1]; 2263 if (endRange == '[') { 2264 if (ch < 256) 2265 return bits.add(ch, flags()); 2266 return newSingle(ch); 2267 } 2268 if (endRange != ']') { 2269 next(); 2270 int m = single(); 2271 if (m < ch) 2272 return error("Illegal character range"); 2273 if (has(CASE_INSENSITIVE) || has(UNICODE_CASE)) 2274 return new CIRange(ch, m); 2275 else 2276 return new Range(ch, m); 2277 } 2278 } 2279 if (ch < 256) 2280 return bits.add(ch, flags()); 2281 return newSingle(ch); 2282 } 2283 return error("Unexpected character '"+((char)ch)+"'"); 2284 } 2285 2286 private int single() { 2287 int ch = peek(); 2288 switch (ch) { 2289 case '\\': 2290 return escape(true, false); 2291 default: 2292 next(); 2293 return ch; 2294 } 2295 } 2296 2297 2302 private Node family(boolean not, boolean singleLetter) { 2303 next(); 2304 String name; 2305 2306 if (singleLetter) { 2307 int c = temp[cursor]; 2308 if (!Character.isSupplementaryCodePoint(c)) { 2309 name = String.valueOf((char)c); 2310 } else { 2311 name = new String (temp, cursor, 1); 2312 } 2313 name = name.intern(); 2314 read(); 2315 } else { 2316 int i = cursor; 2317 mark('}'); 2318 while(read() != '}') { 2319 } 2320 mark('\000'); 2321 int j = cursor; 2322 if (j > patternLength) 2323 return error("Unclosed character family"); 2324 if (i + 1 >= j) 2325 return error("Empty character family"); 2326 name = new String (temp, i, j-i-1).intern(); 2327 } 2328 2329 if (name.startsWith("In")) { 2330 name = name.substring(2, name.length()).intern(); 2331 return retrieveFamilyNode(name, not); 2332 } 2333 if (name.startsWith("Is")) 2334 name = name.substring(2, name.length()).intern(); 2335 return retrieveCategoryNode(name).dup(not); 2336 } 2337 2338 private Node retrieveFamilyNode(String name, boolean not) { 2339 if (name == null) { 2340 return familyError("", "Null character family."); 2341 } 2342 Node n = null; 2343 try { 2344 Character.UnicodeBlock block = Character.UnicodeBlock.forName(name); 2345 n = new UBlock(block, not); 2346 } catch (IllegalArgumentException iae) { 2347 return familyError(name, "Unknown character family {"); 2348 } 2349 return n; 2350 } 2351 2352 private Node retrieveCategoryNode(String name) { 2353 Node n = (Node)categoryNames.cMap.get(name); 2354 if (n != null) 2355 return n; 2356 2357 return familyError(name, "Unknown character category {"); 2358 } 2359 2360 private Node familyError(String name, String type) { 2361 StringBuilder sb = new StringBuilder (); 2362 sb.append(type); 2363 sb.append(name); 2364 sb.append("}"); 2365 name = sb.toString(); 2366 return error(name); 2367 } 2368 2369 2374 private Node group0() { 2375 boolean capturingGroup = false; 2376 Node head = null; 2377 Node tail = null; 2378 int save = flags; 2379 root = null; 2380 int ch = next(); 2381 if (ch == '?') { 2382 ch = skip(); 2383 switch (ch) { 2384 case ':': head = createGroup(true); 2386 tail = root; 2387 head.next = expr(tail); 2388 break; 2389 case '=': case '!': 2391 head = createGroup(true); 2392 tail = root; 2393 head.next = expr(tail); 2394 if (ch == '=') { 2395 head = tail = new Pos(head); 2396 } else { 2397 head = tail = new Neg(head); 2398 } 2399 break; 2400 case '>': head = createGroup(true); 2402 tail = root; 2403 head.next = expr(tail); 2404 head = tail = new Ques(head, INDEPENDENT); 2405 break; 2406 case '<': ch = read(); 2408 int start = cursor; 2409 head = createGroup(true); 2410 tail = root; 2411 head.next = expr(tail); 2412 TreeInfo info = new TreeInfo(); 2413 head.study(info); 2414 if (info.maxValid == false) { 2415 return error("Look-behind group does not have " 2416 + "an obvious maximum length"); 2417 } 2418 boolean hasSupplementary = findSupplementary(start, patternLength); 2419 if (ch == '=') { 2420 head = tail = (hasSupplementary ? 2421 new BehindS(head, info.maxLength, 2422 info.minLength) : 2423 new Behind(head, info.maxLength, 2424 info.minLength)); 2425 } else if (ch == '!') { 2426 head = tail = (hasSupplementary ? 2427 new NotBehindS(head, info.maxLength, 2428 info.minLength) : 2429 new NotBehind(head, info.maxLength, 2430 info.minLength)); 2431 } else { 2432 error("Unknown look-behind group"); 2433 } 2434 break; 2435 case '$': 2436 case '@': 2437 return error("Unknown group type"); 2438 default: unread(); 2440 addFlag(); 2441 ch = read(); 2442 if (ch == ')') { 2443 return null; } 2445 if (ch != ':') { 2446 return error("Unknown inline modifier"); 2447 } 2448 head = createGroup(true); 2449 tail = root; 2450 head.next = expr(tail); 2451 break; 2452 } 2453 } else { capturingGroup = true; 2455 head = createGroup(false); 2456 tail = root; 2457 head.next = expr(tail); 2458 } 2459 2460 accept(')', "Unclosed group"); 2461 flags = save; 2462 2463 Node node = closure(head); 2465 if (node == head) { root = tail; 2467 return node; } 2469 if (head == tail) { root = node; 2471 return node; } 2473 2474 if (node instanceof Ques) { 2475 Ques ques = (Ques) node; 2476 if (ques.type == POSSESSIVE) { 2477 root = node; 2478 return node; 2479 } 2480 tail.next = new Dummy(); 2482 tail = tail.next; 2483 if (ques.type == GREEDY) { 2484 head = new Branch(head, tail); 2485 } else { head = new Branch(tail, head); 2487 } 2488 root = tail; 2489 return head; 2490 } else if (node instanceof Curly) { 2491 Curly curly = (Curly) node; 2492 if (curly.type == POSSESSIVE) { 2493 root = node; 2494 return node; 2495 } 2496 TreeInfo info = new TreeInfo(); 2498 if (head.study(info)) { GroupTail temp = (GroupTail) tail; 2500 head = root = new GroupCurly(head.next, curly.cmin, 2501 curly.cmax, curly.type, 2502 ((GroupTail)tail).localIndex, 2503 ((GroupTail)tail).groupIndex, 2504 capturingGroup); 2505 return head; 2506 } else { int temp = ((GroupHead) head).localIndex; 2508 Loop loop; 2509 if (curly.type == GREEDY) 2510 loop = new Loop(this.localCount, temp); 2511 else loop = new LazyLoop(this.localCount, temp); 2513 Prolog prolog = new Prolog(loop); 2514 this.localCount += 1; 2515 loop.cmin = curly.cmin; 2516 loop.cmax = curly.cmax; 2517 loop.body = head; 2518 tail.next = loop; 2519 root = loop; 2520 return prolog; } 2522 } else if (node instanceof First) { 2523 root = node; 2524 return node; 2525 } 2526 return error("Internal logic error"); 2527 } 2528 2529 2534 private Node createGroup(boolean anonymous) { 2535 int localIndex = localCount++; 2536 int groupIndex = 0; 2537 if (!anonymous) 2538 groupIndex = capturingGroupCount++; 2539 GroupHead head = new GroupHead(localIndex); 2540 root = new GroupTail(localIndex, groupIndex); 2541 if (!anonymous && groupIndex < 10) 2542 groupNodes[groupIndex] = head; 2543 return head; 2544 } 2545 2546 2549 private void addFlag() { 2550 int ch = peek(); 2551 for (;;) { 2552 switch (ch) { 2553 case 'i': 2554 flags |= CASE_INSENSITIVE; 2555 break; 2556 case 'm': 2557 flags |= MULTILINE; 2558 break; 2559 case 's': 2560 flags |= DOTALL; 2561 break; 2562 case 'd': 2563 flags |= UNIX_LINES; 2564 break; 2565 case 'u': 2566 flags |= UNICODE_CASE; 2567 break; 2568 case 'c': 2569 flags |= CANON_EQ; 2570 break; 2571 case 'x': 2572 flags |= COMMENTS; 2573 break; 2574 case '-': ch = next(); 2576 subFlag(); 2577 default: 2578 return; 2579 } 2580 ch = next(); 2581 } 2582 } 2583 2584 2588 private void subFlag() { 2589 int ch = peek(); 2590 for (;;) { 2591 switch (ch) { 2592 case 'i': 2593 flags &= ~CASE_INSENSITIVE; 2594 break; 2595 case 'm': 2596 flags &= ~MULTILINE; 2597 break; 2598 case 's': 2599 flags &= ~DOTALL; 2600 break; 2601 case 'd': 2602 flags &= ~UNIX_LINES; 2603 break; 2604 case 'u': 2605 flags &= ~UNICODE_CASE; 2606 break; 2607 case 'c': 2608 flags &= ~CANON_EQ; 2609 break; 2610 case 'x': 2611 flags &= ~COMMENTS; 2612 break; 2613 default: 2614 return; 2615 } 2616 ch = next(); 2617 } 2618 } 2619 2620 static final int MAX_REPS = 0x7FFFFFFF; 2621 2622 static final int GREEDY = 0; 2623 2624 static final int LAZY = 1; 2625 2626 static final int POSSESSIVE = 2; 2627 2628 static final int INDEPENDENT = 3; 2629 2630 2635 private Node closure(Node prev) { 2636 Node atom; 2637 int ch = peek(); 2638 switch (ch) { 2639 case '?': 2640 ch = next(); 2641 if (ch == '?') { 2642 next(); 2643 return new Ques(prev, LAZY); 2644 } else if (ch == '+') { 2645 next(); 2646 return new Ques(prev, POSSESSIVE); 2647 } 2648 return new Ques(prev, GREEDY); 2649 case '*': 2650 ch = next(); 2651 if (ch == '?') { 2652 next(); 2653 return new Curly(prev, 0, MAX_REPS, LAZY); 2654 } else if (ch == '+') { 2655 next(); 2656 return new Curly(prev, 0, MAX_REPS, POSSESSIVE); 2657 } 2658 return new Curly(prev, 0, MAX_REPS, GREEDY); 2659 case '+': 2660 ch = next(); 2661 if (ch == '?') { 2662 next(); 2663 return new Curly(prev, 1, MAX_REPS, LAZY); 2664 } else if (ch == '+') { 2665 next(); 2666 return new Curly(prev, 1, MAX_REPS, POSSESSIVE); 2667 } 2668 return new Curly(prev, 1, MAX_REPS, GREEDY); 2669 case '{': 2670 ch = temp[cursor+1]; 2671 if (ASCII.isDigit(ch)) { 2672 skip(); 2673 int cmin = 0; 2674 do { 2675 cmin = cmin * 10 + (ch - '0'); 2676 } while (ASCII.isDigit(ch = read())); 2677 int cmax = cmin; 2678 if (ch == ',') { 2679 ch = read(); 2680 cmax = MAX_REPS; 2681 if (ch != '}') { 2682 cmax = 0; 2683 while (ASCII.isDigit(ch)) { 2684 cmax = cmax * 10 + (ch - '0'); 2685 ch = read(); 2686 } 2687 } 2688 } 2689 if (ch != '}') 2690 return error("Unclosed counted closure"); 2691 if (((cmin) | (cmax) | (cmax - cmin)) < 0) 2692 return error("Illegal repetition range"); 2693 Curly curly; 2694 ch = peek(); 2695 if (ch == '?') { 2696 next(); 2697 curly = new Curly(prev, cmin, cmax, LAZY); 2698 } else if (ch == '+') { 2699 next(); 2700 curly = new Curly(prev, cmin, cmax, POSSESSIVE); 2701 } else { 2702 curly = new Curly(prev, cmin, cmax, GREEDY); 2703 } 2704 return curly; 2705 } else { 2706 error("Illegal repetition"); 2707 } 2708 return prev; 2709 default: 2710 return prev; 2711 } 2712 } 2713 2714 2717 private int c() { 2718 if (cursor < patternLength) { 2719 return read() ^ 64; 2720 } 2721 error("Illegal control escape sequence"); 2722 return -1; 2723 } 2724 2725 2728 private int o() { 2729 int n = read(); 2730 if (((n-'0')|('7'-n)) >= 0) { 2731 int m = read(); 2732 if (((m-'0')|('7'-m)) >= 0) { 2733 int o = read(); 2734 if ((((o-'0')|('7'-o)) >= 0) && (((n-'0')|('3'-n)) >= 0)) { 2735 return (n - '0') * 64 + (m - '0') * 8 + (o - '0'); 2736 } 2737 unread(); 2738 return (n - '0') * 8 + (m - '0'); 2739 } 2740 unread(); 2741 return (n - '0'); 2742 } 2743 error("Illegal octal escape sequence"); 2744 return -1; 2745 } 2746 2747 2750 private int x() { 2751 int n = read(); 2752 if (ASCII.isHexDigit(n)) { 2753 int m = read(); 2754 if (ASCII.isHexDigit(m)) { 2755 return ASCII.toDigit(n) * 16 + ASCII.toDigit(m); 2756 } 2757 } 2758 error("Illegal hexadecimal escape sequence"); 2759 return -1; 2760 } 2761 2762 2765 private int u() { 2766 int n = 0; 2767 for (int i = 0; i < 4; i++) { 2768 int ch = read(); 2769 if (!ASCII.isHexDigit(ch)) { 2770 error("Illegal Unicode escape sequence"); 2771 } 2772 n = n * 16 + ASCII.toDigit(ch); 2773 } 2774 return n; 2775 } 2776 2777 2781 2784 private static final boolean isSurrogate(int c) { 2785 return c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_LOW_SURROGATE; 2786 } 2787 2788 private static final int countChars(CharSequence seq, int index, 2789 int lengthInCodePoints) { 2790 if (lengthInCodePoints == 1 && !Character.isHighSurrogate(seq.charAt(index))) { 2792 assert (index >= 0 && index < seq.length()); 2793 return 1; 2794 } 2795 int length = seq.length(); 2796 int x = index; 2797 if (lengthInCodePoints >= 0) { 2798 assert (index >= 0 && index < length); 2799 for (int i = 0; x < length && i < lengthInCodePoints; i++) { 2800 if (Character.isHighSurrogate(seq.charAt(x++))) { 2801 if (x < length && Character.isLowSurrogate(seq.charAt(x))) { 2802 x++; 2803 } 2804 } 2805 } 2806 return x - index; 2807 } 2808 2809 assert (index >= 0 && index <= length); 2810 if (index == 0) { 2811 return 0; 2812 } 2813 int len = -lengthInCodePoints; 2814 for (int i = 0; x > 0 && i < len; i++) { 2815 if (Character.isLowSurrogate(seq.charAt(--x))) { 2816 if (x > 0 && Character.isHighSurrogate(seq.charAt(x-1))) { 2817 x--; 2818 } 2819 } 2820 } 2821 return index - x; 2822 } 2823 2824 private static final int countCodePoints(CharSequence seq) { 2825 int length = seq.length(); 2826 int n = 0; 2827 for (int i = 0; i < length; ) { 2828 n++; 2829 if (Character.isHighSurrogate(seq.charAt(i++))) { 2830 if (i < length && Character.isLowSurrogate(seq.charAt(i))) { 2831 i++; 2832 } 2833 } 2834 } 2835 return n; 2836 } 2837 2838 2843 static final class BitClass extends Node { 2844 boolean[] bits = new boolean[256]; 2845 boolean complementMe = false; 2846 BitClass(boolean not) { 2847 complementMe = not; 2848 } 2849 BitClass(boolean[] newBits, boolean not) { 2850 complementMe = not; 2851 bits = newBits; 2852 } 2853 Node add(int c, int f) { 2854 if ((f & CASE_INSENSITIVE) == 0) { 2855 bits[c] = true; 2856 } else if (ASCII.isAscii(c)) { 2857 bits[ASCII.toUpper(c)] = true; 2858 bits[ASCII.toLower(c)] = true; 2859 } else { 2860 bits[Character.toLowerCase((char)c)] = true; 2861 bits[Character.toUpperCase((char)c)] = true; 2862 } 2863 return this; 2864 } 2865 Node dup(boolean not) { 2866 return new BitClass(bits, not); 2867 } 2868 boolean match(Matcher matcher, int i, CharSequence seq) { 2869 if (i >= matcher.to) { 2870 matcher.hitEnd = true; 2871 return false; 2872 } 2873 int c = Character.codePointAt(seq, i); 2874 boolean charMatches = 2875 (c > 255) ? complementMe : (bits[c] ^ complementMe); 2876 return charMatches && next.match(matcher, i+Character.charCount(c), seq); 2877 } 2878 boolean study(TreeInfo info) { 2879 info.minLength++; 2880 info.maxLength++; 2881 return next.study(info); 2882 } 2883 } 2884 2885 2888 private Node newSingle(int ch) { 2889 int f = flags; 2890 if ((f & (CASE_INSENSITIVE|UNICODE_CASE)) == 0) { 2891 return new Single(ch); 2892 } 2893 if ((f & UNICODE_CASE) == 0) { 2894 return new SingleA(ch); 2895 } 2896 return new SingleU(ch); 2897 } 2898 2899 2902 private Node newSlice(int[] buf, int count, boolean hasSupplementary) { 2903 int[] tmp = new int[count]; 2904 int i = flags; 2905 if ((i & (CASE_INSENSITIVE|UNICODE_CASE)) == 0) { 2906 for (i = 0; i < count; i++) { 2907 tmp[i] = buf[i]; 2908 } 2909 return hasSupplementary ? new SliceS(tmp) : new Slice(tmp); 2910 } else if ((i & UNICODE_CASE) == 0) { 2911 for (i = 0; i < count; i++) { 2912 tmp[i] = (char)ASCII.toLower(buf[i]); 2913 } 2914 return new SliceA(tmp); 2915 } else { 2916 for (i = 0; i < count; i++) { 2917 int c = buf[i]; 2918 c = Character.toUpperCase(c); 2919 c = Character.toLowerCase(c); 2920 tmp[i] = c; 2921 } 2922 return new SliceU(tmp); 2923 } 2924 } 2925 2926 2933 2934 2939 static class Node extends Object { 2940 Node next; 2941 Node() { 2942 next = Pattern.accept; 2943 } 2944 Node dup(boolean not) { 2945 if (not) { 2946 return new Not(this); 2947 } else { 2948 throw new RuntimeException ("internal error in Node dup()"); 2949 } 2950 } 2951 2954 boolean match(Matcher matcher, int i, CharSequence seq) { 2955 matcher.last = i; 2956 matcher.groups[0] = matcher.first; 2957 matcher.groups[1] = matcher.last; 2958 return true; 2959 } 2960 2963 boolean study(TreeInfo info) { 2964 if (next != null) { 2965 return next.study(info); 2966 } else { 2967 return info.deterministic; 2968 } 2969 } 2970 } 2971 2972 static class LastNode extends Node { 2973 2978 boolean match(Matcher matcher, int i, CharSequence seq) { 2979 if (matcher.acceptMode == Matcher.ENDANCHOR && i != matcher.to) 2980 return false; 2981 matcher.last = i; 2982 matcher.groups[0] = matcher.first; 2983 matcher.groups[1] = matcher.last; 2984 return true; 2985 } 2986 } 2987 2988 2991 static class Dummy extends Node { 2992 boolean match(Matcher matcher, int i, CharSequence seq) { 2993 return next.match(matcher, i, seq); 2994 } 2995 } 2996 2997 3003 static class Start extends Node { 3004 int minLength; 3005 Start(Node node) { 3006 this.next = node; 3007 TreeInfo info = new TreeInfo(); 3008 next.study(info); 3009 minLength = info.minLength; 3010 } 3011 boolean match(Matcher matcher, int i, CharSequence seq) { 3012 if (i > matcher.to - minLength) { 3013 matcher.hitEnd = true; 3014 return false; 3015 } 3016 boolean ret = false; 3017 int guard = matcher.to - minLength; 3018 for (; i <= guard; i++) { 3019 if (ret = next.match(matcher, i, seq)) 3020 break; 3021 if (i == guard) 3022 matcher.hitEnd = true; 3023 } 3024 if (ret) { 3025 matcher.first = i; 3026 matcher.groups[0] = matcher.first; 3027 matcher.groups[1] = matcher.last; 3028 } 3029 return ret; 3030 } 3031 boolean study(TreeInfo info) { 3032 next.study(info); 3033 info.maxValid = false; 3034 info.deterministic = false; 3035 return false; 3036 } 3037 } 3038 3039 3042 static final class StartS extends Start { 3043 StartS(Node node) { 3044 super(node); 3045 } 3046 boolean match(Matcher matcher, int i, CharSequence seq) { 3047 if (i > matcher.to - minLength) { 3048 matcher.hitEnd = true; 3049 return false; 3050 } 3051 boolean ret = false; 3052 int guard = matcher.to - minLength; 3053 while (i <= guard) { 3054 if ((ret = next.match(matcher, i, seq)) || i == guard) 3055 break; 3056 if (Character.isHighSurrogate(seq.charAt(i++))) { 3059 if (i < seq.length() && Character.isLowSurrogate(seq.charAt(i))) { 3060 i++; 3061 } 3062 } 3063 if (i == guard) 3064 matcher.hitEnd = true; 3065 } 3066 if (ret) { 3067 matcher.first = i; 3068 matcher.groups[0] = matcher.first; 3069 matcher.groups[1] = matcher.last; 3070 } 3071 return ret; 3072 } 3073 } 3074 3075 3080 static final class Begin extends Node { 3081 boolean match(Matcher matcher, int i, CharSequence seq) { 3082 int fromIndex = (matcher.anchoringBounds) ? 3083 matcher.from : 0; 3084 if (i == fromIndex && next.match(matcher, i, seq)) { 3085 matcher.first = i; 3086 matcher.groups[0] = i; 3087 matcher.groups[1] = matcher.last; 3088 return true; 3089 } else { 3090 return false; 3091 } 3092 } 3093 } 3094 3095 3099 static final class End extends Node { 3100 boolean match(Matcher matcher, int i, CharSequence seq) { 3101 int endIndex = (matcher.anchoringBounds) ? 3102 matcher.to : matcher.getTextLength(); 3103 if (i == endIndex) { 3104 matcher.hitEnd = true; 3105 return next.match(matcher, i, seq); 3106 } 3107 return false; 3108 } 3109 } 3110 3111 3115 static final class Caret extends Node { 3116 boolean match(Matcher matcher, int i, CharSequence seq) { 3117 int startIndex = matcher.from; 3118 int endIndex = matcher.to; 3119 if (!matcher.anchoringBounds) { 3120 startIndex = 0; 3121 endIndex = matcher.getTextLength(); 3122 } 3123 if (i == endIndex) { 3125 matcher.hitEnd = true; 3126 return false; 3127 } 3128 if (i > startIndex) { 3129 char ch = seq.charAt(i-1); 3130 if (ch != '\n' && ch != '\r' 3131 && (ch|1) != '\u2029' 3132 && ch != '\u0085' ) { 3133 return false; 3134 } 3135 if (ch == '\r' && seq.charAt(i) == '\n') 3137 return false; 3138 } 3139 return next.match(matcher, i, seq); 3140 } 3141 } 3142 3143 3146 static final class UnixCaret extends Node { 3147 boolean match(Matcher matcher, int i, CharSequence seq) { 3148 int startIndex = matcher.from; 3149 int endIndex = matcher.to; 3150 if (!matcher.anchoringBounds) { 3151 startIndex = 0; 3152 endIndex = matcher.getTextLength(); 3153 } 3154 if (i == endIndex) { 3156 matcher.hitEnd = true; 3157 return false; 3158 } 3159 if (i > startIndex) { 3160 char ch = seq.charAt(i-1); 3161 if (ch != '\n') { 3162 return false; 3163 } 3164 } 3165 return next.match(matcher, i, seq); 3166 } 3167 } 3168 3169 3173 static final class LastMatch extends Node { 3174 boolean match(Matcher matcher, int i, CharSequence seq) { 3175 if (i != matcher.oldLast) 3176 return false; 3177 return next.match(matcher, i, seq); 3178 } 3179 } 3180 3181 3194 static final class Dollar extends Node { 3195 boolean multiline; 3196 Dollar(boolean mul) { 3197 multiline = mul; 3198 } 3199 boolean match(Matcher matcher, int i, CharSequence seq) { 3200 int endIndex = (matcher.anchoringBounds) ? 3201 matcher.to : matcher.getTextLength(); 3202 if (!multiline) { 3203 if (i < endIndex - 2) 3204 return false; 3205 if (i == endIndex - 2) { 3206 char ch = seq.charAt(i); 3207 if (ch != '\r') 3208 return false; 3209 ch = seq.charAt(i + 1); 3210 if (ch != '\n') 3211 return false; 3212 } 3213 } 3214 if (i < endIndex) { 3223 char ch = seq.charAt(i); 3224 if (ch == '\n') { 3225 if (i > 0 && seq.charAt(i-1) == '\r') 3227 return false; 3228 if (multiline) 3229 return next.match(matcher, i, seq); 3230 } else if (ch == '\r' || ch == '\u0085' || 3231 (ch|1) == '\u2029') { 3232 if (multiline) 3233 return next.match(matcher, i, seq); 3234 } else { return false; 3236 } 3237 } 3238 matcher.hitEnd = true; 3240 matcher.requireEnd = true; 3243 return next.match(matcher, i, seq); 3244 } 3245 boolean study(TreeInfo info) { 3246 next.study(info); 3247 return info.deterministic; 3248 } 3249 } 3250 3251 3255 static final class UnixDollar extends Node { 3256 boolean multiline; 3257 UnixDollar(boolean mul) { 3258 multiline = mul; 3259 } 3260 boolean match(Matcher matcher, int i, CharSequence seq) { 3261 int endIndex = (matcher.anchoringBounds) ? 3262 matcher.to : matcher.getTextLength(); 3263 if (i < endIndex) { 3264 char ch = seq.charAt(i); 3265 if (ch == '\n') { 3266 if (multiline == false && i != endIndex - 1) 3269 return false; 3270 if (multiline) 3273 return next.match(matcher, i, seq); 3274 } else { 3275 return false; 3276 } 3277 } 3278 matcher.hitEnd = true; 3281 matcher.requireEnd = true; 3284 return next.match(matcher, i, seq); 3285 } 3286 boolean study(TreeInfo info) { 3287 next.study(info); 3288 return info.deterministic; 3289 } 3290 } 3291 3292 3295 static final class Single extends Node { 3296 int ch; 3297 int len; 3298 Single(int n) { 3299 ch = n; 3300 len = Character.charCount(ch); 3301 } 3302 Node dup(boolean not) { 3303 if (not) 3304 return new NotSingle(ch); 3305 else 3306 return new Single(ch); 3307 } 3308 boolean match(Matcher matcher, int i, CharSequence seq) { 3309 if (i >= matcher.to) { 3310 matcher.hitEnd = true; 3311 return false; 3312 } 3313 int c = Character.codePointAt(seq, i); 3314 return (c == ch 3315 && next.match(matcher, i+len, seq)); 3316 } 3317 boolean study(TreeInfo info) { 3318 info.minLength++; 3319 info.maxLength++; 3320 return next.study(info); 3321 } 3322 } 3323 3324 3327 static final class NotSingle extends Node { 3328 int ch; 3329 NotSingle(int n) { 3330 ch = n; 3331 } 3332 Node dup(boolean not) { 3333 if (not) 3334 return new Single(ch); 3335 else 3336 return new NotSingle(ch); 3337 } 3338 boolean match(Matcher matcher, int i, CharSequence seq) { 3339 if (i >= matcher.to) { 3340 matcher.hitEnd = true; 3341 return false; 3342 } 3343 int c = Character.codePointAt(seq, i); 3344 return (c != ch 3345 && next.match(matcher, i+Character.charCount(c), seq)); 3346 } 3347 boolean study(TreeInfo info) { 3348 info.minLength++; 3349 info.maxLength++; 3350 return next.study(info); 3351 } 3352 } 3353 3354 3357 static final class SingleA extends Node { 3358 int ch; 3359 SingleA(int n) { 3360 ch = ASCII.toLower(n); 3361 } 3362 Node dup(boolean not) { 3363 if (not) 3364 return new NotSingleA(ch); 3365 else 3366 return new SingleA(ch); 3367 } 3368 boolean match(Matcher matcher, int i, CharSequence seq) { 3369 if (i >= matcher.to) { 3370 matcher.hitEnd = true; 3371 } else { 3372 int c = seq.charAt(i); 3373 if (c == ch || ASCII.toLower(c) == ch) { 3374 return next.match(matcher, i+1, seq); 3375 } 3376 } 3377 return false; 3378 } 3379 3380 boolean study(TreeInfo info) { 3381 info.minLength++; 3382 info.maxLength++; 3383 return next.study(info); 3384 } 3385 } 3386 3387 static final class NotSingleA extends Node { 3388 int ch; 3389 NotSingleA(int n) { 3390 ch = ASCII.toLower(n); 3391 } 3392 Node dup(boolean not) { 3393 if (not) 3394 return new SingleA(ch); 3395 else 3396 return new NotSingleA(ch); 3397 } 3398 boolean match(Matcher matcher, int i, CharSequence seq) { 3399 if (i >= matcher.to) { 3400 matcher.hitEnd = true; 3401 } else { 3402 int c = Character.codePointAt(seq, i); 3403 if (c != ch && ASCII.toLower(c) != ch) { 3404 return next.match(matcher, i+Character.charCount(c), seq); 3405 } 3406 } 3407 return false; 3408 } 3409 3410 boolean study(TreeInfo info) { 3411 info.minLength++; 3412 info.maxLength++; 3413 return next.study(info); 3414 } 3415 } 3416 3417 3420 static final class SingleU extends Node { 3421 int ch; 3422 int len; 3423 SingleU(int c) { 3424 ch = Character.toLowerCase(Character.toUpperCase(c)); 3425 len = Character.charCount(ch); 3426 } 3427 Node dup(boolean not) { 3428 if (not) 3429 return new NotSingleU(ch); 3430 else 3431 return new SingleU(ch); 3432 } 3433 boolean match(Matcher matcher, int i, CharSequence seq) { 3434 if (i >= matcher.to) { 3435 matcher.hitEnd = true; 3436 } else { 3437 int c = Character.codePointAt(seq, i); 3438 if (c == ch) 3439 return next.match(m
|