1 64 65 package com.jcorporate.expresso.ext.regexp; 66 67 123 124 import java.util.Hashtable ; 125 126 127 139 public class RECompiler { 140 141 char[] instruction; int lenInstruction; 145 String pattern; int len; int idx; int parens; 151 static final int NODE_NORMAL = 0; static final int NODE_NULLABLE = 1; static final int NODE_TOPLEVEL = 2; 156 static final char ESC_MASK = 0xfff0; static final char ESC_BACKREF = 0xffff; static final char ESC_COMPLEX = 0xfffe; static final char ESC_CLASS = 0xfffd; 162 static final int maxBrackets = 10; static int brackets = 0; static int[] bracketStart = null; static int[] bracketEnd = null; static int[] bracketMin = null; static int[] bracketOpt = null; static final int bracketUnbounded = -1; static final int bracketFinished = -2; 172 static Hashtable hashPOSIX = new Hashtable (); 174 175 static { 176 hashPOSIX.put("alnum", new Character (RE.POSIX_CLASS_ALNUM)); 177 hashPOSIX.put("alpha", new Character (RE.POSIX_CLASS_ALPHA)); 178 hashPOSIX.put("blank", new Character (RE.POSIX_CLASS_BLANK)); 179 hashPOSIX.put("cntrl", new Character (RE.POSIX_CLASS_CNTRL)); 180 hashPOSIX.put("digit", new Character (RE.POSIX_CLASS_DIGIT)); 181 hashPOSIX.put("graph", new Character (RE.POSIX_CLASS_GRAPH)); 182 hashPOSIX.put("lower", new Character (RE.POSIX_CLASS_LOWER)); 183 hashPOSIX.put("print", new Character (RE.POSIX_CLASS_PRINT)); 184 hashPOSIX.put("punct", new Character (RE.POSIX_CLASS_PUNCT)); 185 hashPOSIX.put("space", new Character (RE.POSIX_CLASS_SPACE)); 186 hashPOSIX.put("upper", new Character (RE.POSIX_CLASS_UPPER)); 187 hashPOSIX.put("xdigit", new Character (RE.POSIX_CLASS_XDIGIT)); 188 hashPOSIX.put("javastart", new Character (RE.POSIX_CLASS_JSTART)); 189 hashPOSIX.put("javapart", new Character (RE.POSIX_CLASS_JPART)); 190 } 191 192 195 class RERange { 196 int size = 16; int[] minRange = new int[size]; int[] maxRange = new int[size]; int num = 0; 201 206 void delete(int index) { 207 208 if (num == 0 || index >= num) { 210 return; 211 } 212 while (index++ < num) { 214 if (index - 1 >= 0) { 215 minRange[index - 1] = minRange[index]; 216 maxRange[index - 1] = maxRange[index]; 217 } 218 } 219 220 num--; 222 } 223 224 230 void merge(int min, int max) { 231 232 for (int i = 0; i < num; i++) { 234 235 if (min >= minRange[i] && max <= maxRange[i]) { 237 return; 238 } 239 240 else if (min <= minRange[i] && max >= maxRange[i]) { 242 delete(i); 243 merge(min, max); 244 245 return; 246 } 247 248 else if (min >= minRange[i] && min <= maxRange[i]) { 250 delete(i); 251 min = minRange[i]; 252 merge(min, max); 253 254 return; 255 } 256 257 else if (max >= minRange[i] && max <= maxRange[i]) { 259 delete(i); 260 max = maxRange[i]; 261 merge(min, max); 262 263 return; 264 } 265 } 266 if (num >= size) { 268 size *= 2; 269 270 int[] newMin = new int[size]; 271 int[] newMax = new int[size]; 272 System.arraycopy(minRange, 0, newMin, 0, num); 273 System.arraycopy(maxRange, 0, newMax, 0, num); 274 minRange = newMin; 275 maxRange = newMax; 276 } 277 278 minRange[num] = min; 279 maxRange[num] = max; 280 num++; 281 } 282 283 289 void remove(int min, int max) { 290 291 for (int i = 0; i < num; i++) { 293 294 if (minRange[i] >= min && maxRange[i] <= max) { 296 delete(i); 297 i--; 298 299 return; 300 } 301 302 else if (min >= minRange[i] && max <= maxRange[i]) { 304 int minr = minRange[i]; 305 int maxr = maxRange[i]; 306 delete(i); 307 308 if (minr < min - 1) { 309 merge(minr, min - 1); 310 } 311 if (max + 1 < maxr) { 312 merge(max + 1, maxr); 313 } 314 315 return; 316 } 317 318 else if (minRange[i] >= min && minRange[i] <= max) { 320 minRange[i] = max + 1; 321 322 return; 323 } 324 325 else if (maxRange[i] >= min && maxRange[i] <= max) { 327 maxRange[i] = min - 1; 328 329 return; 330 } 331 } 332 } 333 334 341 void include(int min, int max, boolean include) { 342 if (include) { 343 merge(min, max); 344 } else { 345 remove(min, max); 346 } 347 } 348 349 355 void include(char minmax, boolean include) { 356 include(minmax, minmax, include); 357 } 358 } 359 360 363 public RECompiler() { 364 365 instruction = new char[128]; 367 lenInstruction = 0; 368 } 369 370 373 void allocBrackets() { 374 375 if (bracketStart == null) { 377 378 bracketStart = new int[maxBrackets]; 380 bracketEnd = new int[maxBrackets]; 381 bracketMin = new int[maxBrackets]; 382 bracketOpt = new int[maxBrackets]; 383 384 for (int i = 0; i < maxBrackets; i++) { 386 bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1; 387 } 388 } 389 } 390 391 400 int atom() 401 throws RESyntaxException { 402 403 int ret = node(RE.OP_ATOM, 0); 405 406 int lenAtom = 0; 408 409 atomLoop: 411 412 while (idx < len) { 413 414 if ((idx + 1) < len) { 416 char c = pattern.charAt(idx + 1); 417 418 if (pattern.charAt(idx) == '\\') { 420 int idxEscape = idx; 421 escape(); 422 423 if (idx < len) { 424 c = pattern.charAt(idx); 425 } 426 427 idx = idxEscape; 428 } 429 switch (c) { 431 case '{': 432 case '?': 433 case '*': 434 case '+': 435 436 if (lenAtom != 0) { 439 break atomLoop; 440 } 441 } 442 } 443 switch (pattern.charAt(idx)) { 445 case ']': 446 case '^': 447 case '$': 448 case '.': 449 case '[': 450 case '(': 451 case ')': 452 case '|': 453 break atomLoop; 454 455 case '{': 456 case '?': 457 case '*': 458 case '+': 459 460 if (lenAtom == 0) { 462 463 syntaxError("Missing operand to closure"); 465 } 466 467 break atomLoop; 468 469 case '\\': 470 { 471 472 int idxBeforeEscape = idx; 474 char c = escape(); 475 476 if ((c & ESC_MASK) == ESC_MASK) { 478 479 idx = idxBeforeEscape; 481 break atomLoop; 482 } 483 484 emit(c); 486 lenAtom++; 487 } 488 489 break; 490 491 default: 492 493 emit(pattern.charAt(idx++)); 495 lenAtom++; 496 break; 497 } 498 } 499 if (lenAtom == 0) { 501 internalError(); 502 } 503 504 instruction[ret + RE.offsetOpdata] = (char) lenAtom; 506 507 return ret; 508 } 509 510 515 void bracket() 516 throws RESyntaxException { 517 518 if (idx >= len || pattern.charAt(idx++) != '{') { 520 internalError(); 521 } 522 if (idx >= len || !Character.isDigit(pattern.charAt(idx))) { 524 syntaxError("Expected digit"); 525 } 526 527 StringBuffer number = new StringBuffer (); 529 530 while (idx < len && Character.isDigit(pattern.charAt(idx))) { 531 number.append(pattern.charAt(idx++)); 532 } 533 try { 534 bracketMin[brackets] = Integer.parseInt(number.toString()); 535 } catch (NumberFormatException e) { 536 syntaxError("Expected valid number"); 537 } 538 if (idx >= len) { 540 syntaxError("Expected comma or right bracket"); 541 } 542 if (pattern.charAt(idx) == '}') { 544 idx++; 545 bracketOpt[brackets] = 0; 546 547 return; 548 } 549 if (idx >= len || pattern.charAt(idx++) != ',') { 551 syntaxError("Expected comma"); 552 } 553 if (idx >= len) { 555 syntaxError("Expected comma or right bracket"); 556 } 557 if (pattern.charAt(idx) == '}') { 559 idx++; 560 bracketOpt[brackets] = bracketUnbounded; 561 562 return; 563 } 564 if (idx >= len || !Character.isDigit(pattern.charAt(idx))) { 566 syntaxError("Expected digit"); 567 } 568 569 number.setLength(0); 571 572 while (idx < len && Character.isDigit(pattern.charAt(idx))) { 573 number.append(pattern.charAt(idx++)); 574 } 575 try { 576 bracketOpt[brackets] = Integer.parseInt(number.toString()) - 577 bracketMin[brackets]; 578 } catch (NumberFormatException e) { 579 syntaxError("Expected valid number"); 580 } 581 if (bracketOpt[brackets] <= 0) { 583 syntaxError("Bad range"); 584 } 585 if (idx >= len || pattern.charAt(idx++) != '}') { 587 syntaxError("Missing close brace"); 588 } 589 } 590 591 598 int branch(int[] flags) 599 throws RESyntaxException { 600 601 int node; 603 int ret = node(RE.OP_BRANCH, 0); 604 int chain = -1; 605 int[] closureFlags = new int[1]; 606 boolean nullable = true; 607 608 while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')') { 609 610 closureFlags[0] = NODE_NORMAL; 612 node = closure(closureFlags); 613 614 if (closureFlags[0] == NODE_NORMAL) { 615 nullable = false; 616 } 617 if (chain != -1) { 619 setNextOfEnd(chain, node); 620 } 621 622 chain = node; 624 } 625 if (chain == -1) { 627 node(RE.OP_NOTHING, 0); 628 } 629 if (nullable) { 631 flags[0] |= NODE_NULLABLE; 632 } 633 634 return ret; 635 } 636 637 643 int characterClass() 644 throws RESyntaxException { 645 646 if (pattern.charAt(idx) != '[') { 648 internalError(); 649 } 650 if ((idx + 1) >= len || pattern.charAt(++idx) == ']') { 652 syntaxError("Empty or unterminated class"); 653 } 654 if (idx < len && pattern.charAt(idx) == ':') { 656 657 idx++; 659 660 int idxStart = idx; 662 663 while (idx < len && pattern.charAt(idx) >= 'a' && pattern.charAt(idx) <= 'z') { 664 idx++; 665 } 666 if ((idx + 1) < len && pattern.charAt(idx) == ':' && 668 pattern.charAt(idx + 1) == ']') { 669 670 String charClass = pattern.substring(idxStart, idx); 672 673 Character i = (Character ) hashPOSIX.get(charClass); 675 676 if (i != null) { 677 678 idx += 2; 680 681 return node(RE.OP_POSIXCLASS, i.charValue()); 683 } 684 685 syntaxError("Invalid POSIX character class '" + charClass + 686 "'"); 687 } 688 689 syntaxError("Invalid POSIX character class syntax"); 690 } 691 692 int ret = node(RE.OP_ANYOF, 0); 694 695 char CHAR_INVALID = Character.MAX_VALUE; 697 char last = CHAR_INVALID; 698 char simpleChar = 0; 699 boolean include = true; 700 boolean definingRange = false; 701 int idxFirst = idx; 702 char rangeStart = Character.MIN_VALUE; 703 char rangeEnd; 704 RERange range = new RERange(); 705 706 while (idx < len && pattern.charAt(idx) != ']') { 707 switchOnCharacter: 708 709 switch (pattern.charAt(idx)) { 711 case '^': 712 include = !include; 713 714 if (idx == idxFirst) { 715 range.include(Character.MIN_VALUE, Character.MAX_VALUE, 716 true); 717 } 718 719 idx++; 720 continue; 721 722 case '\\': 723 { 724 725 char c; 727 728 switch (c = escape()) { 729 case ESC_COMPLEX: 730 case ESC_BACKREF: 731 732 syntaxError("Bad character class"); 734 735 case ESC_CLASS: 736 737 if (definingRange) { 739 syntaxError("Bad character class"); 740 } 741 switch (pattern.charAt(idx - 1)) { 743 case RE.E_NSPACE: 744 case RE.E_NDIGIT: 745 case RE.E_NALNUM: 746 syntaxError("Bad character class"); 747 748 case RE.E_SPACE: 749 range.include('\t', include); 750 range.include('\r', include); 751 range.include('\f', include); 752 range.include('\n', include); 753 range.include('\b', include); 754 range.include(' ', include); 755 break; 756 757 case RE.E_ALNUM: 758 range.include('a', 'z', include); 759 range.include('A', 'Z', include); 760 range.include('_', include); 761 762 case RE.E_DIGIT: 764 range.include('0', '9', include); 765 break; 766 } 767 768 last = CHAR_INVALID; 770 break; 771 772 default: 773 774 simpleChar = c; 776 break switchOnCharacter; 777 } 778 } 779 780 continue; 781 782 case '-': 783 784 if (definingRange) { 786 syntaxError("Bad class range"); 787 } 788 789 definingRange = true; 790 791 rangeStart = (last == CHAR_INVALID ? 0 : last); 793 794 if ((idx + 1) < len && pattern.charAt(++idx) == ']') { 796 simpleChar = Character.MAX_VALUE; 797 break; 798 } 799 800 continue; 801 802 default: 803 simpleChar = pattern.charAt(idx++); 804 break; 805 } 806 if (definingRange) { 808 809 rangeEnd = simpleChar; 811 812 if (rangeStart >= rangeEnd) { 814 syntaxError("Bad character class"); 815 } 816 817 range.include(rangeStart, rangeEnd, include); 818 819 last = CHAR_INVALID; 821 definingRange = false; 822 } else { 823 824 if ((idx + 1) >= len || pattern.charAt(idx + 1) != '-') { 826 range.include(simpleChar, include); 827 } 828 829 last = simpleChar; 830 } 831 } 832 if (idx == len) { 834 syntaxError("Unterminated character class"); 835 } 836 837 idx++; 839 840 instruction[ret + RE.offsetOpdata] = (char) range.num; 842 843 for (int i = 0; i < range.num; i++) { 844 emit((char) range.minRange[i]); 845 emit((char) range.maxRange[i]); 846 } 847 848 return ret; 849 } 850 851 858 int closure(int[] flags) 859 throws RESyntaxException { 860 861 int idxBeforeTerminal = idx; 863 864 int[] terminalFlags = {NODE_NORMAL}; 866 867 int ret = terminal(terminalFlags); 869 870 flags[0] |= terminalFlags[0]; 872 873 if (idx >= len) { 875 return ret; 876 } 877 878 boolean greedy = true; 879 char closureType = pattern.charAt(idx); 880 881 switch (closureType) { 882 case '?': 883 case '*': 884 885 flags[0] |= NODE_NULLABLE; 887 888 case '+': 889 890 idx++; 892 893 case '{': 894 895 int opcode = instruction[ret + RE.offsetOpcode]; 897 898 if (opcode == RE.OP_BOL || opcode == RE.OP_EOL) { 899 syntaxError("Bad closure operand"); 900 } 901 if ((terminalFlags[0] & NODE_NULLABLE) != 0) { 902 syntaxError("Closure operand can't be nullable"); 903 } 904 905 break; 906 } 907 if (idx < len && pattern.charAt(idx) == '?') { 909 idx++; 910 greedy = false; 911 } 912 if (greedy) { 913 914 switch (closureType) { 916 case '{': 917 { 918 919 boolean found = false; 921 int i; 922 allocBrackets(); 923 924 for (i = 0; i < brackets; i++) { 925 if (bracketStart[i] == idx) { 926 found = true; 927 break; 928 } 929 } 930 if (!found) { 932 if (brackets >= maxBrackets) { 933 syntaxError("Too many bracketed closures (limit is 10)"); 934 } 935 936 bracketStart[brackets] = idx; 937 bracket(); 938 bracketEnd[brackets] = idx; 939 i = brackets++; 940 } 941 if (--bracketMin[i] > 0) { 943 944 idx = idxBeforeTerminal; 946 break; 947 } 948 if (bracketOpt[i] == bracketFinished) { 950 951 closureType = '*'; 954 bracketOpt[i] = 0; 955 idx = bracketEnd[i]; 956 } else if (bracketOpt[i] == bracketUnbounded) { 957 idx = idxBeforeTerminal; 958 bracketOpt[i] = bracketFinished; 959 break; 960 } else if (bracketOpt[i]-- > 0) { 961 962 idx = idxBeforeTerminal; 964 closureType = '?'; 965 } else { 966 967 idx = bracketEnd[i]; 969 break; 970 } 971 } 972 case '?': 974 case '*': 975 if (!greedy) { 976 break; 977 } 978 if (closureType == '?') { 979 980 nodeInsert(RE.OP_BRANCH, 0, ret); setNextOfEnd(ret, node(RE.OP_BRANCH, 0)); 984 int nothing = node(RE.OP_NOTHING, 0); setNextOfEnd(ret, nothing); setNextOfEnd(ret + RE.nodeSize, nothing); } 988 if (closureType == '*') { 989 990 nodeInsert(RE.OP_BRANCH, 0, ret); setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0)); setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0)); setNextOfEnd(ret + RE.nodeSize, ret); setNextOfEnd(ret, node(RE.OP_BRANCH, 0)); setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); } 998 999 break; 1000 1001 case '+': 1002 { 1003 1004 int branch; 1006 branch = node(RE.OP_BRANCH, 0); setNextOfEnd(ret, branch); setNextOfEnd(node(RE.OP_GOTO, 0), ret); setNextOfEnd(branch, node(RE.OP_BRANCH, 0)); setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); } 1012 1013 break; 1014 } 1015 } else { 1016 1017 setNextOfEnd(ret, node(RE.OP_END, 0)); 1019 1020 switch (closureType) { 1022 case '?': 1023 nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret); 1024 break; 1025 1026 case '*': 1027 nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret); 1028 break; 1029 1030 case '+': 1031 nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret); 1032 break; 1033 } 1034 1035 setNextOfEnd(ret, lenInstruction); 1037 } 1038 1039 return ret; 1040 } 1041 1042 1053 public REProgram compile(String pattern) 1054 throws RESyntaxException { 1055 1056 this.pattern = pattern; len = pattern.length(); idx = 0; lenInstruction = 0; parens = 1; brackets = 0; 1064 int[] flags = {NODE_TOPLEVEL}; 1066 1067 expr(flags); 1069 1070 if (idx != len) { 1072 if (pattern.charAt(idx) == ')') { 1073 syntaxError("Unmatched close paren"); 1074 } 1075 1076 syntaxError("Unexpected input remains"); 1077 } 1078 1079 char[] ins = new char[lenInstruction]; 1081 System.arraycopy(instruction, 0, ins, 0, lenInstruction); 1082 1083 return new REProgram(ins); 1084 } 1085 1086 1091 void emit(char c) { 1092 1093 ensure(1); 1095 1096 instruction[lenInstruction++] = c; 1098 } 1099 1100 1106 void ensure(int n) { 1107 1108 int curlen = instruction.length; 1110 1111 if (lenInstruction + n >= curlen) { 1113 1114 while (lenInstruction + n >= curlen) { 1116 curlen *= 2; 1117 } 1118 1119 char[] newInstruction = new char[curlen]; 1121 System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction); 1122 instruction = newInstruction; 1123 } 1124 } 1125 1126 1136 char escape() 1137 throws RESyntaxException { 1138 1139 if (pattern.charAt(idx) != '\\') { 1141 internalError(); 1142 } 1143 if (idx + 1 == len) { 1145 syntaxError("Escape terminates string"); 1146 } 1147 1148 idx += 2; 1150 1151 char escapeChar = pattern.charAt(idx - 1); 1152 1153 switch (escapeChar) { 1154 case RE.E_BOUND: 1155 case RE.E_NBOUND: 1156 return ESC_COMPLEX; 1157 1158 case RE.E_ALNUM: 1159 case RE.E_NALNUM: 1160 case RE.E_SPACE: 1161 case RE.E_NSPACE: 1162 case RE.E_DIGIT: 1163 case RE.E_NDIGIT: 1164 return ESC_CLASS; 1165 1166 case 'u': 1167 case 'x': 1168 { 1169 1170 int hexDigits = (escapeChar == 'u' ? 4 : 2); 1172 1173 int val = 0; 1175 1176 for (; idx < len && hexDigits-- > 0; idx++) { 1177 1178 char c = pattern.charAt(idx); 1180 1181 if (c >= '0' && c <= '9') { 1183 1184 val = (val << 4) + c - '0'; 1186 } else { 1187 1188 c = Character.toLowerCase(c); 1190 1191 if (c >= 'a' && c <= 'f') { 1192 1193 val = (val << 4) + (c - 'a') + 10; 1195 } else { 1196 1197 syntaxError("Expected " + hexDigits + 1200 " hexadecimal digits after \\" + 1201 escapeChar); 1202 } 1203 } 1204 } 1205 1206 return (char) val; 1207 } 1208 case 't': 1209 return '\t'; 1210 1211 case 'n': 1212 return '\n'; 1213 1214 case 'r': 1215 return '\r'; 1216 1217 case 'f': 1218 return '\f'; 1219 1220 case '0': 1221 case '1': 1222 case '2': 1223 case '3': 1224 case '4': 1225 case '5': 1226 case '6': 1227 case '7': 1228 case '8': 1229 case '9': 1230 1231 if ((idx < len && Character.isDigit(pattern.charAt(idx))) || 1233 escapeChar == '0') { 1234 1235 int val = escapeChar - '0'; 1237 1238 if (idx < len && Character.isDigit(pattern.charAt(idx))) { 1239 val = ((val << 3) + (pattern.charAt(idx++) - '0')); 1240 1241 if (idx < len && 1242 Character.isDigit(pattern.charAt(idx))) { 1243 val = ((val << 3) + (pattern.charAt(idx++) - '0')); 1244 } 1245 } 1246 1247 return (char) val; 1248 } 1249 1250 return ESC_BACKREF; 1252 1253 default: 1254 1255 return escapeChar; 1257 } 1258 } 1259 1260 1268 int expr(int[] flags) 1269 throws RESyntaxException { 1270 1271 boolean paren = false; 1273 int ret = -1; 1274 int closeParens = parens; 1275 1276 if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(') { 1277 idx++; 1278 paren = true; 1279 ret = node(RE.OP_OPEN, parens++); 1280 } 1281 1282 flags[0] &= ~NODE_TOPLEVEL; 1283 1284 int branch = branch(flags); 1286 1287 if (ret == -1) { 1288 ret = branch; 1289 } else { 1290 setNextOfEnd(ret, branch); 1291 } 1292 while (idx < len && pattern.charAt(idx) == '|') { 1294 idx++; 1295 branch = branch(flags); 1296 setNextOfEnd(ret, branch); 1297 } 1298 1299 int end; 1301 1302 if (paren) { 1303 if (idx < len && pattern.charAt(idx) == ')') { 1304 idx++; 1305 } else { 1306 syntaxError("Missing close paren"); 1307 } 1308 1309 end = node(RE.OP_CLOSE, closeParens); 1310 } else { 1311 end = node(RE.OP_END, 0); 1312 } 1313 1314 setNextOfEnd(ret, end); 1316 1317 for (int next = -1, i = ret; 1319 next != 0; 1320 next = instruction[i + RE.offsetNext], i += next) { 1321 1322 if (instruction[i + RE.offsetOpcode] == RE.OP_BRANCH) { 1324 setNextOfEnd(i + RE.nodeSize, end); 1325 } 1326 } 1327 1328 return ret; 1330 } 1331 1332 1337 void internalError() 1338 throws Error { 1339 throw new Error ("Internal error!"); 1340 } 1341 1342 1349 int node(char opcode, int opdata) { 1350 1351 ensure(RE.nodeSize); 1353 1354 instruction[lenInstruction + RE.offsetOpcode] = opcode; 1356 instruction[lenInstruction + RE.offsetOpdata] = (char) opdata; 1357 instruction[lenInstruction + RE.offsetNext] = 0; 1358 lenInstruction += RE.nodeSize; 1359 1360 return lenInstruction - RE.nodeSize; 1362 } 1363 1364 1372 void nodeInsert(char opcode, int opdata, int insertAt) { 1373 1374 ensure(RE.nodeSize); 1376 1377 System.arraycopy(instruction, insertAt, instruction, 1379 insertAt + RE.nodeSize, lenInstruction - insertAt); 1380 instruction[insertAt + RE.offsetOpcode] = opcode; 1381 instruction[insertAt + RE.offsetOpdata] = (char) opdata; 1382 instruction[insertAt + RE.offsetNext] = 0; 1383 lenInstruction += RE.nodeSize; 1384 } 1385 1386 1392 void setNextOfEnd(int node, int pointTo) { 1393 1394 int next; 1396 1397 while ((next = instruction[node + RE.offsetNext]) != 0) { 1398 node += next; 1399 } 1400 1401 instruction[node + RE.offsetNext] = (char) (short) (pointTo - node); 1403 } 1404 1405 1410 void syntaxError(String s) 1411 throws RESyntaxException { 1412 throw new RESyntaxException(s); 1413 } 1414 1415 1422 int terminal(int[] flags) 1423 throws RESyntaxException { 1424 switch (pattern.charAt(idx)) { 1425 case RE.OP_EOL: 1426 case RE.OP_BOL: 1427 case RE.OP_ANY: 1428 return node(pattern.charAt(idx++), 0); 1429 1430 case '[': 1431 return characterClass(); 1432 1433 case '(': 1434 return expr(flags); 1435 1436 case ')': 1437 syntaxError("Unexpected close paren"); 1438 1439 case '|': 1440 internalError(); 1441 1442 case ']': 1443 syntaxError("Mismatched class"); 1444 1445 case 0: 1446 syntaxError("Unexpected end of input"); 1447 1448 case '?': 1449 case '+': 1450 case '{': 1451 case '*': 1452 syntaxError("Missing operand to closure"); 1453 1454 case '\\': 1455 { 1456 1457 int idxBeforeEscape = idx; 1459 1460 switch (escape()) { 1462 case ESC_CLASS: 1463 case ESC_COMPLEX: 1464 flags[0] &= ~NODE_NULLABLE; 1465 1466 return node(RE.OP_ESCAPE, pattern.charAt(idx - 1)); 1467 1468 case ESC_BACKREF: 1469 { 1470 char backreference = (char) (pattern.charAt(idx - 1) - 1471 '0'); 1472 1473 if (parens <= backreference) { 1474 syntaxError("Bad backreference"); 1475 } 1476 1477 flags[0] |= NODE_NULLABLE; 1478 1479 return node(RE.OP_BACKREF, backreference); 1480 } 1481 default: 1482 1483 idx = idxBeforeEscape; 1486 flags[0] &= ~NODE_NULLABLE; 1487 break; 1488 } 1489 } 1490 } 1491 1492 flags[0] &= ~NODE_NULLABLE; 1495 1496 return atom(); 1497 } 1498} | Popular Tags |