1 package org.apache.oro.text.regex; 2 3 59 60 import java.io.IOException ; 61 import java.util.*; 62 63 74 public final class Perl5Matcher implements PatternMatcher { 75 private static final char __EOS = Character.MAX_VALUE; 76 private static final int __INITIAL_NUM_OFFSETS = 20; 77 78 private boolean __multiline = false, __lastSuccess = false; 79 private char __previousChar, __input[], __originalInput[]; 80 private Perl5Repetition __currentRep; 81 private int __numParentheses, __bol, __eol, __currentOffset, __endOffset; 82 83 private char[] __program; 84 private int __expSize, __inputOffset, __lastParen; 85 private int[] __beginMatchOffsets, __endMatchOffsets; 86 private Stack __stack = new Stack(); 87 private Perl5MatchResult __lastMatchResult = null; 88 89 private static boolean 90 __compare(char[] s1, int s1Offs, char[] s2, int s2Offs, int n) 91 { 92 int cnt; 93 94 for(cnt = 0; cnt < n; cnt++, s1Offs++, s2Offs++) { 95 if(s1Offs >= s1.length) 96 return false; 97 if(s2Offs >= s2.length) 98 return false; 99 if(s1[s1Offs] != s2[s2Offs]) 100 return false; 101 } 102 103 return true; 104 } 105 106 private static int __findFirst(char[] input, int current, int endOffset, 107 char[] mustString) 108 { 109 int count, saveCurrent; 110 char ch; 111 112 113 if(input.length == 0) 114 return endOffset; 115 116 ch = mustString[0]; 117 while(current < endOffset) { 119 if(ch == input[current]){ 120 saveCurrent = current; 121 count = 0; 122 123 while(current < endOffset && count < mustString.length) { 124 if(mustString[count] != input[current]) 125 break; 126 ++count; 127 ++current; 128 } 129 130 current = saveCurrent; 131 132 if(count >= mustString.length) 133 break; 134 } 135 ++current; 136 } 137 138 return current; 139 } 140 141 142 private void __pushState(int parenFloor) { 143 int[] state; 144 int stateEntries, paren; 145 146 stateEntries = 3*(__expSize - parenFloor); 147 if(stateEntries <= 0) 148 state = new int[3]; 149 else 150 state = new int[stateEntries + 3]; 151 152 state[0] = __expSize; 153 state[1] = __lastParen; 154 state[2] = __inputOffset; 155 156 for(paren = __expSize; paren > parenFloor; paren-=3, stateEntries-=3) { 157 state[stateEntries] = __endMatchOffsets[paren]; 158 state[stateEntries + 1] = __beginMatchOffsets[paren]; 159 state[stateEntries + 2] = paren; 160 } 161 162 __stack.push(state); 163 } 164 165 166 private void __popState() { 167 int[] state; 168 int entry, paren; 169 170 state = (int[])__stack.pop(); 171 172 __expSize = state[0]; 173 __lastParen = state[1]; 174 __inputOffset = state[2]; 175 176 for(entry = 3; entry < state.length; entry+=3) { 177 paren = state[entry + 2]; 178 __beginMatchOffsets[paren] = state[entry + 1]; 179 180 if(paren <= __lastParen) 181 __endMatchOffsets[paren] = state[entry]; 182 } 183 184 for(paren = __lastParen + 1; paren <= __numParentheses; paren++) { 185 if(paren > __expSize) 186 __beginMatchOffsets[paren] = OpCode._NULL_OFFSET; 187 __endMatchOffsets[paren] = OpCode._NULL_OFFSET; 188 } 189 } 190 191 192 private void __initInterpreterGlobals(Perl5Pattern expression, char[] input, 194 int beginOffset, int endOffset) { 195 __input = input; 196 __endOffset = endOffset; 197 __currentRep = new Perl5Repetition(); 198 __currentRep._numInstances = 0; 199 __currentRep._lastRepetition = null; 200 __program = expression._program; 201 __stack.setSize(0); 202 203 if(beginOffset == 0) 204 __previousChar = '\n'; 205 else { 206 __previousChar = input[beginOffset - 1]; 207 if(!__multiline && __previousChar == '\n') 208 __previousChar = '\0'; 209 } 210 211 __numParentheses = expression._numParentheses; 212 __currentOffset = beginOffset; 213 214 __bol = beginOffset; 215 __eol = endOffset; 216 217 endOffset = __numParentheses + 1; 219 if(__beginMatchOffsets == null || endOffset > __beginMatchOffsets.length) { 220 if(endOffset < __INITIAL_NUM_OFFSETS) 221 endOffset = __INITIAL_NUM_OFFSETS; 222 __beginMatchOffsets = new int[endOffset]; 223 __endMatchOffsets = new int[endOffset]; 224 } 225 } 226 227 private void __setLastMatchResult() { 230 int offs; 231 232 234 __lastMatchResult = new Perl5MatchResult(__numParentheses + 1); 235 236 if(__endMatchOffsets[0] > __originalInput.length) 238 throw new ArrayIndexOutOfBoundsException (); 239 240 __lastMatchResult._match = 241 new String (__originalInput, __beginMatchOffsets[0], 242 __endMatchOffsets[0] - __beginMatchOffsets[0]); 243 244 __lastMatchResult._matchBeginOffset = __beginMatchOffsets[0]; 245 246 while(__numParentheses >= 0) { 247 offs = __beginMatchOffsets[__numParentheses]; 248 249 if(offs >= 0) 250 __lastMatchResult._beginGroupOffset[__numParentheses] 251 = offs - __lastMatchResult._matchBeginOffset; 252 else 253 __lastMatchResult._beginGroupOffset[__numParentheses] = 254 OpCode._NULL_OFFSET; 255 256 offs = __endMatchOffsets[__numParentheses]; 257 258 if(offs >= 0) 259 __lastMatchResult._endGroupOffset[__numParentheses] 260 = offs - __lastMatchResult._matchBeginOffset; 261 else 262 __lastMatchResult._endGroupOffset[__numParentheses] = 263 OpCode._NULL_OFFSET; 264 265 --__numParentheses; 266 } 267 268 __originalInput = null; 270 } 271 272 273 274 private boolean __interpret(Perl5Pattern expression, char[] input, 279 int beginOffset, int endOffset) 280 { 281 boolean success; 282 int minLength = 0, dontTry = 0, offset; 283 char ch, mustString[]; 284 285 __initInterpreterGlobals(expression, input, beginOffset, endOffset); 286 287 success = false; 288 mustString = expression._mustString; 289 290 _mainLoop: 291 while(true) { 292 293 if(mustString != null && 294 ((expression._anchor & Perl5Pattern._OPT_ANCH) == 0 || 295 (__multiline && expression._back >= 0))) { 296 297 __currentOffset = 298 __findFirst(__input, __currentOffset, endOffset, mustString); 299 300 if(__currentOffset >= endOffset) { 301 if((expression._options & Perl5Compiler.READ_ONLY_MASK) == 0) 302 expression._mustUtility++; 303 success = false; 304 break _mainLoop; 305 } else if(expression._back >= 0) { 306 __currentOffset-=expression._back; 307 if(__currentOffset < beginOffset) 308 __currentOffset = beginOffset; 309 minLength = expression._back + mustString.length; 310 } else if(!expression._isExpensive && 311 (expression._options & Perl5Compiler.READ_ONLY_MASK) == 0 && 312 (--expression._mustUtility < 0)) { 313 mustString = expression._mustString = null; 317 __currentOffset = beginOffset; 318 } else { 319 __currentOffset = beginOffset; 320 minLength = mustString.length; 321 } 322 } 323 324 if((expression._anchor & Perl5Pattern._OPT_ANCH) != 0) { 325 if(__tryExpression(expression, beginOffset)) { 326 success = true; 327 break _mainLoop; 328 } else if(__multiline || 329 (expression._anchor & Perl5Pattern._OPT_IMPLICIT) != 0) { 330 331 if(minLength > 0) 332 dontTry = minLength - 1; 333 endOffset-=dontTry; 334 335 if(__currentOffset > beginOffset) 336 --__currentOffset; 337 338 while(__currentOffset < endOffset) { 339 if(__input[__currentOffset++] == '\n') { 340 if(__currentOffset < endOffset && 341 __tryExpression(expression, __currentOffset)) { 342 success = true; 343 break _mainLoop; 344 } 345 } 346 } 347 } 348 349 break _mainLoop; 350 } 351 352 353 if(expression._startString != null) { 354 mustString = expression._startString; 355 if((expression._anchor & Perl5Pattern._OPT_SKIP) != 0) { 356 ch = mustString[0]; 357 358 while(__currentOffset < endOffset) { 359 if(ch == __input[__currentOffset]) { 360 if(__tryExpression(expression, __currentOffset)){ 361 success = true; 362 break _mainLoop; 363 } 364 ++__currentOffset; 365 while(__currentOffset < endOffset && 366 __input[__currentOffset] == ch) 367 ++__currentOffset; 368 } 369 ++__currentOffset; 370 } 371 } else { 372 373 while((__currentOffset = 374 __findFirst(__input, __currentOffset, endOffset, mustString)) 375 < endOffset){ 376 if(__tryExpression(expression, __currentOffset)) { 377 success = true; 378 break _mainLoop; 379 } 380 ++__currentOffset; 381 } 382 } 383 384 break _mainLoop; 385 } 386 387 if((offset = expression._startClassOffset) != OpCode._NULL_OFFSET) { 388 boolean doEvery, tmp; 389 390 doEvery = ((expression._anchor & Perl5Pattern._OPT_SKIP) == 0); 391 392 if(minLength > 0) 393 dontTry = minLength - 1; 394 endOffset -= dontTry; 395 tmp = true; 396 397 switch(__program[offset]) { 398 case OpCode._ANYOF: 399 offset = OpCode._getOperand(offset); 400 while(__currentOffset < endOffset) { 401 ch = __input[__currentOffset]; 402 403 if(ch < 256 && 404 (__program[offset + (ch >> 4)] & (1 << (ch & 0xf))) == 0) { 405 if(tmp && __tryExpression(expression, __currentOffset)) { 406 success = true; 407 break _mainLoop; 408 } else 409 tmp = doEvery; 410 } else 411 tmp = true; 412 ++__currentOffset; 413 } 414 415 break; 416 417 case OpCode._BOUND: 418 if(minLength > 0) { 419 ++dontTry; 420 --endOffset; 421 } 422 423 if(__currentOffset != beginOffset) { 424 ch = __input[__currentOffset - 1]; 425 tmp = OpCode._isWordCharacter(ch); 426 } else 427 tmp = OpCode._isWordCharacter(__previousChar); 428 429 while(__currentOffset < endOffset) { 430 ch = __input[__currentOffset]; 431 if(tmp != OpCode._isWordCharacter(ch)){ 432 tmp = !tmp; 433 if(__tryExpression(expression, __currentOffset)) { 434 success = true; 435 break _mainLoop; 436 } 437 } 438 ++__currentOffset; 439 } 440 441 if((minLength > 0 || tmp) && 442 __tryExpression(expression, __currentOffset)) { 443 success = true; 444 break _mainLoop; 445 } 446 break; 447 448 case OpCode._NBOUND: 449 if(minLength > 0) { 450 ++dontTry; 451 --endOffset; 452 } 453 454 if(__currentOffset != beginOffset) { 455 ch = __input[__currentOffset - 1]; 456 tmp = OpCode._isWordCharacter(ch); 457 } else 458 tmp = OpCode._isWordCharacter(__previousChar); 459 460 while(__currentOffset < endOffset) { 461 ch = __input[__currentOffset]; 462 if(tmp != OpCode._isWordCharacter(ch)) 463 tmp = !tmp; 464 else if(__tryExpression(expression, __currentOffset)) { 465 success = true; 466 break _mainLoop; 467 } 468 469 ++__currentOffset; 470 } 471 472 if((minLength > 0 || !tmp) && 473 __tryExpression(expression, __currentOffset)) { 474 success = true; 475 break _mainLoop; 476 } 477 break; 478 479 case OpCode._ALNUM: 480 while(__currentOffset < endOffset) { 481 ch = __input[__currentOffset]; 482 if(OpCode._isWordCharacter(ch)) { 483 if(tmp && __tryExpression(expression, __currentOffset)) { 484 success = true; 485 break _mainLoop; 486 } else 487 tmp = doEvery; 488 } else 489 tmp = true; 490 ++__currentOffset; 491 } 492 break; 493 494 case OpCode._NALNUM: 495 while(__currentOffset < endOffset) { 496 ch = __input[__currentOffset]; 497 if(!OpCode._isWordCharacter(ch)) { 498 if(tmp && __tryExpression(expression, __currentOffset)) { 499 success = true; 500 break _mainLoop; 501 } else 502 tmp = doEvery; 503 } else 504 tmp = true; 505 ++__currentOffset; 506 } 507 break; 508 509 case OpCode._SPACE: 510 while(__currentOffset < endOffset) { 511 if(Character.isWhitespace(__input[__currentOffset])) { 512 if(tmp && __tryExpression(expression, __currentOffset)) { 513 success = true; 514 break _mainLoop; 515 } else 516 tmp = doEvery; 517 } else 518 tmp = true; 519 ++__currentOffset; 520 } 521 break; 522 523 case OpCode._NSPACE: 524 while(__currentOffset < endOffset) { 525 if(!Character.isWhitespace(__input[__currentOffset])) { 526 if(tmp && __tryExpression(expression, __currentOffset)) { 527 success = true; 528 break _mainLoop; 529 } else 530 tmp = doEvery; 531 } else 532 tmp = true; 533 ++__currentOffset; 534 } 535 break; 536 537 case OpCode._DIGIT: 538 while(__currentOffset < endOffset) { 539 if(Character.isDigit(__input[__currentOffset])) { 540 if(tmp && __tryExpression(expression, __currentOffset)) { 541 success = true; 542 break _mainLoop; 543 } else 544 tmp = doEvery; 545 } else 546 tmp = true; 547 ++__currentOffset; 548 } 549 break; 550 551 552 case OpCode._NDIGIT: 553 while(__currentOffset < endOffset) { 554 if(!Character.isDigit(__input[__currentOffset])) { 555 if(tmp && __tryExpression(expression, __currentOffset)) { 556 success = true; 557 break _mainLoop; 558 } else 559 tmp = doEvery; 560 } else 561 tmp = true; 562 ++__currentOffset; 563 } 564 break; 565 } 567 } else { 568 if(minLength > 0) 569 dontTry = minLength - 1; 570 endOffset-=dontTry; 571 572 do { 573 if(__tryExpression(expression, __currentOffset)) { 574 success = true; 575 break _mainLoop; 576 } 577 } while(__currentOffset++ < endOffset); 578 579 } 580 581 582 break _mainLoop; 583 } 585 __lastSuccess = success; 586 __lastMatchResult = null; 587 588 return success; 589 } 590 591 592 private boolean __tryExpression(Perl5Pattern expression, int offset) { 593 int count; 594 595 __inputOffset = offset; 596 __lastParen = 0; 597 __expSize = 0; 598 599 if(__numParentheses > 0) { 600 for(count=0; count <= __numParentheses; count++) { 601 __beginMatchOffsets[count] = OpCode._NULL_OFFSET; 602 __endMatchOffsets[count] = OpCode._NULL_OFFSET; 603 } 604 } 605 606 if(__match(1)){ 607 __beginMatchOffsets[0] = offset; 608 __endMatchOffsets[0] = __inputOffset; 609 return true; 610 } 611 612 return false; 613 } 614 615 616 private int __repeat(int offset, int max) { 617 int scan, eol, operand, ret; 618 char ch; 619 620 scan = __inputOffset; 621 eol = __eol; 622 623 if(max != Character.MAX_VALUE && max < eol - scan) 624 eol = scan + max; 625 626 operand = OpCode._getOperand(offset); 627 628 switch(__program[offset]) { 629 630 case OpCode._ANY: 631 while(scan < eol && __input[scan] != '\n') 632 ++scan; 633 break; 634 635 case OpCode._SANY: 636 scan = eol; 637 break; 638 639 case OpCode._EXACTLY: 640 ++operand; 641 while(scan < eol && __program[operand] == __input[scan]) 642 ++scan; 643 break; 644 645 case OpCode._ANYOF: 646 if(scan < eol && (ch = __input[scan]) < 256) { 647 while((__program[operand + (ch >> 4)] & (1 << (ch & 0xf))) == 0) { 648 if(++scan < eol) 649 ch = __input[scan]; 650 else 651 break; 652 } 653 } 654 break; 655 656 case OpCode._ALNUM: 657 while(scan < eol && OpCode._isWordCharacter(__input[scan])) 658 ++scan; 659 break; 660 661 case OpCode._NALNUM: 662 while(scan < eol && !OpCode._isWordCharacter(__input[scan])) 663 ++scan; 664 break; 665 666 case OpCode._SPACE: 667 while(scan < eol && Character.isWhitespace(__input[scan])) 668 ++scan; 669 break; 670 671 case OpCode._NSPACE: 672 while(scan < eol && !Character.isWhitespace(__input[scan])) 673 ++scan; 674 break; 675 676 case OpCode._DIGIT: 677 while(scan < eol && Character.isDigit(__input[scan])) 678 ++scan; 679 break; 680 681 case OpCode._NDIGIT: 682 while(scan < eol && !Character.isDigit(__input[scan])) 683 ++scan; 684 break; 685 686 default: 687 break; 688 689 } 690 691 ret = scan - __inputOffset; 692 __inputOffset = scan; 693 694 return ret; 695 } 696 697 698 private boolean __match(int offset) { 699 char nextChar, op; 700 int scan, next, input, maxScan, current, line, arg; 701 boolean inputRemains = true, minMod = false; 702 Perl5Repetition rep; 703 704 705 input = __inputOffset; 706 inputRemains = (input < __endOffset); 707 nextChar = (inputRemains ? __input[input] : __EOS); 708 709 scan = offset; 710 maxScan = __program.length; 711 712 while(scan < maxScan ){ 713 next = OpCode._getNext(__program, scan); 714 715 switch(op = __program[scan]) { 716 717 case OpCode._BOL: 718 if(input == __bol ? __previousChar == '\n' : 719 (__multiline && (inputRemains || input < __eol) && 720 __input[input - 1] == '\n')) 721 break; 722 return false; 723 724 case OpCode._MBOL: 725 if(input == __bol ? __previousChar == '\n' : 726 ((inputRemains || input < __eol) && __input[input - 1] == '\n')) 727 break; 728 return false; 729 730 case OpCode._SBOL: 731 if(input == __bol && __previousChar == '\n') 732 break; 733 return false; 734 735 case OpCode._GBOL: 736 if(input == __bol) 737 break; 738 return true; 739 740 case OpCode._EOL : 741 if((inputRemains || input < __eol) && nextChar != '\n') 742 return false; 743 if(!__multiline && __eol - input > 1) 744 return false; 745 break; 746 747 case OpCode._MEOL: 748 if((inputRemains || input < __eol) && nextChar != '\n') 749 return false; 750 break; 751 752 case OpCode._SEOL: 753 if((inputRemains || input < __eol) && nextChar != '\n') 754 return false; 755 if(__eol - input > 1) 756 return false; 757 break; 758 759 case OpCode._SANY: 760 if(!inputRemains && input >= __eol) 761 return false; 762 inputRemains = (++input < __endOffset); 763 nextChar = (inputRemains ? __input[input] : __EOS); 764 break; 765 766 case OpCode._ANY: 767 if((!inputRemains && input >= __eol) || nextChar == '\n') 768 return false; 769 inputRemains = (++input < __endOffset); 770 nextChar = (inputRemains ? __input[input] : __EOS); 771 break; 772 773 case OpCode._EXACTLY: 774 current = OpCode._getOperand(scan); 775 line = __program[current++]; 776 777 if(__program[current] != nextChar) 778 return false; 779 if(__eol - input < line) 780 return false; 781 782 if(line > 1 && !__compare(__program, current, __input, input, line)) 783 return false; 784 785 input+=line; 786 inputRemains = (input < __endOffset); 787 nextChar = (inputRemains ? __input[input] : __EOS); 788 break; 789 790 case OpCode._ANYOF: 791 current = OpCode._getOperand(scan); 792 793 if(nextChar == __EOS && inputRemains) 794 nextChar = __input[input]; 795 796 if(nextChar >= 256 || (__program[current + (nextChar >> 4)] & 797 (1 << (nextChar & 0xf))) != 0) 798 return false; 799 800 if(!inputRemains && input >= __eol) 801 return false; 802 803 inputRemains = (++input < __endOffset); 804 nextChar = (inputRemains ? __input[input] : __EOS); 805 break; 806 807 case OpCode._ALNUM: 808 if(!inputRemains) 809 return false; 810 if(!OpCode._isWordCharacter(nextChar)) 811 return false; 812 inputRemains = (++input < __endOffset); 813 nextChar = (inputRemains ? __input[input] : __EOS); 814 break; 815 816 case OpCode._NALNUM: 817 if(!inputRemains && input >= __eol) 818 return false; 819 if(OpCode._isWordCharacter(nextChar)) 820 return false; 821 inputRemains = (++input < __endOffset); 822 nextChar = (inputRemains ? __input[input] : __EOS); 823 break; 824 825 826 case OpCode._NBOUND: 827 case OpCode._BOUND: 828 boolean a, b; 829 830 if(input == __bol) 831 a = OpCode._isWordCharacter(__previousChar); 832 else 833 a = OpCode._isWordCharacter(__input[input - 1]); 834 835 b = OpCode._isWordCharacter(nextChar); 836 837 if((a == b) == (__program[scan] == OpCode._BOUND)) 838 return false; 839 break; 840 841 case OpCode._SPACE: 842 if(!inputRemains && input >= __eol) 843 return false; 844 if(!Character.isWhitespace(nextChar)) 845 return false; 846 inputRemains = (++input < __endOffset); 847 nextChar = (inputRemains ? __input[input] : __EOS); 848 break; 849 850 851 case OpCode._NSPACE: 852 if(!inputRemains) 853 return false; 854 if(Character.isWhitespace(nextChar)) 855 return false; 856 inputRemains = (++input < __endOffset); 857 nextChar = (inputRemains ? __input[input] : __EOS); 858 break; 859 860 case OpCode._DIGIT: 861 if(!Character.isDigit(nextChar)) 862 return false; 863 inputRemains = (++input < __endOffset); 864 nextChar = (inputRemains ? __input[input] : __EOS); 865 break; 866 867 case OpCode._NDIGIT: 868 if(!inputRemains && input >= __eol) 869 return false; 870 if(Character.isDigit(nextChar)) 871 return false; 872 inputRemains = (++input < __endOffset); 873 nextChar = (inputRemains ? __input[input] : __EOS); 874 break; 875 876 case OpCode._REF: 877 arg = OpCode._getArg1(__program, scan); 878 current = __beginMatchOffsets[arg]; 879 880 if(current == OpCode._NULL_OFFSET) 881 return false; 882 883 if(__endMatchOffsets[arg] == OpCode._NULL_OFFSET) 884 return false; 885 886 if(current == __endMatchOffsets[arg]) 887 break; 888 889 if(__input[current] != nextChar) 890 return false; 891 892 line = __endMatchOffsets[arg] - current; 893 894 if(input + line > __eol) 895 return false; 896 897 if(line > 1 && !__compare(__input, current, __input, input, line)) 898 return false; 899 900 input+=line; 901 inputRemains = (input < __endOffset); 902 nextChar = (inputRemains ? __input[input] : __EOS); 903 break; 904 905 case OpCode._NOTHING: 906 break; 907 908 case OpCode._BACK: 909 break; 910 911 case OpCode._OPEN: 912 arg = OpCode._getArg1(__program, scan); 913 __beginMatchOffsets[arg] = input; 914 915 if(arg > __expSize) 916 __expSize = arg; 917 break; 918 919 case OpCode._CLOSE: 920 arg = OpCode._getArg1(__program, scan); 921 __endMatchOffsets[arg] = input; 922 923 if(arg > __lastParen) 924 __lastParen = arg; 925 break; 926 927 case OpCode._CURLYX: 928 rep = new Perl5Repetition(); 929 rep._lastRepetition = __currentRep; 930 __currentRep = rep; 931 932 rep._parenFloor = __lastParen; 933 rep._numInstances = -1; 934 rep._min = OpCode._getArg1(__program, scan); 935 rep._max = OpCode._getArg2(__program, scan); 936 rep._scan = OpCode._getNextOperator(scan) + 2; 937 rep._next = next; 938 rep._minMod = minMod; 939 rep._lastLocation = -1; 943 __inputOffset = input; 944 945 minMod = __match(OpCode._getPrevOperator(next)); 947 948 __currentRep = rep._lastRepetition; 950 return minMod; 951 952 case OpCode._WHILEM: 953 rep = __currentRep; 954 955 arg = rep._numInstances + 1; 956 __inputOffset = input; 957 958 if(input == rep._lastLocation) { 959 __currentRep = rep._lastRepetition; 960 line = __currentRep._numInstances; 961 if(__match(rep._next)) 962 return true; 963 __currentRep._numInstances = line; 964 __currentRep = rep; 965 return false; 966 } 967 968 if(arg < rep._min) { 969 rep._numInstances = arg; 970 rep._lastLocation = input; 971 if(__match(rep._scan)) 972 return true; 973 rep._numInstances = arg - 1; 974 return false; 975 } 976 977 if(rep._minMod) { 978 __currentRep = rep._lastRepetition; 979 line = __currentRep._numInstances; 980 if(__match(rep._next)) 981 return true; 982 __currentRep._numInstances = line; 983 __currentRep = rep; 984 985 if(arg >= rep._max) 986 return false; 987 988 __inputOffset = input; 989 rep._numInstances = arg; 990 rep._lastLocation = input; 991 992 if(__match(rep._scan)) 993 return true; 994 995 rep._numInstances = arg - 1; 996 return false; 997 } 998 999 if(arg < rep._max) { 1000 __pushState(rep._parenFloor); 1001 rep._numInstances = arg; 1002 rep._lastLocation = input; 1003 if(__match(rep._scan)) 1004 return true; 1005 __popState(); 1006 __inputOffset = input; 1007 } 1008 1009 __currentRep = rep._lastRepetition; 1010 line = __currentRep._numInstances; 1011 if(__match(rep._next)) 1012 return true; 1013 1014 rep._numInstances = line; 1015 __currentRep = rep; 1016 rep._numInstances = arg - 1; 1017 return false; 1018 1019 case OpCode._BRANCH: 1020 if(__program[next] != OpCode._BRANCH) 1021 next = OpCode._getNextOperator(scan); 1022 else { 1023 int lastParen; 1024 1025 lastParen = __lastParen; 1026 1027 do { 1028 1029 __inputOffset = input; 1030 1031 if(__match(OpCode._getNextOperator(scan))) 1032 return true; 1033 1034 for(arg = __lastParen; arg > lastParen; --arg) 1035 __endMatchOffsets[arg] = OpCode._NULL_OFFSET; 1037 __lastParen = arg; 1038 1039 scan = OpCode._getNext(__program, scan); 1040 } while(scan != OpCode._NULL_OFFSET && 1041 __program[scan] == OpCode._BRANCH); 1042 return false; 1043 } 1044 1045 break; 1046 1047 case OpCode._MINMOD: 1048 minMod = true; 1049 break; 1050 1051 1052 case OpCode._CURLY: 1053 case OpCode._STAR: 1054 case OpCode._PLUS: 1055 if(op == OpCode._CURLY) { 1056 line = OpCode._getArg1(__program, scan); 1057 arg = OpCode._getArg2(__program, scan); 1058 scan = OpCode._getNextOperator(scan) + 2; 1059 } else if(op == OpCode._STAR) { 1060 line = 0; 1061 arg = Character.MAX_VALUE; 1062 scan = OpCode._getNextOperator(scan); 1063 } else { 1064 line = 1; 1065 arg = Character.MAX_VALUE; 1066 scan = OpCode._getNextOperator(scan); 1067 } 1068 1069 if(__program[next] == OpCode._EXACTLY) { 1070 nextChar = __program[OpCode._getOperand(next) + 1]; 1071 current = 0; 1072 } else { 1073 nextChar = __EOS; 1074 current = -1000; 1075 } 1076 __inputOffset = input; 1077 1078 if(minMod) { 1079 minMod = false; 1080 1081 if(line > 0 && __repeat(scan, line) < line) 1082 return false; 1083 1084 1085 while(arg >= line || (arg == Character.MAX_VALUE && line > 0)) { 1086 if(current == -1000 || __inputOffset >= __endOffset || 1091 __input[__inputOffset] == nextChar) { 1092 if(__match(next)) 1093 return true; 1094 } 1095 1096 __inputOffset = input + line; 1097 1098 if(__repeat(scan, 1) != 0) { 1099 ++line; 1100 __inputOffset = input + line; 1101 } else 1102 return false; 1103 } 1104 1105 } else { 1106 arg = __repeat(scan, arg); 1107 1108 if(line < arg && OpCode._opType[__program[next]] == OpCode._EOL && 1109 (!__multiline || __program[next] == OpCode._SEOL)) 1110 line = arg; 1111 1112 while(arg >= line) { 1113 if(current == -1000 || __inputOffset >= __endOffset || 1118 __input[__inputOffset] == nextChar) { 1119 if(__match(next)) 1120 return true; 1121 } 1122 1123 --arg; 1124 __inputOffset = input + arg; 1125 } 1126 } 1127 1128 return false; 1129 1130 case OpCode._SUCCEED: 1131 case OpCode._END: 1132 __inputOffset = input; 1133 if(__inputOffset == __lastMatchInputEndOffset) 1136 return false; 1137 return true; 1138 1139 case OpCode._IFMATCH: 1140 __inputOffset = input; 1141 scan = OpCode._getNextOperator(scan); 1142 if(!__match(scan)) 1143 return false; 1144 break; 1145 1146 case OpCode._UNLESSM: 1147 __inputOffset = input; 1148 scan = OpCode._getNextOperator(scan); 1149 if(__match(scan)) 1150 return false; 1151 break; 1152 1153 1154 default: 1155 1157 } 1159 scan = next; 1161 } 1163 1164 1165 return false; 1166 } 1167 1168 1169 1188 public void setMultiline(boolean multiline) { __multiline = multiline; } 1189 1190 1191 1196 public boolean isMultiline() { return __multiline; } 1197 1198 char[] _toLower(char[] input) { 1199 int current; 1200 char[] inp; 1201 inp = new char[input.length]; 1208 System.arraycopy(input, 0, inp, 0, input.length); 1209 input = inp; 1210 1211 for(current = 0; current < input.length; current++) 1213 if(Character.isUpperCase(input[current])) 1214 input[current] = Character.toLowerCase(input[current]); 1215 1216 return input; 1217 } 1218 1219 1220 1235 public boolean matchesPrefix(char[] input, Pattern pattern, int offset) { 1236 Perl5Pattern expression; 1237 1238 expression = (Perl5Pattern)pattern; 1239 __originalInput = input; 1240 if(expression._isCaseInsensitive) 1241 input = _toLower(input); 1242 1243 __initInterpreterGlobals(expression, input, offset, input.length); 1244 1245 __lastSuccess = __tryExpression(expression, offset); 1246 __lastMatchResult = null; 1247 1248 return __lastSuccess; 1249 } 1250 1251 1252 1266 public boolean matchesPrefix(char[] input, Pattern pattern) { 1267 return matchesPrefix(input, pattern, 0); 1268 } 1269 1270 1271 1284 public boolean matchesPrefix(String input, Pattern pattern) { 1285 return matchesPrefix(input.toCharArray(), pattern, 0); 1286 } 1287 1288 1307 public boolean matchesPrefix(PatternMatcherInput input, Pattern pattern) { 1308 char[] inp; 1309 Perl5Pattern expression; 1310 1311 expression = (Perl5Pattern)pattern; 1312 1313 __originalInput = input._originalBuffer; 1314 if(expression._isCaseInsensitive) { 1315 if(input._toLowerBuffer == null) 1316 input._toLowerBuffer = _toLower(__originalInput); 1317 inp = input._toLowerBuffer; 1318 } else 1319 inp = __originalInput; 1320 1321 __initInterpreterGlobals(expression, inp, input._currentOffset, 1322 input._endOffset); 1323 __lastSuccess = __tryExpression(expression, input._currentOffset); 1324 __lastMatchResult = null; 1325 1326 return __lastSuccess; 1327 } 1328 1329 1330 1331 1369 public boolean matches(char[] input, Pattern pattern) { 1370 Perl5Pattern expression; 1371 1372 expression = (Perl5Pattern)pattern; 1373 __originalInput = input; 1374 if(expression._isCaseInsensitive) 1375 input = _toLower(input); 1376 1383 __initInterpreterGlobals(expression, input, 0, input.length); 1384 __lastSuccess = (__tryExpression(expression, 0) && 1385 __endMatchOffsets[0] == input.length); 1386 __lastMatchResult = null; 1387 1388 return __lastSuccess; 1389 } 1390 1391 1392 1429 public boolean matches(String input, Pattern pattern) { 1430 return matches(input.toCharArray(), pattern); 1431 } 1432 1433 1434 1478 public boolean matches(PatternMatcherInput input, Pattern pattern) { 1479 char[] inp; 1480 Perl5Pattern expression; 1481 1482 expression = (Perl5Pattern)pattern; 1483 1484 __originalInput = input._originalBuffer; 1485 if(expression._isCaseInsensitive) { 1486 if(input._toLowerBuffer == null) 1487 input._toLowerBuffer = _toLower(__originalInput); 1488 inp = input._toLowerBuffer; 1489 } else 1490 inp = __originalInput; 1491 1492 1511 1512 __initInterpreterGlobals(expression, inp, input._beginOffset, 1513 input._endOffset); 1514 1515 __lastMatchResult = null; 1516 1517 if(__tryExpression(expression, input._beginOffset)) { 1518 if(__endMatchOffsets[0] == input._endOffset || 1519 input.length() == 0 || input._beginOffset == input._endOffset) { 1520 __lastSuccess = true; 1521 return true; 1522 } 1523 } 1524 1525 __lastSuccess = false; 1526 1527 return false; 1528 } 1529 1530 1531 1532 1555 public boolean contains(String input, Pattern pattern) { 1556 1570 return contains(input.toCharArray(), pattern); 1571 } 1572 1573 1574 1598 public boolean contains(char[] input, Pattern pattern) { 1599 Perl5Pattern expression; 1600 1601 expression = (Perl5Pattern)pattern; 1602 1603 __originalInput = input; 1604 1605 if(expression._isCaseInsensitive) 1606 input = _toLower(input); 1607 1608 return __interpret(expression, input, 0, input.length); 1609 } 1610 1611 1612 private static final int __DEFAULT_LAST_MATCH_END_OFFSET = -100; 1613 private int __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET; 1614 1676 public boolean contains(PatternMatcherInput input, Pattern pattern) { 1677 char[] inp; 1678 Perl5Pattern expression; 1679 boolean matchFound; 1680 1681 1682 if(input._currentOffset > input._endOffset) 1687 return false; 1688 1693 expression = (Perl5Pattern)pattern; 1694 __originalInput = input._originalBuffer; 1695 1696 __originalInput = input._originalBuffer; 1702 if(expression._isCaseInsensitive) { 1703 if(input._toLowerBuffer == null) 1704 input._toLowerBuffer = _toLower(__originalInput); 1705 inp = input._toLowerBuffer; 1706 } else 1707 inp = __originalInput; 1708 1709 __lastMatchInputEndOffset = input.getMatchEndOffset(); 1710 1711 matchFound = 1712 __interpret(expression, inp, input._currentOffset, input._endOffset); 1713 1714 if(matchFound) { 1715 input.setCurrentOffset(__endMatchOffsets[0]); 1716 input.setMatchOffsets(__beginMatchOffsets[0], __endMatchOffsets[0]); 1717 } else { 1718 input.setCurrentOffset(input._endOffset + 1); 1719 } 1720 1721 __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET; 1723 1724 return matchFound; 1725 } 1726 1727 1728 1743 public MatchResult getMatch() { 1744 if(!__lastSuccess) 1745 return null; 1746 1747 if(__lastMatchResult == null) 1748 __setLastMatchResult(); 1749 1750 return __lastMatchResult; 1751 } 1752 1753} 1754 | Popular Tags |