1 15 16 18 package org.python.modules.sre; 19 20 import java.util.*; 21 22 public class SRE_STATE { 23 24 public static final int SRE_ERROR_ILLEGAL = -1; 25 26 27 public static final int SRE_ERROR_STATE = -2; 28 29 30 public static final int SRE_ERROR_RECURSION_LIMIT = -3; 31 32 public static final int SRE_OP_FAILURE = 0; 33 public static final int SRE_OP_SUCCESS = 1; 34 public static final int SRE_OP_ANY = 2; 35 public static final int SRE_OP_ANY_ALL = 3; 36 public static final int SRE_OP_ASSERT = 4; 37 public static final int SRE_OP_ASSERT_NOT = 5; 38 public static final int SRE_OP_AT = 6; 39 public static final int SRE_OP_BRANCH = 7; 40 public static final int SRE_OP_CALL = 8; 41 public static final int SRE_OP_CATEGORY = 9; 42 public static final int SRE_OP_CHARSET = 10; 43 public static final int SRE_OP_BIGCHARSET = 11; 44 public static final int SRE_OP_GROUPREF = 12; 45 public static final int SRE_OP_GROUPREF_IGNORE = 13; 46 public static final int SRE_OP_IN = 14; 47 public static final int SRE_OP_IN_IGNORE = 15; 48 public static final int SRE_OP_INFO = 16; 49 public static final int SRE_OP_JUMP = 17; 50 public static final int SRE_OP_LITERAL = 18; 51 public static final int SRE_OP_LITERAL_IGNORE = 19; 52 public static final int SRE_OP_MARK = 20; 53 public static final int SRE_OP_MAX_UNTIL = 21; 54 public static final int SRE_OP_MIN_UNTIL = 22; 55 public static final int SRE_OP_NOT_LITERAL = 23; 56 public static final int SRE_OP_NOT_LITERAL_IGNORE = 24; 57 public static final int SRE_OP_NEGATE = 25; 58 public static final int SRE_OP_RANGE = 26; 59 public static final int SRE_OP_REPEAT = 27; 60 public static final int SRE_OP_REPEAT_ONE = 28; 61 public static final int SRE_OP_SUBPATTERN = 29; 62 63 public static final int SRE_AT_BEGINNING = 0; 64 public static final int SRE_AT_BEGINNING_LINE = 1; 65 public static final int SRE_AT_BEGINNING_STRING = 2; 66 public static final int SRE_AT_BOUNDARY = 3; 67 public static final int SRE_AT_NON_BOUNDARY = 4; 68 public static final int SRE_AT_END = 5; 69 public static final int SRE_AT_END_LINE = 6; 70 public static final int SRE_AT_END_STRING = 7; 71 public static final int SRE_AT_LOC_BOUNDARY = 8; 72 public static final int SRE_AT_LOC_NON_BOUNDARY = 9; 73 public static final int SRE_AT_UNI_BOUNDARY = 10; 74 public static final int SRE_AT_UNI_NON_BOUNDARY = 11; 75 76 public static final int SRE_CATEGORY_DIGIT = 0; 77 public static final int SRE_CATEGORY_NOT_DIGIT = 1; 78 public static final int SRE_CATEGORY_SPACE = 2; 79 public static final int SRE_CATEGORY_NOT_SPACE = 3; 80 public static final int SRE_CATEGORY_WORD = 4; 81 public static final int SRE_CATEGORY_NOT_WORD = 5; 82 public static final int SRE_CATEGORY_LINEBREAK = 6; 83 public static final int SRE_CATEGORY_NOT_LINEBREAK = 7; 84 public static final int SRE_CATEGORY_LOC_WORD = 8; 85 public static final int SRE_CATEGORY_LOC_NOT_WORD = 9; 86 public static final int SRE_CATEGORY_UNI_DIGIT = 10; 87 public static final int SRE_CATEGORY_UNI_NOT_DIGIT = 11; 88 public static final int SRE_CATEGORY_UNI_SPACE = 12; 89 public static final int SRE_CATEGORY_UNI_NOT_SPACE = 13; 90 public static final int SRE_CATEGORY_UNI_WORD = 14; 91 public static final int SRE_CATEGORY_UNI_NOT_WORD = 15; 92 public static final int SRE_CATEGORY_UNI_LINEBREAK = 16; 93 public static final int SRE_CATEGORY_UNI_NOT_LINEBREAK = 17; 94 95 public static final int SRE_FLAG_TEMPLATE = 1; 96 public static final int SRE_FLAG_IGNORECASE = 2; 97 public static final int SRE_FLAG_LOCALE = 4; 98 public static final int SRE_FLAG_MULTILINE = 8; 99 public static final int SRE_FLAG_DOTALL = 16; 100 public static final int SRE_FLAG_UNICODE = 32; 101 public static final int SRE_FLAG_VERBOSE = 64; 102 103 public static final int SRE_INFO_PREFIX = 1; 104 public static final int SRE_INFO_LITERAL = 2; 105 public static final int SRE_INFO_CHARSET = 4; 106 107 108 public static final int USE_RECURSION_LIMIT = 2000; 109 110 111 112 int ptr; 113 int beginning; 114 int start; 115 int end; 116 117 118 char[] str; 119 int pos; 120 int endpos; 121 122 123 int charsize; 124 125 126 int lastindex; 127 int lastmark; 128 129 130 int[] mark = new int[200]; 131 132 133 int[] mark_stack; 134 int mark_stack_size; 135 int mark_stack_base; 136 137 SRE_REPEAT repeat; 138 139 140 int maxlevel; 141 142 143 int flags; 144 145 146 147 148 public SRE_STATE(String str, int start, int end, int flags) { 149 this.str = str.toCharArray(); 150 int size = str.length(); 151 152 this.charsize = 1; 153 154 155 if (start < 0) 156 start = 0; 157 else if (start > size) 158 start = size; 159 160 if (end < 0) 161 end = 0; 162 else if (end > size) 163 end = size; 164 165 this.start = start; 166 this.end = end; 167 168 this.pos = start; 169 this.endpos = end; 170 171 state_reset(); 172 173 this.flags = flags; 174 } 175 176 177 178 private void mark_fini() { 179 mark_stack = null; 180 mark_stack_size = mark_stack_base = 0; 181 } 182 183 184 private void mark_save(int lo, int hi) { 185 if (hi <= lo) 186 return; 187 188 int size = (hi - lo) + 1; 189 190 int newsize = mark_stack_size; 191 int minsize = mark_stack_base + size; 192 193 int[] stack; 194 195 if (newsize < minsize) { 196 197 if (newsize == 0) { 198 newsize = 512; 199 if (newsize < minsize) 200 newsize = minsize; 201 stack = new int[newsize]; 203 } else { 204 205 while (newsize < minsize) 206 newsize += newsize; 207 stack = new int[newsize]; 209 System.arraycopy(mark_stack, 0, stack, 0, mark_stack.length); 210 } 211 mark_stack = stack; 212 mark_stack_size = newsize; 213 } 214 215 217 System.arraycopy(mark, lo, mark_stack, mark_stack_base, size); 218 219 mark_stack_base += size; 220 } 221 222 223 private void mark_restore(int lo, int hi) { 224 if (hi <= lo) 225 return; 226 227 int size = (hi - lo) + 1; 228 229 mark_stack_base -= size; 230 231 233 System.arraycopy(mark_stack, mark_stack_base, mark, lo, size); 234 } 235 236 237 238 final boolean SRE_AT(int ptr, char at) { 239 241 242 boolean thiS, that; 243 244 switch (at) { 245 case SRE_AT_BEGINNING: 246 case SRE_AT_BEGINNING_STRING: 247 return ptr == beginning; 248 249 case SRE_AT_BEGINNING_LINE: 250 return (ptr == beginning || SRE_IS_LINEBREAK(str[ptr-1])); 251 252 case SRE_AT_END: 253 return (ptr+1 == end && SRE_IS_LINEBREAK(str[ptr])) || ptr == end; 254 255 case SRE_AT_END_LINE: 256 return ptr == end || SRE_IS_LINEBREAK(str[ptr]); 257 258 case SRE_AT_END_STRING: 259 return ptr == end; 260 261 case SRE_AT_BOUNDARY: 262 263 if (beginning == end) 264 return false; 265 that = (ptr > beginning) ? SRE_IS_WORD(str[ptr-1]) : false; 266 thiS = (ptr < end) ? SRE_IS_WORD(str[ptr]) : false; 267 return thiS != that; 268 269 case SRE_AT_NON_BOUNDARY: 270 271 if (beginning == end) 272 return false; 273 that = (ptr > beginning) ? SRE_IS_WORD(str[ptr-1]) : false; 274 thiS = (ptr < end) ? SRE_IS_WORD(str[ptr]) : false; 275 return thiS == that; 276 277 case SRE_AT_LOC_BOUNDARY: 278 case SRE_AT_UNI_BOUNDARY: 279 if (beginning == end) 280 return false; 281 that = (ptr > beginning) ? SRE_LOC_IS_WORD(str[ptr-1]) : false; 282 thiS = (ptr < end) ? SRE_LOC_IS_WORD(str[ptr]) : false; 283 return thiS != that; 284 285 case SRE_AT_LOC_NON_BOUNDARY: 286 case SRE_AT_UNI_NON_BOUNDARY: 287 288 if (beginning == end) 289 return false; 290 that = (ptr > beginning) ? SRE_LOC_IS_WORD(str[ptr-1]) : false; 291 thiS = (ptr < end) ? SRE_LOC_IS_WORD(str[ptr]) : false; 292 return thiS == that; 293 } 294 295 return false; 296 } 297 298 299 300 301 final boolean SRE_CHARSET(char[] set, int setidx, char ch) { 302 304 305 boolean ok = true; 306 307 for (;;) { 308 switch (set[setidx++]) { 309 310 case SRE_OP_LITERAL: 311 313 if (ch == set[setidx]) 314 return ok; 315 setidx++; 316 break; 317 318 case SRE_OP_RANGE: 319 320 if (set[setidx] <= ch && ch <= set[setidx+1]) 322 return ok; 323 setidx += 2; 324 break; 325 326 case SRE_OP_CHARSET: 327 329 if (ch < 256 && 330 (set[setidx + (ch >> 4)] & (1 << (ch & 15))) != 0) 331 return ok; 332 setidx += 16; 333 break; 334 335 case SRE_OP_BIGCHARSET: 336 337 int count = set[setidx++]; 339 int shift = ((ch >> 8) & 1) == 0 ? 8 : 0; 340 int block = (set[setidx + (ch >> 8) / 2] >> shift) & 0xFF; 341 setidx += 128; 342 int idx = block*16 + ((ch & 255)>>4); 343 if ((set[setidx + idx] & (1 << (ch & 15))) != 0) 344 return ok; 345 setidx += count*16; 346 break; 347 348 case SRE_OP_CATEGORY: 349 350 if (sre_category(set[setidx], ch)) 352 return ok; 353 setidx++; 354 break; 355 356 case SRE_OP_NEGATE: 357 ok = !ok; 359 break; 360 361 case SRE_OP_FAILURE: 362 return !ok; 364 365 default: 366 368 return false; 369 } 370 } 371 } 372 373 374 375 376 private int SRE_COUNT(char[] pattern, int pidx, int maxcount, int level) { 377 char chr; 378 int ptr = this.ptr; 379 int end = this.end; 380 int i; 381 382 383 if (maxcount < end - ptr && maxcount != 65535) 384 end = ptr + maxcount; 385 386 switch (pattern[pidx]) { 387 388 case SRE_OP_ANY: 389 390 while (ptr < end && !SRE_IS_LINEBREAK(str[ptr])) 392 ptr++; 393 break; 394 395 case SRE_OP_ANY_ALL: 396 398 ptr = end; 400 break; 401 402 case SRE_OP_LITERAL: 403 404 chr = pattern[pidx+1]; 405 while (ptr < end && str[ptr] == chr) 407 ptr++; 408 break; 409 410 case SRE_OP_LITERAL_IGNORE: 411 412 chr = pattern[pidx+1]; 413 while (ptr < end && lower(str[ptr]) == chr) 415 ptr++; 416 break; 417 418 case SRE_OP_NOT_LITERAL: 419 420 chr = pattern[pidx+1]; 421 while (ptr < end && str[ptr] != chr) 423 ptr++; 424 break; 425 426 case SRE_OP_NOT_LITERAL_IGNORE: 427 428 chr = pattern[pidx+1]; 429 while (ptr < end && lower(str[ptr]) != chr) 431 ptr++; 432 break; 433 434 case SRE_OP_IN: 435 436 while (ptr < end && SRE_CHARSET(pattern, pidx + 2, str[ptr])) 438 ptr++; 439 break; 440 441 default: 442 443 while (this.ptr < end) { 445 i = SRE_MATCH(pattern, pidx, level); 446 if (i < 0) 447 return i; 448 if (i == 0) 449 break; 450 } 451 return this.ptr - ptr; 452 } 453 454 return ptr - this.ptr; 455 } 456 457 458 459 final int SRE_MATCH(char[] pattern, int pidx, int level) { 460 462 463 int end = this.end; 464 int ptr = this.ptr; 465 int i, count; 466 char chr; 467 468 int lastmark; 469 470 472 if (level > USE_RECURSION_LIMIT) 473 return SRE_ERROR_RECURSION_LIMIT; 474 475 if (pattern[pidx] == SRE_OP_INFO) { 476 477 478 if (pattern[pidx+3] != 0 && (end - ptr) < pattern[pidx+3]) { 479 return 0; 480 } 481 pidx += pattern[pidx+1] + 1; 482 } 483 484 485 for (;;) { 486 487 switch (pattern[pidx++]) { 488 489 case SRE_OP_FAILURE: 490 491 return 0; 493 494 case SRE_OP_SUCCESS: 495 496 this.ptr = ptr; 498 return 1; 499 500 case SRE_OP_AT: 501 502 503 if (!SRE_AT(ptr, pattern[pidx])) 505 return 0; 506 pidx++; 507 break; 508 509 case SRE_OP_CATEGORY: 510 511 512 514 if (ptr >= end || !sre_category(pattern[pidx], str[ptr])) 515 return 0; 516 517 pidx++; 518 ptr++; 519 break; 520 521 case SRE_OP_LITERAL: 522 523 524 526 if (ptr >= end || str[ptr] != pattern[pidx]) 527 return 0; 528 pidx++; 529 ptr++; 530 break; 531 532 case SRE_OP_NOT_LITERAL: 533 534 535 if (ptr >= end || str[ptr] == pattern[pidx]) 537 return 0; 538 pidx++; 539 ptr++; 540 break; 541 542 case SRE_OP_ANY: 543 544 if (ptr >= end || SRE_IS_LINEBREAK(str[ptr])) 546 return 0; 547 ptr++; 548 break; 549 550 case SRE_OP_ANY_ALL: 551 552 553 if (ptr >= end) 555 return 0; 556 ptr++; 557 break; 558 559 case SRE_OP_IN: 560 561 562 if (ptr >= end || !SRE_CHARSET(pattern, pidx + 1, str[ptr])) 564 return 0; 565 pidx += (int)pattern[pidx]; 566 ptr++; 567 break; 568 569 case SRE_OP_GROUPREF: 570 571 i = pattern[pidx]; 572 int p = mark[i+i]; 574 int e = mark[i+i+1]; 575 if (p == -1 || e == -1 || e < p) 576 return 0; 577 while (p < e) { 578 if (ptr >= end || str[ptr] != str[p]) 579 return 0; 580 p++; 581 ptr++; 582 } 583 pidx++; 584 break; 585 586 case SRE_OP_GROUPREF_IGNORE: 587 588 i = pattern[pidx]; 589 p = mark[i+i]; 591 e = mark[i+i+1]; 592 if (p == -1 || e == -1 || e < p) 593 return 0; 594 while (p < e) { 595 if (ptr >= end || lower(str[ptr]) != lower(str[p])) 596 return 0; 597 p++; 598 ptr++; 599 } 600 pidx++; 601 break; 602 603 case SRE_OP_LITERAL_IGNORE: 604 if (ptr >= end || lower(str[ptr]) != lower(pattern[pidx])) 606 return 0; 607 pidx++; 608 ptr++; 609 break; 610 611 case SRE_OP_NOT_LITERAL_IGNORE: 612 if (ptr >= end || lower(str[ptr]) == lower(pattern[pidx])) 614 return 0; 615 pidx++; 616 ptr++; 617 break; 618 619 case SRE_OP_IN_IGNORE: 620 if (ptr >= end || 622 !SRE_CHARSET(pattern, pidx + 1, lower(str[ptr]))) 623 return 0; 624 pidx += (int)pattern[pidx]; 625 ptr++; 626 break; 627 628 case SRE_OP_MARK: 629 630 631 i = pattern[pidx]; 633 if ((i & 1) != 0) 634 lastindex = i / 2 + 1; 635 if (i > this.lastmark) 636 this.lastmark = i; 637 mark[i] = ptr; 638 pidx++; 639 break; 640 641 case SRE_OP_JUMP: 642 case SRE_OP_INFO: 643 644 645 pidx += (int)pattern[pidx]; 647 break; 648 649 case SRE_OP_ASSERT: 650 651 652 654 this.ptr = ptr - pattern[pidx + 1]; 655 if (this.ptr < this.beginning) 656 return 0; 657 i = SRE_MATCH(pattern, pidx + 2, level + 1); 658 if (i <= 0) 659 return i; 660 pidx += pattern[pidx]; 661 break; 662 663 case SRE_OP_ASSERT_NOT: 664 665 666 this.ptr = ptr - pattern[pidx + 1]; 668 if (this.ptr >= this.beginning) { 669 i = SRE_MATCH(pattern, pidx + 2, level + 1); 670 if (i < 0) 671 return i; 672 if (i != 0) 673 return 0; 674 } 675 pidx += pattern[pidx]; 676 break; 677 678 case SRE_OP_BRANCH: 679 680 681 lastmark = this.lastmark; 683 for (; pattern[pidx] != 0; pidx += pattern[pidx]) { 684 if (pattern[pidx+1] == SRE_OP_LITERAL && 685 (ptr >= end || str[ptr] != pattern[pidx+2])) 686 continue; 687 if (pattern[pidx+1] == SRE_OP_IN && (ptr >= end || 688 !SRE_CHARSET(pattern, pidx + 3, str[ptr]))) 689 continue; 690 this.ptr = ptr; 691 i = SRE_MATCH(pattern, pidx + 1, level + 1); 692 if (i != 0) 693 return i; 694 while (this.lastmark > lastmark) 695 mark[this.lastmark--] = -1; 696 } 697 698 return 0; 699 700 case SRE_OP_REPEAT_ONE: 701 702 703 707 708 709 710 int mincount = pattern[pidx+1]; 711 712 if (ptr + mincount > end) 714 return 0; 715 716 this.ptr = ptr; 717 718 count = SRE_COUNT(pattern, pidx + 3, pattern[pidx+2], 719 level + 1); 720 if (count < 0) 721 return count; 722 723 ptr += count; 724 725 729 730 if (count < mincount) 731 return 0; 732 733 if (pattern[pidx + pattern[pidx]] == SRE_OP_SUCCESS) { 734 735 this.ptr = ptr; 736 return 1; 737 738 } else if (pattern[pidx + pattern[pidx]] == SRE_OP_LITERAL) { 739 741 chr = pattern[pidx + pattern[pidx]+1]; 742 for (;;) { 743 while (count >= mincount && 744 (ptr >= end || str[ptr] != chr)) { 745 ptr--; 746 count--; 747 } 748 if (count < mincount) 749 break; 750 this.ptr = ptr; 751 i = SRE_MATCH(pattern, pidx + pattern[pidx], 752 level + 1); 753 if (i != 0) 754 return 1; 755 ptr--; 756 count--; 757 } 758 759 } else { 760 761 lastmark = this.lastmark; 762 while (count >= mincount) { 763 this.ptr = ptr; 764 i = SRE_MATCH(pattern, pidx + pattern[pidx], 765 level + 1); 766 if (i != 0) 767 return i; 768 ptr--; 769 count--; 770 while (this.lastmark > lastmark) 771 mark[this.lastmark--] = -1; 772 } 773 } 774 return 0; 775 776 777 case SRE_OP_REPEAT: 778 780 781 782 784 SRE_REPEAT rep = new SRE_REPEAT(repeat); 785 rep.count = -1; 786 rep.pidx = pidx; 787 repeat = rep; 788 789 this.ptr = ptr; 790 i = SRE_MATCH(pattern, pidx + pattern[pidx], level + 1); 791 792 repeat = rep.prev; 793 return i; 794 795 796 797 case SRE_OP_MAX_UNTIL: 798 799 800 801 803 804 SRE_REPEAT rp = this.repeat; 805 if (rp == null) 806 return SRE_ERROR_STATE; 807 808 this.ptr = ptr; 809 810 count = rp.count + 1; 811 812 814 if (count < pattern[rp.pidx + 1]) { 815 816 817 rp.count = count; 818 i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); 819 if (i != 0) 820 return i; 821 rp.count = count - 1; 822 this.ptr = ptr; 823 return 0; 824 } 825 826 if (count < pattern[rp.pidx+2] || 827 pattern[rp.pidx+2] == 65535) { 828 830 rp.count = count; 831 lastmark = this.lastmark; 832 mark_save(0, lastmark); 833 834 i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); 835 if (i != 0) 836 return i; 837 mark_restore(0, lastmark); 838 this.lastmark = lastmark; 839 rp.count = count - 1; 840 this.ptr = ptr; 841 } 842 843 845 this.repeat = rp.prev; 846 847 i = SRE_MATCH(pattern, pidx, level + 1); 848 if (i != 0) 849 return i; 850 this.repeat = rp; 851 this.ptr = ptr; 852 return 0; 853 854 case SRE_OP_MIN_UNTIL: 855 856 857 858 rp = this.repeat; 859 if (rp == null) 860 return SRE_ERROR_STATE; 861 862 count = rp.count + 1; 863 864 866 this.ptr = ptr; 867 868 if (count < pattern[rp.pidx + 1]) { 869 870 rp.count = count; 871 872 i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); 873 if (i != 0) 874 return i; 875 rp.count = count-1; 876 this.ptr = ptr; 877 return 0; 878 } 879 880 881 this.repeat = rp.prev; 882 i = SRE_MATCH(pattern, pidx, level + 1); 883 if (i != 0) 884 return i; 885 886 this.ptr = ptr; 887 this.repeat = rp; 888 889 if (count >= pattern[rp.pidx+2] && 890 pattern[rp.pidx+2] != 65535) 891 return 0; 892 893 rp.count = count; 894 895 i = SRE_MATCH(pattern, rp.pidx + 3, level + 1); 896 if (i != 0) 897 return i; 898 rp.count = count - 1; 899 this.ptr = ptr; 900 return 0; 901 902 903 default: 904 return SRE_ERROR_ILLEGAL; 906 } 907 } 908 909 } 911 912 913 914 915 int SRE_SEARCH(char[] pattern, int pidx) { 916 int ptr = this.start; 917 int end = this.end; 918 int status = 0; 919 int prefix_len = 0; 920 int prefix_skip = 0; 921 int prefix = 0; 922 int charset = 0; 923 int overlap = 0; 924 int flags = 0; 925 926 if (pattern[pidx] == SRE_OP_INFO) { 927 928 929 930 flags = pattern[pidx+2]; 931 932 if (pattern[pidx+3] > 0) { 933 935 end -= pattern[pidx+3]-1; 936 if (end <= ptr) 937 end = ptr; } 939 940 if ((flags & SRE_INFO_PREFIX) != 0) { 941 942 943 prefix_len = pattern[pidx+5]; 944 prefix_skip = pattern[pidx+6]; 945 prefix = pidx + 7; 946 overlap = prefix + prefix_len - 1; 947 } else if ((flags & SRE_INFO_CHARSET) != 0) { 948 949 950 charset = pidx + 5; 951 } 952 953 pidx += 1 + pattern[pidx+1]; 954 } 955 956 957 if (prefix_len > 1) { 958 960 int i = 0; 961 end = this.end; 962 while (ptr < end) { 963 for (;;) { 964 if (str[ptr] != pattern[prefix+i]) { 965 if (i == 0) 966 break; 967 else 968 i = pattern[overlap+i]; 969 } else { 970 if (++i == prefix_len) { 971 972 this.start = ptr + 1 - prefix_len; 974 this.ptr = ptr + 1 - prefix_len + prefix_skip; 975 if ((flags & SRE_INFO_LITERAL) != 0) 976 return 1; 977 status = SRE_MATCH(pattern, 978 pidx + 2*prefix_skip, 1); 979 if (status != 0) 980 return status; 981 982 i = pattern[overlap + i]; 983 } 984 break; 985 } 986 987 } 988 ptr++; 989 } 990 return 0; 991 } 992 993 if (pattern[pidx] == SRE_OP_LITERAL) { 994 995 char chr = pattern[pidx + 1]; 996 end = this.end; 997 for (;;) { 998 while (ptr < end && str[ptr] != chr) 999 ptr++; 1000 if (ptr == end) 1001 return 0; 1002 this.start = ptr; 1004 this.ptr = ++ptr; 1005 if ((flags & SRE_INFO_LITERAL) != 0) 1006 return 1; 1007 status = SRE_MATCH(pattern, pidx + 2, 1); 1008 if (status != 0) 1009 break; 1010 } 1011 1012 } else if (charset != 0) { 1013 1014 end = this.end; 1015 for (;;) { 1016 while (ptr < end && !SRE_CHARSET(pattern, charset, str[ptr])) 1017 ptr++; 1018 if (ptr == end) 1019 return 0; 1020 this.start = ptr; 1022 this.ptr = ptr; 1023 status = SRE_MATCH(pattern, pidx, 1); 1024 if (status != 0) 1025 break; 1026 ptr++; 1027 } 1028 1029 } else { 1030 1031 while (ptr <= end) { 1032 this.start = this.ptr = ptr++; 1034 status = SRE_MATCH(pattern, pidx, 1); 1035 if (status != 0) 1036 break; 1037 } 1038 } 1039 1040 return status; 1041 } 1042 1043 1044 final boolean sre_category(char category, char ch) { 1045 switch (category) { 1046 1047 case SRE_CATEGORY_DIGIT: 1048 return SRE_IS_DIGIT(ch); 1049 case SRE_CATEGORY_NOT_DIGIT: 1050 return ! SRE_IS_DIGIT(ch); 1051 1052 case SRE_CATEGORY_SPACE: 1053 return SRE_IS_SPACE(ch); 1054 case SRE_CATEGORY_NOT_SPACE: 1055 return ! SRE_IS_SPACE(ch); 1056 1057 case SRE_CATEGORY_WORD: 1058 return SRE_IS_WORD(ch); 1059 case SRE_CATEGORY_NOT_WORD: 1060 return ! SRE_IS_WORD(ch); 1061 1062 case SRE_CATEGORY_LINEBREAK: 1063 return SRE_IS_LINEBREAK(ch); 1064 case SRE_CATEGORY_NOT_LINEBREAK: 1065 return ! SRE_IS_LINEBREAK(ch); 1066 1067 case SRE_CATEGORY_LOC_WORD: 1068 return SRE_LOC_IS_WORD(ch); 1069 case SRE_CATEGORY_LOC_NOT_WORD: 1070 return ! SRE_LOC_IS_WORD(ch); 1071 1072 1073 case SRE_CATEGORY_UNI_DIGIT: 1074 return Character.isDigit(ch); 1075 case SRE_CATEGORY_UNI_NOT_DIGIT: 1076 return !Character.isDigit(ch); 1077 1078 case SRE_CATEGORY_UNI_SPACE: 1079 return Character.isWhitespace(ch); 1080 case SRE_CATEGORY_UNI_NOT_SPACE: 1081 return !Character.isWhitespace(ch); 1082 1083 case SRE_CATEGORY_UNI_WORD: 1084 return Character.isLetterOrDigit(ch) || ch == '_'; 1085 case SRE_CATEGORY_UNI_NOT_WORD: 1086 return ! (Character.isLetterOrDigit(ch) || ch == '_'); 1087 1088 case SRE_CATEGORY_UNI_LINEBREAK: 1089 return SRE_UNI_IS_LINEBREAK(ch); 1090 case SRE_CATEGORY_UNI_NOT_LINEBREAK: 1091 return ! SRE_UNI_IS_LINEBREAK(ch); 1092 1093 } 1094 return false; 1095 } 1096 1097 1098 1099 1100 static final int SRE_DIGIT_MASK = 1; 1101 static final int SRE_SPACE_MASK = 2; 1102 static final int SRE_LINEBREAK_MASK = 4; 1103 static final int SRE_ALNUM_MASK = 8; 1104 static final int SRE_WORD_MASK = 16; 1105 1106 static byte[] sre_char_info = new byte[] { 1107 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 1108 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 1110 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 1111 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 1112 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 1113 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 }; 1114 1115 static byte[] sre_char_lower = new byte[] { 1116 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1117 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1118 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 1119 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 1120 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 1121 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 1122 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 1123 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 1124 120, 121, 122, 123, 124, 125, 126, 127 }; 1125 1126 final boolean SRE_IS_DIGIT(char ch) { 1127 return ((ch) < 128 ? 1128 (sre_char_info[(ch)] & SRE_DIGIT_MASK) != 0 : false); 1129 } 1130 1131 final boolean SRE_IS_SPACE(char ch) { 1132 return ((ch) < 128 ? 1133 (sre_char_info[(ch)] & SRE_SPACE_MASK) != 0 : false); 1134 } 1135 1136 final boolean SRE_IS_WORD(char ch) { 1137 return ((ch) < 128 ? 1138 (sre_char_info[(ch)] & SRE_WORD_MASK) != 0 : false); 1139 } 1140 1141 final boolean SRE_IS_LINEBREAK(char ch) { 1142 return ch == '\n'; 1143 } 1144 1145 final boolean SRE_LOC_IS_WORD(char ch) { 1146 return Character.isLetterOrDigit(ch) || ch == '_'; 1147 } 1148 1149 final boolean SRE_UNI_IS_LINEBREAK(char ch) { 1150 switch (ch) { 1151 case 0x000A: 1152 case 0x000D: 1153 case 0x001C: 1154 case 0x001D: 1155 case 0x001E: 1156 case 0x0085: 1157 case 0x2028: 1158 case 0x2029: 1159 return true; 1160 default: 1161 return false; 1162 } 1163 } 1164 1165 1166 final char lower(char ch) { 1167 if ((flags & SRE_FLAG_LOCALE) != 0) 1168 return ((ch) < 256 ? Character.toLowerCase(ch) : ch); 1169 if ((flags & SRE_FLAG_UNICODE) != 0) 1170 return Character.toLowerCase(ch); 1171 return ((ch) < 128 ? (char)sre_char_lower[ch] : ch); 1172 } 1173 1174 1175 public static int getlower(int ch, int flags) { 1176 if ((flags & SRE_FLAG_LOCALE) != 0) 1177 return ((ch) < 256 ? Character.toLowerCase((char) ch) : ch); 1178 if ((flags & SRE_FLAG_UNICODE) != 0) 1179 return Character.toLowerCase((char)ch); 1180 return ((ch) < 128 ? (char)sre_char_lower[ch] : ch); 1181 } 1182 1183 1184 1185 1186 1187 String getslice(int index, String string, boolean empty) { 1188 int i, j; 1189 1190 index = (index - 1) * 2; 1191 1192 if (string == null || mark[index] == -1 || mark[index+1] == -1) { 1193 if (empty) { 1194 1195 i = j = 0; 1196 } else { 1197 return null; 1198 } 1199 } else { 1200 i = mark[index]; 1201 j = mark[index+1]; 1202 } 1203 1204 return string.substring(i, j); 1205 } 1206 1207 1208 1209 1210 void state_reset() { 1211 lastmark = 0; 1212 1213 1214 for (int i = 0; i < mark.length; i++) 1215 mark[i] = -1; 1216 1217 lastindex = -1; 1218 repeat = null; 1219 1220 mark_fini(); 1221 } 1222 1223 1224 private void TRACE(int pidx, int ptr, String string) { 1225 } 1227} 1228 | Popular Tags |