| 1 package org.apache.regexp; 2 3 59 60 import java.io.Serializable ; 61 import java.util.Vector ; 62 63 370 public class RE implements Serializable  371 { 372 375 public static final int MATCH_NORMAL = 0x0000; 376 377 380 public static final int MATCH_CASEINDEPENDENT = 0x0001; 381 382 385 public static final int MATCH_MULTILINE = 0x0002; 386 387 390 public static final int MATCH_SINGLELINE = 0x0004; 391 392 403 404 static final char OP_END = 'E'; static final char OP_BOL = '^'; static final char OP_EOL = '$'; static final char OP_ANY = '.'; static final char OP_ANYOF = '['; static final char OP_BRANCH = '|'; static final char OP_ATOM = 'A'; static final char OP_STAR = '*'; static final char OP_PLUS = '+'; static final char OP_MAYBE = '?'; static final char OP_ESCAPE = '\\'; static final char OP_OPEN = '('; static final char OP_OPEN_CLUSTER = '<'; static final char OP_CLOSE = ')'; static final char OP_CLOSE_CLUSTER = '>'; static final char OP_BACKREF = '#'; static final char OP_GOTO = 'G'; static final char OP_NOTHING = 'N'; static final char OP_RELUCTANTSTAR = '8'; static final char OP_RELUCTANTPLUS = '='; static final char OP_RELUCTANTMAYBE = '/'; static final char OP_POSIXCLASS = 'P'; 429 static final char E_ALNUM = 'w'; static final char E_NALNUM = 'W'; static final char E_BOUND = 'b'; static final char E_NBOUND = 'B'; static final char E_SPACE = 's'; static final char E_NSPACE = 'S'; static final char E_DIGIT = 'd'; static final char E_NDIGIT = 'D'; 439 static final char POSIX_CLASS_ALNUM = 'w'; static final char POSIX_CLASS_ALPHA = 'a'; static final char POSIX_CLASS_BLANK = 'b'; static final char POSIX_CLASS_CNTRL = 'c'; static final char POSIX_CLASS_DIGIT = 'd'; static final char POSIX_CLASS_GRAPH = 'g'; static final char POSIX_CLASS_LOWER = 'l'; static final char POSIX_CLASS_PRINT = 'p'; static final char POSIX_CLASS_PUNCT = '!'; static final char POSIX_CLASS_SPACE = 's'; static final char POSIX_CLASS_UPPER = 'u'; static final char POSIX_CLASS_XDIGIT = 'x'; static final char POSIX_CLASS_JSTART = 'j'; static final char POSIX_CLASS_JPART = 'k'; 455 static final int maxNode = 65536; static final int MAX_PAREN = 16; 459 static final int offsetOpcode = 0; static final int offsetOpdata = 1; static final int offsetNext = 2; static final int nodeSize = 3; 465 466 static final String NEWLINE = System.getProperty("line.separator"); 467 468 REProgram program; transient CharacterIterator search; int matchFlags; int maxParen = MAX_PAREN; 473 474 transient int parenCount; transient int start0; transient int end0; transient int start1; transient int end1; transient int start2; transient int end2; transient int[] startn; transient int[] endn; 485 transient int[] startBackref; transient int[] endBackref; 489 498 public RE(String pattern) throws RESyntaxException 499 { 500 this(pattern, MATCH_NORMAL); 501 } 502 503 513 public RE(String pattern, int matchFlags) throws RESyntaxException 514 { 515 this(new RECompiler().compile(pattern)); 516 setMatchFlags(matchFlags); 517 } 518 519 538 public RE(REProgram program, int matchFlags) 539 { 540 setProgram(program); 541 setMatchFlags(matchFlags); 542 } 543 544 551 public RE(REProgram program) 552 { 553 this(program, MATCH_NORMAL); 554 } 555 556 560 public RE() 561 { 562 this((REProgram)null, MATCH_NORMAL); 563 } 564 565 570 public static String simplePatternToFullRegularExpression(String pattern) 571 { 572 StringBuffer buf = new StringBuffer (); 573 for (int i = 0; i < pattern.length(); i++) 574 { 575 char c = pattern.charAt(i); 576 switch (c) 577 { 578 case '*': 579 buf.append(".*"); 580 break; 581 582 case '.': 583 case '[': 584 case ']': 585 case '\\': 586 case '+': 587 case '?': 588 case '{': 589 case '}': 590 case '$': 591 case '^': 592 case '|': 593 case '(': 594 case ')': 595 buf.append('\\'); 596 default: 597 buf.append(c); 598 break; 599 } 600 } 601 return buf.toString(); 602 } 603 604 617 public void setMatchFlags(int matchFlags) 618 { 619 this.matchFlags = matchFlags; 620 } 621 622 637 public int getMatchFlags() 638 { 639 return matchFlags; 640 } 641 642 649 public void setProgram(REProgram program) 650 { 651 this.program = program; 652 if (program != null && program.maxParens != -1) { 653 this.maxParen = program.maxParens; 654 } else { 655 this.maxParen = MAX_PAREN; 656 } 657 } 658 659 664 public REProgram getProgram() 665 { 666 return program; 667 } 668 669 673 public int getParenCount() 674 { 675 return parenCount; 676 } 677 678 683 public String getParen(int which) 684 { 685 int start; 686 if (which < parenCount && (start = getParenStart(which)) >= 0) 687 { 688 return search.substring(start, getParenEnd(which)); 689 } 690 return null; 691 } 692 693 698 public final int getParenStart(int which) 699 { 700 if (which < parenCount) 701 { 702 switch (which) 703 { 704 case 0: 705 return start0; 706 707 case 1: 708 return start1; 709 710 case 2: 711 return start2; 712 713 default: 714 if (startn == null) 715 { 716 allocParens(); 717 } 718 return startn[which]; 719 } 720 } 721 return -1; 722 } 723 724 729 public final int getParenEnd(int which) 730 { 731 if (which < parenCount) 732 { 733 switch (which) 734 { 735 case 0: 736 return end0; 737 738 case 1: 739 return end1; 740 741 case 2: 742 return end2; 743 744 default: 745 if (endn == null) 746 { 747 allocParens(); 748 } 749 return endn[which]; 750 } 751 } 752 return -1; 753 } 754 755 760 public final int getParenLength(int which) 761 { 762 if (which < parenCount) 763 { 764 return getParenEnd(which) - getParenStart(which); 765 } 766 return -1; 767 } 768 769 774 protected final void setParenStart(int which, int i) 775 { 776 if (which < parenCount) 777 { 778 switch (which) 779 { 780 case 0: 781 start0 = i; 782 break; 783 784 case 1: 785 start1 = i; 786 break; 787 788 case 2: 789 start2 = i; 790 break; 791 792 default: 793 if (startn == null) 794 { 795 allocParens(); 796 } 797 startn[which] = i; 798 break; 799 } 800 } 801 } 802 803 808 protected final void setParenEnd(int which, int i) 809 { 810 if (which < parenCount) 811 { 812 switch (which) 813 { 814 case 0: 815 end0 = i; 816 break; 817 818 case 1: 819 end1 = i; 820 break; 821 822 case 2: 823 end2 = i; 824 break; 825 826 default: 827 if (endn == null) 828 { 829 allocParens(); 830 } 831 endn[which] = i; 832 break; 833 } 834 } 835 } 836 837 843 protected void internalError(String s) throws Error  844 { 845 throw new Error ("RE internal error: " + s); 846 } 847 848 851 private final void allocParens() 852 { 853 startn = new int[maxParen]; 855 endn = new int[maxParen]; 856 857 for (int i = 0; i < maxParen; i++) 859 { 860 startn[i] = -1; 861 endn[i] = -1; 862 } 863 } 864 865 873 protected int matchNodes(int firstNode, int lastNode, int idxStart) 874 { 875 int idx = idxStart; 877 878 int next, opcode, opdata; 880 int idxNew; 881 char[] instruction = program.instruction; 882 for (int node = firstNode; node < lastNode; ) 883 { 884 opcode = instruction[node + offsetOpcode]; 885 next = node + (short)instruction[node + offsetNext]; 886 opdata = instruction[node + offsetOpdata]; 887 888 switch (opcode) 889 { 890 case OP_RELUCTANTMAYBE: 891 { 892 int once = 0; 893 do 894 { 895 if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 897 { 898 return idxNew; 899 } 900 } 901 while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1); 902 return -1; 903 } 904 905 case OP_RELUCTANTPLUS: 906 while ((idx = matchNodes(node + nodeSize, next, idx)) != -1) 907 { 908 if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 910 { 911 return idxNew; 912 } 913 } 914 return -1; 915 916 case OP_RELUCTANTSTAR: 917 do 918 { 919 if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 921 { 922 return idxNew; 923 } 924 } 925 while ((idx = matchNodes(node + nodeSize, next, idx)) != -1); 926 return -1; 927 928 case OP_OPEN: 929 930 if ((program.flags & REProgram.OPT_HASBACKREFS) != 0) 932 { 933 startBackref[opdata] = idx; 934 } 935 if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 936 { 937 if ((opdata + 1) > parenCount) 939 { 940 parenCount = opdata + 1; 941 } 942 943 if (getParenStart(opdata) == -1) 945 { 946 setParenStart(opdata, idx); 947 } 948 } 949 return idxNew; 950 951 case OP_CLOSE: 952 953 if ((program.flags & REProgram.OPT_HASBACKREFS) != 0) 955 { 956 endBackref[opdata] = idx; 957 } 958 if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 959 { 960 if ((opdata + 1) > parenCount) 962 { 963 parenCount = opdata + 1; 964 } 965 966 if (getParenEnd
|