1 16 17 package org.apache.xerces.impl.xpath.regex; 18 19 import java.text.CharacterIterator ; 20 21 483 public class RegularExpression implements java.io.Serializable { 484 485 private static final long serialVersionUID = 3905241217112815923L; 486 487 static final boolean DEBUG = false; 488 489 492 private synchronized void compile(Token tok) { 493 if (this.operations != null) 494 return; 495 this.numberOfClosures = 0; 496 this.operations = this.compile(tok, null, false); 497 } 498 499 502 private Op compile(Token tok, Op next, boolean reverse) { 503 Op ret; 504 switch (tok.type) { 505 case Token.DOT: 506 ret = Op.createDot(); 507 ret.next = next; 508 break; 509 510 case Token.CHAR: 511 ret = Op.createChar(tok.getChar()); 512 ret.next = next; 513 break; 514 515 case Token.ANCHOR: 516 ret = Op.createAnchor(tok.getChar()); 517 ret.next = next; 518 break; 519 520 case Token.RANGE: 521 case Token.NRANGE: 522 ret = Op.createRange(tok); 523 ret.next = next; 524 break; 525 526 case Token.CONCAT: 527 ret = next; 528 if (!reverse) { 529 for (int i = tok.size()-1; i >= 0; i --) { 530 ret = compile(tok.getChild(i), ret, false); 531 } 532 } else { 533 for (int i = 0; i < tok.size(); i ++) { 534 ret = compile(tok.getChild(i), ret, true); 535 } 536 } 537 break; 538 539 case Token.UNION: 540 Op.UnionOp uni = Op.createUnion(tok.size()); 541 for (int i = 0; i < tok.size(); i ++) { 542 uni.addElement(compile(tok.getChild(i), next, reverse)); 543 } 544 ret = uni; break; 546 547 case Token.CLOSURE: 548 case Token.NONGREEDYCLOSURE: 549 Token child = tok.getChild(0); 550 int min = tok.getMin(); 551 int max = tok.getMax(); 552 if (min >= 0 && min == max) { ret = next; 554 for (int i = 0; i < min; i ++) { 555 ret = compile(child, ret, reverse); 556 } 557 break; 558 } 559 if (min > 0 && max > 0) 560 max -= min; 561 if (max > 0) { 562 ret = next; 564 for (int i = 0; i < max; i ++) { 565 Op.ChildOp q = Op.createQuestion(tok.type == Token.NONGREEDYCLOSURE); 566 q.next = next; 567 q.setChild(compile(child, ret, reverse)); 568 ret = q; 569 } 570 } else { 571 Op.ChildOp op; 572 if (tok.type == Token.NONGREEDYCLOSURE) { 573 op = Op.createNonGreedyClosure(); 574 } else { if (child.getMinLength() == 0) 576 op = Op.createClosure(this.numberOfClosures++); 577 else 578 op = Op.createClosure(-1); 579 } 580 op.next = next; 581 op.setChild(compile(child, op, reverse)); 582 ret = op; 583 } 584 if (min > 0) { 585 for (int i = 0; i < min; i ++) { 586 ret = compile(child, ret, reverse); 587 } 588 } 589 break; 590 591 case Token.EMPTY: 592 ret = next; 593 break; 594 595 case Token.STRING: 596 ret = Op.createString(tok.getString()); 597 ret.next = next; 598 break; 599 600 case Token.BACKREFERENCE: 601 ret = Op.createBackReference(tok.getReferenceNumber()); 602 ret.next = next; 603 break; 604 605 case Token.PAREN: 606 if (tok.getParenNumber() == 0) { 607 ret = compile(tok.getChild(0), next, reverse); 608 } else if (reverse) { 609 next = Op.createCapture(tok.getParenNumber(), next); 610 next = compile(tok.getChild(0), next, reverse); 611 ret = Op.createCapture(-tok.getParenNumber(), next); 612 } else { 613 next = Op.createCapture(-tok.getParenNumber(), next); 614 next = compile(tok.getChild(0), next, reverse); 615 ret = Op.createCapture(tok.getParenNumber(), next); 616 } 617 break; 618 619 case Token.LOOKAHEAD: 620 ret = Op.createLook(Op.LOOKAHEAD, next, compile(tok.getChild(0), null, false)); 621 break; 622 case Token.NEGATIVELOOKAHEAD: 623 ret = Op.createLook(Op.NEGATIVELOOKAHEAD, next, compile(tok.getChild(0), null, false)); 624 break; 625 case Token.LOOKBEHIND: 626 ret = Op.createLook(Op.LOOKBEHIND, next, compile(tok.getChild(0), null, true)); 627 break; 628 case Token.NEGATIVELOOKBEHIND: 629 ret = Op.createLook(Op.NEGATIVELOOKBEHIND, next, compile(tok.getChild(0), null, true)); 630 break; 631 632 case Token.INDEPENDENT: 633 ret = Op.createIndependent(next, compile(tok.getChild(0), null, reverse)); 634 break; 635 636 case Token.MODIFIERGROUP: 637 ret = Op.createModifier(next, compile(tok.getChild(0), null, reverse), 638 ((Token.ModifierToken)tok).getOptions(), 639 ((Token.ModifierToken)tok).getOptionsMask()); 640 break; 641 642 case Token.CONDITION: 643 Token.ConditionToken ctok = (Token.ConditionToken)tok; 644 int ref = ctok.refNumber; 645 Op condition = ctok.condition == null ? null : compile(ctok.condition, null, reverse); 646 Op yes = compile(ctok.yes, next, reverse); 647 Op no = ctok.no == null ? null : compile(ctok.no, next, reverse); 648 ret = Op.createCondition(next, ref, condition, yes, no); 649 break; 650 651 default: 652 throw new RuntimeException ("Unknown token type: "+tok.type); 653 } return ret; 655 } 656 657 658 660 665 public boolean matches(char[] target) { 666 return this.matches(target, 0, target .length , (Match)null); 667 } 668 669 677 public boolean matches(char[] target, int start, int end) { 678 return this.matches(target, start, end, (Match)null); 679 } 680 681 687 public boolean matches(char[] target, Match match) { 688 return this.matches(target, 0, target .length , match); 689 } 690 691 692 701 public boolean matches(char[] target, int start, int end, Match match) { 702 703 synchronized (this) { 704 if (this.operations == null) 705 this.prepare(); 706 if (this.context == null) 707 this.context = new Context(); 708 } 709 Context con = null; 710 synchronized (this.context) { 711 con = this.context.inuse ? new Context() : this.context; 712 con.reset(target, start, end, this.numberOfClosures); 713 } 714 if (match != null) { 715 match.setNumberOfGroups(this.nofparen); 716 match.setSource(target); 717 } else if (this.hasBackReferences) { 718 match = new Match(); 719 match.setNumberOfGroups(this.nofparen); 720 } 723 con.match = match; 724 725 if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) { 726 int matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options); 727 if (matchEnd == con.limit) { 729 if (con.match != null) { 730 con.match.setBeginning(0, con.start); 731 con.match.setEnd(0, matchEnd); 732 } 733 con.inuse = false; 734 return true; 735 } 736 return false; 737 } 738 739 743 if (this.fixedStringOnly) { 744 int o = this.fixedStringTable.matches(target, con.start, con.limit); 746 if (o >= 0) { 747 if (con.match != null) { 748 con.match.setBeginning(0, o); 749 con.match.setEnd(0, o+this.fixedString.length()); 750 } 751 con.inuse = false; 752 return true; 753 } 754 con.inuse = false; 755 return false; 756 } 757 758 763 if (this.fixedString != null) { 764 int o = this.fixedStringTable.matches(target, con.start, con.limit); 765 if (o < 0) { 766 con.inuse = false; 768 return false; 769 } 770 } 771 772 int limit = con.limit-this.minlength; 773 int matchStart; 774 int matchEnd = -1; 775 776 779 if (this.operations != null 780 && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { 781 if (isSet(this.options, SINGLE_LINE)) { 782 matchStart = con.start; 783 matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options); 784 } else { 785 boolean previousIsEOL = true; 786 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 787 int ch = target [ matchStart ] ; 788 if (isEOLChar(ch)) { 789 previousIsEOL = true; 790 } else { 791 if (previousIsEOL) { 792 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 793 matchStart, 1, this.options))) 794 break; 795 } 796 previousIsEOL = false; 797 } 798 } 799 } 800 } 801 802 805 else if (this.firstChar != null) { 806 RangeToken range = this.firstChar; 808 if (RegularExpression.isSet(this.options, IGNORE_CASE)) { 809 range = this.firstChar.getCaseInsensitiveToken(); 810 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 811 int ch = target [ matchStart ] ; 812 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { 813 ch = REUtil.composeFromSurrogates(ch, target [ matchStart+1 ] ); 814 if (!range.match(ch)) continue; 815 } else { 816 if (!range.match(ch)) { 817 char ch1 = Character.toUpperCase((char)ch); 818 if (!range.match(ch1)) 819 if (!range.match(Character.toLowerCase(ch1))) 820 continue; 821 } 822 } 823 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 824 matchStart, 1, this.options))) 825 break; 826 } 827 } else { 828 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 829 int ch = target [ matchStart ] ; 830 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) 831 ch = REUtil.composeFromSurrogates(ch, target [ matchStart+1 ] ); 832 if (!range.match(ch)) continue; 833 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 834 matchStart, 1, this.options))) 835 break; 836 } 837 } 838 } 839 840 843 else { 844 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 845 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, matchStart, 1, this.options))) 846 break; 847 } 848 } 849 850 if (matchEnd >= 0) { 851 if (con.match != null) { 852 con.match.setBeginning(0, matchStart); 853 con.match.setEnd(0, matchEnd); 854 } 855 con.inuse = false; 856 return true; 857 } else { 858 con.inuse = false; 859 return false; 860 } 861 } 862 863 866 private int matchCharArray (Context con, Op op, int offset, int dx, int opts) { 867 868 char[] target = con.charTarget; 869 870 871 while (true) { 872 if (op == null) 873 return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; 874 if (offset > con.limit || offset < con.start) 875 return -1; 876 switch (op.type) { 877 case Op.CHAR: 878 if (isSet(opts, IGNORE_CASE)) { 879 int ch = op.getData(); 880 if (dx > 0) { 881 if (offset >= con.limit || !matchIgnoreCase(ch, target [ offset ] )) 882 return -1; 883 offset ++; 884 } else { 885 int o1 = offset-1; 886 if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target [ o1 ] )) 887 return -1; 888 offset = o1; 889 } 890 } else { 891 int ch = op.getData(); 892 if (dx > 0) { 893 if (offset >= con.limit || ch != target [ offset ] ) 894 return -1; 895 offset ++; 896 } else { 897 int o1 = offset-1; 898 if (o1 >= con.limit || o1 < 0 || ch != target [ o1 ] ) 899 return -1; 900 offset = o1; 901 } 902 } 903 op = op.next; 904 break; 905 906 case Op.DOT: 907 if (dx > 0) { 908 if (offset >= con.limit) 909 return -1; 910 int ch = target [ offset ] ; 911 if (isSet(opts, SINGLE_LINE)) { 912 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 913 offset ++; 914 } else { 915 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 916 ch = REUtil.composeFromSurrogates(ch, target [ ++offset ] ); 917 if (isEOLChar(ch)) 918 return -1; 919 } 920 offset ++; 921 } else { 922 int o1 = offset-1; 923 if (o1 >= con.limit || o1 < 0) 924 return -1; 925 int ch = target [ o1 ] ; 926 if (isSet(opts, SINGLE_LINE)) { 927 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 928 o1 --; 929 } else { 930 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 931 ch = REUtil.composeFromSurrogates( target [ --o1 ] , ch); 932 if (!isEOLChar(ch)) 933 return -1; 934 } 935 offset = o1; 936 } 937 op = op.next; 938 break; 939 940 case Op.RANGE: 941 case Op.NRANGE: 942 if (dx > 0) { 943 if (offset >= con.limit) 944 return -1; 945 int ch = target [ offset ] ; 946 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 947 ch = REUtil.composeFromSurrogates(ch, target [ ++offset ] ); 948 RangeToken tok = op.getToken(); 949 if (isSet(opts, IGNORE_CASE)) { 950 tok = tok.getCaseInsensitiveToken(); 951 if (!tok.match(ch)) { 952 if (ch >= 0x10000) return -1; 953 char uch; 954 if (!tok.match(uch = Character.toUpperCase((char)ch)) 955 && !tok.match(Character.toLowerCase(uch))) 956 return -1; 957 } 958 } else { 959 if (!tok.match(ch)) return -1; 960 } 961 offset ++; 962 } else { 963 int o1 = offset-1; 964 if (o1 >= con.limit || o1 < 0) 965 return -1; 966 int ch = target [ o1 ] ; 967 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 968 ch = REUtil.composeFromSurrogates( target [ --o1 ] , ch); 969 RangeToken tok = op.getToken(); 970 if (isSet(opts, IGNORE_CASE)) { 971 tok = tok.getCaseInsensitiveToken(); 972 if (!tok.match(ch)) { 973 if (ch >= 0x10000) return -1; 974 char uch; 975 if (!tok.match(uch = Character.toUpperCase((char)ch)) 976 && !tok.match(Character.toLowerCase(uch))) 977 return -1; 978 } 979 } else { 980 if (!tok.match(ch)) return -1; 981 } 982 offset = o1; 983 } 984 op = op.next; 985 break; 986 987 case Op.ANCHOR: 988 boolean go = false; 989 switch (op.getData()) { 990 case '^': 991 if (isSet(opts, MULTIPLE_LINES)) { 992 if (!(offset == con.start 993 || offset > con.start && isEOLChar( target [ offset-1 ] ))) 994 return -1; 995 } else { 996 if (offset != con.start) 997 return -1; 998 } 999 break; 1000 1001 case '@': if (!(offset == con.start 1004 || offset > con.start && isEOLChar( target [ offset-1 ] ))) 1005 return -1; 1006 break; 1007 1008 case '$': 1009 if (isSet(opts, MULTIPLE_LINES)) { 1010 if (!(offset == con.limit 1011 || offset < con.limit && isEOLChar( target [ offset ] ))) 1012 return -1; 1013 } else { 1014 if (!(offset == con.limit 1015 || offset+1 == con.limit && isEOLChar( target [ offset ] ) 1016 || offset+2 == con.limit && target [ offset ] == CARRIAGE_RETURN 1017 && target [ offset+1 ] == LINE_FEED)) 1018 return -1; 1019 } 1020 break; 1021 1022 case 'A': 1023 if (offset != con.start) return -1; 1024 break; 1025 1026 case 'Z': 1027 if (!(offset == con.limit 1028 || offset+1 == con.limit && isEOLChar( target [ offset ] ) 1029 || offset+2 == con.limit && target [ offset ] == CARRIAGE_RETURN 1030 && target [ offset+1 ] == LINE_FEED)) 1031 return -1; 1032 break; 1033 1034 case 'z': 1035 if (offset != con.limit) return -1; 1036 break; 1037 1038 case 'b': 1039 if (con.length == 0) return -1; 1040 { 1041 int after = getWordType(target, con.start, con.limit, offset, opts); 1042 if (after == WT_IGNORE) return -1; 1043 int before = getPreviousWordType(target, con.start, con.limit, offset, opts); 1044 if (after == before) return -1; 1045 } 1046 break; 1047 1048 case 'B': 1049 if (con.length == 0) 1050 go = true; 1051 else { 1052 int after = getWordType(target, con.start, con.limit, offset, opts); 1053 go = after == WT_IGNORE 1054 || after == getPreviousWordType(target, con.start, con.limit, offset, opts); 1055 } 1056 if (!go) return -1; 1057 break; 1058 1059 case '<': 1060 if (con.length == 0 || offset == con.limit) return -1; 1061 if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER 1062 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) 1063 return -1; 1064 break; 1065 1066 case '>': 1067 if (con.length == 0 || offset == con.start) return -1; 1068 if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER 1069 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) 1070 return -1; 1071 break; 1072 } op = op.next; 1074 break; 1075 1076 case Op.BACKREFERENCE: 1077 { 1078 int refno = op.getData(); 1079 if (refno <= 0 || refno >= this.nofparen) 1080 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+refno); 1081 if (con.match.getBeginning(refno) < 0 1082 || con.match.getEnd(refno) < 0) 1083 return -1; int o2 = con.match.getBeginning(refno); 1085 int literallen = con.match.getEnd(refno)-o2; 1086 if (!isSet(opts, IGNORE_CASE)) { 1087 if (dx > 0) { 1088 if (!regionMatches(target, offset, con.limit, o2, literallen)) 1089 return -1; 1090 offset += literallen; 1091 } else { 1092 if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) 1093 return -1; 1094 offset -= literallen; 1095 } 1096 } else { 1097 if (dx > 0) { 1098 if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) 1099 return -1; 1100 offset += literallen; 1101 } else { 1102 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 1103 o2, literallen)) 1104 return -1; 1105 offset -= literallen; 1106 } 1107 } 1108 } 1109 op = op.next; 1110 break; 1111 case Op.STRING: 1112 { 1113 String literal = op.getString(); 1114 int literallen = literal.length(); 1115 if (!isSet(opts, IGNORE_CASE)) { 1116 if (dx > 0) { 1117 if (!regionMatches(target, offset, con.limit, literal, literallen)) 1118 return -1; 1119 offset += literallen; 1120 } else { 1121 if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) 1122 return -1; 1123 offset -= literallen; 1124 } 1125 } else { 1126 if (dx > 0) { 1127 if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) 1128 return -1; 1129 offset += literallen; 1130 } else { 1131 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 1132 literal, literallen)) 1133 return -1; 1134 offset -= literallen; 1135 } 1136 } 1137 } 1138 op = op.next; 1139 break; 1140 1141 case Op.CLOSURE: 1142 { 1143 1147 int id = op.getData(); 1148 if (id >= 0) { 1149 int previousOffset = con.offsets[id]; 1150 if (previousOffset < 0 || previousOffset != offset) { 1151 con.offsets[id] = offset; 1152 } else { 1153 con.offsets[id] = -1; 1154 op = op.next; 1155 break; 1156 } 1157 } 1158 1159 int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); 1160 if (id >= 0) con.offsets[id] = -1; 1161 if (ret >= 0) return ret; 1162 op = op.next; 1163 } 1164 break; 1165 1166 case Op.QUESTION: 1167 { 1168 int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); 1169 if (ret >= 0) return ret; 1170 op = op.next; 1171 } 1172 break; 1173 1174 case Op.NONGREEDYCLOSURE: 1175 case Op.NONGREEDYQUESTION: 1176 { 1177 int ret = this. matchCharArray (con, op.next, offset, dx, opts); 1178 if (ret >= 0) return ret; 1179 op = op.getChild(); 1180 } 1181 break; 1182 1183 case Op.UNION: 1184 for (int i = 0; i < op.size(); i ++) { 1185 int ret = this. matchCharArray (con, op.elementAt(i), offset, dx, opts); 1186 if (DEBUG) { 1187 System.err.println("UNION: "+i+", ret="+ret); 1188 } 1189 if (ret >= 0) return ret; 1190 } 1191 return -1; 1192 1193 case Op.CAPTURE: 1194 int refno = op.getData(); 1195 if (con.match != null && refno > 0) { 1196 int save = con.match.getBeginning(refno); 1197 con.match.setBeginning(refno, offset); 1198 int ret = this. matchCharArray (con, op.next, offset, dx, opts); 1199 if (ret < 0) con.match.setBeginning(refno, save); 1200 return ret; 1201 } else if (con.match != null && refno < 0) { 1202 int index = -refno; 1203 int save = con.match.getEnd(index); 1204 con.match.setEnd(index, offset); 1205 int ret = this. matchCharArray (con, op.next, offset, dx, opts); 1206 if (ret < 0) con.match.setEnd(index, save); 1207 return ret; 1208 } 1209 op = op.next; 1210 break; 1211 1212 case Op.LOOKAHEAD: 1213 if (0 > this. matchCharArray (con, op.getChild(), offset, 1, opts)) return -1; 1214 op = op.next; 1215 break; 1216 case Op.NEGATIVELOOKAHEAD: 1217 if (0 <= this. matchCharArray (con, op.getChild(), offset, 1, opts)) return -1; 1218 op = op.next; 1219 break; 1220 case Op.LOOKBEHIND: 1221 if (0 > this. matchCharArray (con, op.getChild(), offset, -1, opts)) return -1; 1222 op = op.next; 1223 break; 1224 case Op.NEGATIVELOOKBEHIND: 1225 if (0 <= this. matchCharArray (con, op.getChild(), offset, -1, opts)) return -1; 1226 op = op.next; 1227 break; 1228 1229 case Op.INDEPENDENT: 1230 { 1231 int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); 1232 if (ret < 0) return ret; 1233 offset = ret; 1234 op = op.next; 1235 } 1236 break; 1237 1238 case Op.MODIFIER: 1239 { 1240 int localopts = opts; 1241 localopts |= op.getData(); 1242 localopts &= ~op.getData2(); 1243 int ret = this. matchCharArray (con, op.getChild(), offset, dx, localopts); 1245 if (ret < 0) return ret; 1246 offset = ret; 1247 op = op.next; 1248 } 1249 break; 1250 1251 case Op.CONDITION: 1252 { 1253 Op.ConditionOp cop = (Op.ConditionOp)op; 1254 boolean matchp = false; 1255 if (cop.refNumber > 0) { 1256 if (cop.refNumber >= this.nofparen) 1257 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+cop.refNumber); 1258 matchp = con.match.getBeginning(cop.refNumber) >= 0 1259 && con.match.getEnd(cop.refNumber) >= 0; 1260 } else { 1261 matchp = 0 <= this. matchCharArray (con, cop.condition, offset, dx, opts); 1262 } 1263 1264 if (matchp) { 1265 op = cop.yes; 1266 } else if (cop.no != null) { 1267 op = cop.no; 1268 } else { 1269 op = cop.next; 1270 } 1271 } 1272 break; 1273 1274 default: 1275 throw new RuntimeException ("Unknown operation type: "+op.type); 1276 } } } 1279 1280 private static final int getPreviousWordType(char[] target, int begin, int end, 1281 int offset, int opts) { 1282 int ret = getWordType(target, begin, end, --offset, opts); 1283 while (ret == WT_IGNORE) 1284 ret = getWordType(target, begin, end, --offset, opts); 1285 return ret; 1286 } 1287 1288 private static final int getWordType(char[] target, int begin, int end, 1289 int offset, int opts) { 1290 if (offset < begin || offset >= end) return WT_OTHER; 1291 return getWordType0( target [ offset ] , opts); 1292 } 1293 1294 1295 1296 private static final boolean regionMatches(char[] target, int offset, int limit, 1297 String part, int partlen) { 1298 if (offset < 0) return false; 1299 if (limit-offset < partlen) 1300 return false; 1301 int i = 0; 1302 while (partlen-- > 0) { 1303 if ( target [ offset++ ] != part.charAt(i++)) 1304 return false; 1305 } 1306 return true; 1307 } 1308 1309 private static final boolean regionMatches(char[] target, int offset, int limit, 1310 int offset2, int partlen) { 1311 if (offset < 0) return false; 1312 if (limit-offset < partlen) 1313 return false; 1314 int i = offset2; 1315 while (partlen-- > 0) { 1316 if ( target [ offset++ ] != target [ i++ ] ) 1317 return false; 1318 } 1319 return true; 1320 } 1321 1322 1325 private static final boolean regionMatchesIgnoreCase(char[] target, int offset, int limit, 1326 String part, int partlen) { 1327 if (offset < 0) return false; 1328 if (limit-offset < partlen) 1329 return false; 1330 int i = 0; 1331 while (partlen-- > 0) { 1332 char ch1 = target [ offset++ ] ; 1333 char ch2 = part.charAt(i++); 1334 if (ch1 == ch2) 1335 continue; 1336 char uch1 = Character.toUpperCase(ch1); 1337 char uch2 = Character.toUpperCase(ch2); 1338 if (uch1 == uch2) 1339 continue; 1340 if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) 1341 return false; 1342 } 1343 return true; 1344 } 1345 1346 private static final boolean regionMatchesIgnoreCase(char[] target, int offset, int limit, 1347 int offset2, int partlen) { 1348 if (offset < 0) return false; 1349 if (limit-offset < partlen) 1350 return false; 1351 int i = offset2; 1352 while (partlen-- > 0) { 1353 char ch1 = target [ offset++ ] ; 1354 char ch2 = target [ i++ ] ; 1355 if (ch1 == ch2) 1356 continue; 1357 char uch1 = Character.toUpperCase(ch1); 1358 char uch2 = Character.toUpperCase(ch2); 1359 if (uch1 == uch2) 1360 continue; 1361 if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) 1362 return false; 1363 } 1364 return true; 1365 } 1366 1367 1368 1369 1370 1375 public boolean matches(String target) { 1376 return this.matches(target, 0, target .length() , (Match)null); 1377 } 1378 1379 1387 public boolean matches(String target, int start, int end) { 1388 return this.matches(target, start, end, (Match)null); 1389 } 1390 1391 1397 public boolean matches(String target, Match match) { 1398 return this.matches(target, 0, target .length() , match); 1399 } 1400 1401 1410 public boolean matches(String target, int start, int end, Match match) { 1411 1412 synchronized (this) { 1413 if (this.operations == null) 1414 this.prepare(); 1415 if (this.context == null) 1416 this.context = new Context(); 1417 } 1418 Context con = null; 1419 synchronized (this.context) { 1420 con = this.context.inuse ? new Context() : this.context; 1421 con.reset(target, start, end, this.numberOfClosures); 1422 } 1423 if (match != null) { 1424 match.setNumberOfGroups(this.nofparen); 1425 match.setSource(target); 1426 } else if (this.hasBackReferences) { 1427 match = new Match(); 1428 match.setNumberOfGroups(this.nofparen); 1429 } 1432 con.match = match; 1433 1434 if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) { 1435 if (DEBUG) { 1436 System.err.println("target string="+target); 1437 } 1438 int matchEnd = this. matchString (con, this.operations, con.start, 1, this.options); 1439 if (DEBUG) { 1440 System.err.println("matchEnd="+matchEnd); 1441 System.err.println("con.limit="+con.limit); 1442 } 1443 if (matchEnd == con.limit) { 1444 if (con.match != null) { 1445 con.match.setBeginning(0, con.start); 1446 con.match.setEnd(0, matchEnd); 1447 } 1448 con.inuse = false; 1449 return true; 1450 } 1451 return false; 1452 } 1453 1454 1458 if (this.fixedStringOnly) { 1459 int o = this.fixedStringTable.matches(target, con.start, con.limit); 1461 if (o >= 0) { 1462 if (con.match != null) { 1463 con.match.setBeginning(0, o); 1464 con.match.setEnd(0, o+this.fixedString.length()); 1465 } 1466 con.inuse = false; 1467 return true; 1468 } 1469 con.inuse = false; 1470 return false; 1471 } 1472 1473 1478 if (this.fixedString != null) { 1479 int o = this.fixedStringTable.matches(target, con.start, con.limit); 1480 if (o < 0) { 1481 con.inuse = false; 1483 return false; 1484 } 1485 } 1486 1487 int limit = con.limit-this.minlength; 1488 int matchStart; 1489 int matchEnd = -1; 1490 1491 1494 if (this.operations != null 1495 && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { 1496 if (isSet(this.options, SINGLE_LINE)) { 1497 matchStart = con.start; 1498 matchEnd = this. matchString (con, this.operations, con.start, 1, this.options); 1499 } else { 1500 boolean previousIsEOL = true; 1501 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 1502 int ch = target .charAt( matchStart ) ; 1503 if (isEOLChar(ch)) { 1504 previousIsEOL = true; 1505 } else { 1506 if (previousIsEOL) { 1507 if (0 <= (matchEnd = this. matchString (con, this.operations, 1508 matchStart, 1, this.options))) 1509 break; 1510 } 1511 previousIsEOL = false; 1512 } 1513 } 1514 } 1515 } 1516 1517 1520 else if (this.firstChar != null) { 1521 RangeToken range = this.firstChar; 1523 if (RegularExpression.isSet(this.options, IGNORE_CASE)) { 1524 range = this.firstChar.getCaseInsensitiveToken(); 1525 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 1526 int ch = target .charAt( matchStart ) ; 1527 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { 1528 ch = REUtil.composeFromSurrogates(ch, target .charAt( matchStart+1 ) ); 1529 if (!range.match(ch)) continue; 1530 } else { 1531 if (!range.match(ch)) { 1532 char ch1 = Character.toUpperCase((char)ch); 1533 if (!range.match(ch1)) 1534 if (!range.match(Character.toLowerCase(ch1))) 1535 continue; 1536 } 1537 } 1538 if (0 <= (matchEnd = this. matchString (con, this.operations, 1539 matchStart, 1, this.options))) 1540 break; 1541 } 1542 } else { 1543 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 1544 int ch = target .charAt( matchStart ) ; 1545 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) 1546 ch = REUtil.composeFromSurrogates(ch, target .charAt( matchStart+1 ) ); 1547 if (!range.match(ch)) continue; 1548 if (0 <= (matchEnd = this. matchString (con, this.operations, 1549 matchStart, 1, this.options))) 1550 break; 1551 } 1552 } 1553 } 1554 1555 1558 else { 1559 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 1560 if (0 <= (matchEnd = this. matchString (con, this.operations, matchStart, 1, this.options))) 1561 break; 1562 } 1563 } 1564 1565 if (matchEnd >= 0) { 1566 if (con.match != null) { 1567 con.match.setBeginning(0, matchStart); 1568 con.match.setEnd(0, matchEnd); 1569 } 1570 con.inuse = false; 1571 return true; 1572 } else { 1573 con.inuse = false; 1574 return false; 1575 } 1576 } 1577 1578 1581 private int matchString (Context con, Op op, int offset, int dx, int opts) { 1582 1583 1584 1585 1586 String target = con.strTarget; 1587 1588 1589 1590 1591 while (true) { 1592 if (op == null) 1593 return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; 1594 if (offset > con.limit || offset < con.start) 1595 return -1; 1596 switch (op.type) { 1597 case Op.CHAR: 1598 if (isSet(opts, IGNORE_CASE)) { 1599 int ch = op.getData(); 1600 if (dx > 0) { 1601 if (offset >= con.limit || !matchIgnoreCase(ch, target .charAt( offset ) )) 1602 return -1; 1603 offset ++; 1604 } else { 1605 int o1 = offset-1; 1606 if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target .charAt( o1 ) )) 1607 return -1; 1608 offset = o1; 1609 } 1610 } else { 1611 int ch = op.getData(); 1612 if (dx > 0) { 1613 if (offset >= con.limit || ch != target .charAt( offset ) ) 1614 return -1; 1615 offset ++; 1616 } else { 1617 int o1 = offset-1; 1618 if (o1 >= con.limit || o1 < 0 || ch != target .charAt( o1 ) ) 1619 return -1; 1620 offset = o1; 1621 } 1622 } 1623 op = op.next; 1624 break; 1625 1626 case Op.DOT: 1627 if (dx > 0) { 1628 if (offset >= con.limit) 1629 return -1; 1630 int ch = target .charAt( offset ) ; 1631 if (isSet(opts, SINGLE_LINE)) { 1632 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 1633 offset ++; 1634 } else { 1635 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 1636 ch = REUtil.composeFromSurrogates(ch, target .charAt( ++offset ) ); 1637 if (isEOLChar(ch)) 1638 return -1; 1639 } 1640 offset ++; 1641 } else { 1642 int o1 = offset-1; 1643 if (o1 >= con.limit || o1 < 0) 1644 return -1; 1645 int ch = target .charAt( o1 ) ; 1646 if (isSet(opts, SINGLE_LINE)) { 1647 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 1648 o1 --; 1649 } else { 1650 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 1651 ch = REUtil.composeFromSurrogates( target .charAt( --o1 ) , ch); 1652 if (!isEOLChar(ch)) 1653 return -1; 1654 } 1655 offset = o1; 1656 } 1657 op = op.next; 1658 break; 1659 1660 case Op.RANGE: 1661 case Op.NRANGE: 1662 if (dx > 0) { 1663 if (offset >= con.limit) 1664 return -1; 1665 int ch = target .charAt( offset ) ; 1666 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 1667 ch = REUtil.composeFromSurrogates(ch, target .charAt( ++offset ) ); 1668 RangeToken tok = op.getToken(); 1669 if (isSet(opts, IGNORE_CASE)) { 1670 tok = tok.getCaseInsensitiveToken(); 1671 if (!tok.match(ch)) { 1672 if (ch >= 0x10000) return -1; 1673 char uch; 1674 if (!tok.match(uch = Character.toUpperCase((char)ch)) 1675 && !tok.match(Character.toLowerCase(uch))) 1676 return -1; 1677 } 1678 } else { 1679 if (!tok.match(ch)) return -1; 1680 } 1681 offset ++; 1682 } else { 1683 int o1 = offset-1; 1684 if (o1 >= con.limit || o1 < 0) 1685 return -1; 1686 int ch = target .charAt( o1 ) ; 1687 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 1688 ch = REUtil.composeFromSurrogates( target .charAt( --o1 ) , ch); 1689 RangeToken tok = op.getToken(); 1690 if (isSet(opts, IGNORE_CASE)) { 1691 tok = tok.getCaseInsensitiveToken(); 1692 if (!tok.match(ch)) { 1693 if (ch >= 0x10000) return -1; 1694 char uch; 1695 if (!tok.match(uch = Character.toUpperCase((char)ch)) 1696 && !tok.match(Character.toLowerCase(uch))) 1697 return -1; 1698 } 1699 } else { 1700 if (!tok.match(ch)) return -1; 1701 } 1702 offset = o1; 1703 } 1704 op = op.next; 1705 break; 1706 1707 case Op.ANCHOR: 1708 boolean go = false; 1709 switch (op.getData()) { 1710 case '^': 1711 if (isSet(opts, MULTIPLE_LINES)) { 1712 if (!(offset == con.start 1713 || offset > con.start && isEOLChar( target .charAt( offset-1 ) ))) 1714 return -1; 1715 } else { 1716 if (offset != con.start) 1717 return -1; 1718 } 1719 break; 1720 1721 case '@': if (!(offset == con.start 1724 || offset > con.start && isEOLChar( target .charAt( offset-1 ) ))) 1725 return -1; 1726 break; 1727 1728 case '$': 1729 if (isSet(opts, MULTIPLE_LINES)) { 1730 if (!(offset == con.limit 1731 || offset < con.limit && isEOLChar( target .charAt( offset ) ))) 1732 return -1; 1733 } else { 1734 if (!(offset == con.limit 1735 || offset+1 == con.limit && isEOLChar( target .charAt( offset ) ) 1736 || offset+2 == con.limit && target .charAt( offset ) == CARRIAGE_RETURN 1737 && target .charAt( offset+1 ) == LINE_FEED)) 1738 return -1; 1739 } 1740 break; 1741 1742 case 'A': 1743 if (offset != con.start) return -1; 1744 break; 1745 1746 case 'Z': 1747 if (!(offset == con.limit 1748 || offset+1 == con.limit && isEOLChar( target .charAt( offset ) ) 1749 || offset+2 == con.limit && target .charAt( offset ) == CARRIAGE_RETURN 1750 && target .charAt( offset+1 ) == LINE_FEED)) 1751 return -1; 1752 break; 1753 1754 case 'z': 1755 if (offset != con.limit) return -1; 1756 break; 1757 1758 case 'b': 1759 if (con.length == 0) return -1; 1760 { 1761 int after = getWordType(target, con.start, con.limit, offset, opts); 1762 if (after == WT_IGNORE) return -1; 1763 int before = getPreviousWordType(target, con.start, con.limit, offset, opts); 1764 if (after == before) return -1; 1765 } 1766 break; 1767 1768 case 'B': 1769 if (con.length == 0) 1770 go = true; 1771 else { 1772 int after = getWordType(target, con.start, con.limit, offset, opts); 1773 go = after == WT_IGNORE 1774 || after == getPreviousWordType(target, con.start, con.limit, offset, opts); 1775 } 1776 if (!go) return -1; 1777 break; 1778 1779 case '<': 1780 if (con.length == 0 || offset == con.limit) return -1; 1781 if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER 1782 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) 1783 return -1; 1784 break; 1785 1786 case '>': 1787 if (con.length == 0 || offset == con.start) return -1; 1788 if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER 1789 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) 1790 return -1; 1791 break; 1792 } op = op.next; 1794 break; 1795 1796 case Op.BACKREFERENCE: 1797 { 1798 int refno = op.getData(); 1799 if (refno <= 0 || refno >= this.nofparen) 1800 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+refno); 1801 if (con.match.getBeginning(refno) < 0 1802 || con.match.getEnd(refno) < 0) 1803 return -1; int o2 = con.match.getBeginning(refno); 1805 int literallen = con.match.getEnd(refno)-o2; 1806 if (!isSet(opts, IGNORE_CASE)) { 1807 if (dx > 0) { 1808 if (!regionMatches(target, offset, con.limit, o2, literallen)) 1809 return -1; 1810 offset += literallen; 1811 } else { 1812 if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) 1813 return -1; 1814 offset -= literallen; 1815 } 1816 } else { 1817 if (dx > 0) { 1818 if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) 1819 return -1; 1820 offset += literallen; 1821 } else { 1822 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 1823 o2, literallen)) 1824 return -1; 1825 offset -= literallen; 1826 } 1827 } 1828 } 1829 op = op.next; 1830 break; 1831 case Op.STRING: 1832 { 1833 String literal = op.getString(); 1834 int literallen = literal.length(); 1835 if (!isSet(opts, IGNORE_CASE)) { 1836 if (dx > 0) { 1837 if (!regionMatches(target, offset, con.limit, literal, literallen)) 1838 return -1; 1839 offset += literallen; 1840 } else { 1841 if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) 1842 return -1; 1843 offset -= literallen; 1844 } 1845 } else { 1846 if (dx > 0) { 1847 if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) 1848 return -1; 1849 offset += literallen; 1850 } else { 1851 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 1852 literal, literallen)) 1853 return -1; 1854 offset -= literallen; 1855 } 1856 } 1857 } 1858 op = op.next; 1859 break; 1860 1861 case Op.CLOSURE: 1862 { 1863 1867 int id = op.getData(); 1868 if (id >= 0) { 1869 int previousOffset = con.offsets[id]; 1870 if (previousOffset < 0 || previousOffset != offset) { 1871 con.offsets[id] = offset; 1872 } else { 1873 con.offsets[id] = -1; 1874 op = op.next; 1875 break; 1876 } 1877 } 1878 int ret = this. matchString (con, op.getChild(), offset, dx, opts); 1879 if (id >= 0) con.offsets[id] = -1; 1880 if (ret >= 0) return ret; 1881 op = op.next; 1882 } 1883 break; 1884 1885 case Op.QUESTION: 1886 { 1887 int ret = this. matchString (con, op.getChild(), offset, dx, opts); 1888 if (ret >= 0) return ret; 1889 op = op.next; 1890 } 1891 break; 1892 1893 case Op.NONGREEDYCLOSURE: 1894 case Op.NONGREEDYQUESTION: 1895 { 1896 int ret = this. matchString (con, op.next, offset, dx, opts); 1897 if (ret >= 0) return ret; 1898 op = op.getChild(); 1899 } 1900 break; 1901 1902 case Op.UNION: 1903 for (int i = 0; i < op.size(); i ++) { 1904 int ret = this. matchString (con, op.elementAt(i), offset, dx, opts); 1905 if (DEBUG) { 1906 System.err.println("UNION: "+i+", ret="+ret); 1907 } 1908 if (ret >= 0) return ret; 1909 } 1910 return -1; 1911 1912 case Op.CAPTURE: 1913 int refno = op.getData(); 1914 if (con.match != null && refno > 0) { 1915 int save = con.match.getBeginning(refno); 1916 con.match.setBeginning(refno, offset); 1917 int ret = this. matchString (con, op.next, offset, dx, opts); 1918 if (ret < 0) con.match.setBeginning(refno, save); 1919 return ret; 1920 } else if (con.match != null && refno < 0) { 1921 int index = -refno; 1922 int save = con.match.getEnd(index); 1923 con.match.setEnd(index, offset); 1924 int ret = this. matchString (con, op.next, offset, dx, opts); 1925 if (ret < 0) con.match.setEnd(index, save); 1926 return ret; 1927 } 1928 op = op.next; 1929 break; 1930 1931 case Op.LOOKAHEAD: 1932 if (0 > this. matchString (con, op.getChild(), offset, 1, opts)) return -1; 1933 op = op.next; 1934 break; 1935 case Op.NEGATIVELOOKAHEAD: 1936 if (0 <= this. matchString (con, op.getChild(), offset, 1, opts)) return -1; 1937 op = op.next; 1938 break; 1939 case Op.LOOKBEHIND: 1940 if (0 > this. matchString (con, op.getChild(), offset, -1, opts)) return -1; 1941 op = op.next; 1942 break; 1943 case Op.NEGATIVELOOKBEHIND: 1944 if (0 <= this. matchString (con, op.getChild(), offset, -1, opts)) return -1; 1945 op = op.next; 1946 break; 1947 1948 case Op.INDEPENDENT: 1949 { 1950 int ret = this. matchString (con, op.getChild(), offset, dx, opts); 1951 if (ret < 0) return ret; 1952 offset = ret; 1953 op = op.next; 1954 } 1955 break; 1956 1957 case Op.MODIFIER: 1958 { 1959 int localopts = opts; 1960 localopts |= op.getData(); 1961 localopts &= ~op.getData2(); 1962 int ret = this. matchString (con, op.getChild(), offset, dx, localopts); 1964 if (ret < 0) return ret; 1965 offset = ret; 1966 op = op.next; 1967 } 1968 break; 1969 1970 case Op.CONDITION: 1971 { 1972 Op.ConditionOp cop = (Op.ConditionOp)op; 1973 boolean matchp = false; 1974 if (cop.refNumber > 0) { 1975 if (cop.refNumber >= this.nofparen) 1976 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+cop.refNumber); 1977 matchp = con.match.getBeginning(cop.refNumber) >= 0 1978 && con.match.getEnd(cop.refNumber) >= 0; 1979 } else { 1980 matchp = 0 <= this. matchString (con, cop.condition, offset, dx, opts); 1981 } 1982 1983 if (matchp) { 1984 op = cop.yes; 1985 } else if (cop.no != null) { 1986 op = cop.no; 1987 } else { 1988 op = cop.next; 1989 } 1990 } 1991 break; 1992 1993 default: 1994 throw new RuntimeException ("Unknown operation type: "+op.type); 1995 } } } 1998 1999 private static final int getPreviousWordType(String target, int begin, int end, 2000 int offset, int opts) { 2001 int ret = getWordType(target, begin, end, --offset, opts); 2002 while (ret == WT_IGNORE) 2003 ret = getWordType(target, begin, end, --offset, opts); 2004 return ret; 2005 } 2006 2007 private static final int getWordType(String target, int begin, int end, 2008 int offset, int opts) { 2009 if (offset < begin || offset >= end) return WT_OTHER; 2010 return getWordType0( target .charAt( offset ) , opts); 2011 } 2012 2013 2014 private static final boolean regionMatches(String text, int offset, int limit, 2015 String part, int partlen) { 2016 if (limit-offset < partlen) return false; 2017 return text.regionMatches(offset, part, 0, partlen); 2018 } 2019 2020 private static final boolean regionMatches(String text, int offset, int limit, 2021 int offset2, int partlen) { 2022 if (limit-offset < partlen) return false; 2023 return text.regionMatches(offset, text, offset2, partlen); 2024 } 2025 2026 private static final boolean regionMatchesIgnoreCase(String text, int offset, int limit, 2027 String part, int partlen) { 2028 return text.regionMatches(true, offset, part, 0, partlen); 2029 } 2030 2031 private static final boolean regionMatchesIgnoreCase(String text, int offset, int limit, 2032 int offset2, int partlen) { 2033 if (limit-offset < partlen) return false; 2034 return text.regionMatches(true, offset, text, offset2, partlen); 2035 } 2036 2037 2038 2039 2040 2041 2042 2043 2048 public boolean matches(CharacterIterator target) { 2049 return this.matches(target, (Match)null); 2050 } 2051 2052 2053 2059 public boolean matches(CharacterIterator target, Match match) { 2060 int start = target.getBeginIndex(); 2061 int end = target.getEndIndex(); 2062 2063 2064 2065 synchronized (this) { 2066 if (this.operations == null) 2067 this.prepare(); 2068 if (this.context == null) 2069 this.context = new Context(); 2070 } 2071 Context con = null; 2072 synchronized (this.context) { 2073 con = this.context.inuse ? new Context() : this.context; 2074 con.reset(target, start, end, this.numberOfClosures); 2075 } 2076 if (match != null) { 2077 match.setNumberOfGroups(this.nofparen); 2078 match.setSource(target); 2079 } else if (this.hasBackReferences) { 2080 match = new Match(); 2081 match.setNumberOfGroups(this.nofparen); 2082 } 2085 con.match = match; 2086 2087 if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) { 2088 int matchEnd = this. matchCharacterIterator (con, this.operations, con.start, 1, this.options); 2089 if (matchEnd == con.limit) { 2091 if (con.match != null) { 2092 con.match.setBeginning(0, con.start); 2093 con.match.setEnd(0, matchEnd); 2094 } 2095 con.inuse = false; 2096 return true; 2097 } 2098 return false; 2099 } 2100 2101 2105 if (this.fixedStringOnly) { 2106 int o = this.fixedStringTable.matches(target, con.start, con.limit); 2108 if (o >= 0) { 2109 if (con.match != null) { 2110 con.match.setBeginning(0, o); 2111 con.match.setEnd(0, o+this.fixedString.length()); 2112 } 2113 con.inuse = false; 2114 return true; 2115 } 2116 con.inuse = false; 2117 return false; 2118 } 2119 2120 2125 if (this.fixedString != null) { 2126 int o = this.fixedStringTable.matches(target, con.start, con.limit); 2127 if (o < 0) { 2128 con.inuse = false; 2130 return false; 2131 } 2132 } 2133 2134 int limit = con.limit-this.minlength; 2135 int matchStart; 2136 int matchEnd = -1; 2137 2138 2141 if (this.operations != null 2142 && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { 2143 if (isSet(this.options, SINGLE_LINE)) { 2144 matchStart = con.start; 2145 matchEnd = this. matchCharacterIterator (con, this.operations, con.start, 1, this.options); 2146 } else { 2147 boolean previousIsEOL = true; 2148 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 2149 int ch = target .setIndex( matchStart ) ; 2150 if (isEOLChar(ch)) { 2151 previousIsEOL = true; 2152 } else { 2153 if (previousIsEOL) { 2154 if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, 2155 matchStart, 1, this.options))) 2156 break; 2157 } 2158 previousIsEOL = false; 2159 } 2160 } 2161 } 2162 } 2163 2164 2167 else if (this.firstChar != null) { 2168 RangeToken range = this.firstChar; 2170 if (RegularExpression.isSet(this.options, IGNORE_CASE)) { 2171 range = this.firstChar.getCaseInsensitiveToken(); 2172 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 2173 int ch = target .setIndex( matchStart ) ; 2174 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { 2175 ch = REUtil.composeFromSurrogates(ch, target .setIndex( matchStart+1 ) ); 2176 if (!range.match(ch)) continue; 2177 } else { 2178 if (!range.match(ch)) { 2179 char ch1 = Character.toUpperCase((char)ch); 2180 if (!range.match(ch1)) 2181 if (!range.match(Character.toLowerCase(ch1))) 2182 continue; 2183 } 2184 } 2185 if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, 2186 matchStart, 1, this.options))) 2187 break; 2188 } 2189 } else { 2190 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 2191 int ch = target .setIndex( matchStart ) ; 2192 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) 2193 ch = REUtil.composeFromSurrogates(ch, target .setIndex( matchStart+1 ) ); 2194 if (!range.match(ch)) continue; 2195 if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, 2196 matchStart, 1, this.options))) 2197 break; 2198 } 2199 } 2200 } 2201 2202 2205 else { 2206 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 2207 if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, matchStart, 1, this.options))) 2208 break; 2209 } 2210 } 2211 2212 if (matchEnd >= 0) { 2213 if (con.match != null) { 2214 con.match.setBeginning(0, matchStart); 2215 con.match.setEnd(0, matchEnd); 2216 } 2217 con.inuse = false; 2218 return true; 2219 } else { 2220 con.inuse = false; 2221 return false; 2222 } 2223 } 2224 2225 2228 private int matchCharacterIterator (Context con, Op op, int offset, int dx, int opts) { 2229 2230 2231 CharacterIterator target = con.ciTarget; 2232 2233 2234 2235 2236 2237 2238 while (true) { 2239 if (op == null) 2240 return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; 2241 if (offset > con.limit || offset < con.start) 2242 return -1; 2243 switch (op.type) { 2244 case Op.CHAR: 2245 if (isSet(opts, IGNORE_CASE)) { 2246 int ch = op.getData(); 2247 if (dx > 0) { 2248 if (offset >= con.limit || !matchIgnoreCase(ch, target .setIndex( offset ) )) 2249 return -1; 2250 offset ++; 2251 } else { 2252 int o1 = offset-1; 2253 if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target .setIndex( o1 ) )) 2254 return -1; 2255 offset = o1; 2256 } 2257 } else { 2258 int ch = op.getData(); 2259 if (dx > 0) { 2260 if (offset >= con.limit || ch != target .setIndex( offset ) ) 2261 return -1; 2262 offset ++; 2263 } else { 2264 int o1 = offset-1; 2265 if (o1 >= con.limit || o1 < 0 || ch != target .setIndex( o1 ) ) 2266 return -1; 2267 offset = o1; 2268 } 2269 } 2270 op = op.next; 2271 break; 2272 2273 case Op.DOT: 2274 if (dx > 0) { 2275 if (offset >= con.limit) 2276 return -1; 2277 int ch = target .setIndex( offset ) ; 2278 if (isSet(opts, SINGLE_LINE)) { 2279 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 2280 offset ++; 2281 } else { 2282 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 2283 ch = REUtil.composeFromSurrogates(ch, target .setIndex( ++offset ) ); 2284 if (isEOLChar(ch)) 2285 return -1; 2286 } 2287 offset ++; 2288 } else { 2289 int o1 = offset-1; 2290 if (o1 >= con.limit || o1 < 0) 2291 return -1; 2292 int ch = target .setIndex( o1 ) ; 2293 if (isSet(opts, SINGLE_LINE)) { 2294 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 2295 o1 --; 2296 } else { 2297 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 2298 ch = REUtil.composeFromSurrogates( target .setIndex( --o1 ) , ch); 2299 if (!isEOLChar(ch)) 2300 return -1; 2301 } 2302 offset = o1; 2303 } 2304 op = op.next; 2305 break; 2306 2307 case Op.RANGE: 2308 case Op.NRANGE: 2309 if (dx > 0) { 2310 if (offset >= con.limit) 2311 return -1; 2312 int ch = target .setIndex( offset ) ; 2313 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 2314 ch = REUtil.composeFromSurrogates(ch, target .setIndex( ++offset ) ); 2315 RangeToken tok = op.getToken(); 2316 if (isSet(opts, IGNORE_CASE)) { 2317 tok = tok.getCaseInsensitiveToken(); 2318 if (!tok.match(ch)) { 2319 if (ch >= 0x10000) return -1; 2320 char uch; 2321 if (!tok.match(uch = Character.toUpperCase((char)ch)) 2322 && !tok.match(Character.toLowerCase(uch))) 2323 return -1; 2324 } 2325 } else { 2326 if (!tok.match(ch)) return -1; 2327 } 2328 offset ++; 2329 } else { 2330 int o1 = offset-1; 2331 if (o1 >= con.limit || o1 < 0) 2332 return -1; 2333 int ch = target .setIndex( o1 ) ; 2334 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 2335 ch = REUtil.composeFromSurrogates( target .setIndex( --o1 ) , ch); 2336 RangeToken tok = op.getToken(); 2337 if (isSet(opts, IGNORE_CASE)) { 2338 tok = tok.getCaseInsensitiveToken(); 2339 if (!tok.match(ch)) { 2340 if (ch >= 0x10000) return -1; 2341 char uch; 2342 if (!tok.match(uch = Character.toUpperCase((char)ch)) 2343 && !tok.match(Character.toLowerCase(uch))) 2344 return -1; 2345 } 2346 } else { 2347 if (!tok.match(ch)) return -1; 2348 } 2349 offset = o1; 2350 } 2351 op = op.next; 2352 break; 2353 2354 case Op.ANCHOR: 2355 boolean go = false; 2356 switch (op.getData()) { 2357 case '^': 2358 if (isSet(opts, MULTIPLE_LINES)) { 2359 if (!(offset == con.start 2360 || offset > con.start && isEOLChar( target .setIndex( offset-1 ) ))) 2361 return -1; 2362 } else { 2363 if (offset != con.start) 2364 return -1; 2365 } 2366 break; 2367 2368 case '@': if (!(offset == con.start 2371 || offset > con.start && isEOLChar( target .setIndex( offset-1 ) ))) 2372 return -1; 2373 break; 2374 2375 case '$': 2376 if (isSet(opts, MULTIPLE_LINES)) { 2377 if (!(offset == con.limit 2378 || offset < con.limit && isEOLChar( target .setIndex( offset ) ))) 2379 return -1; 2380 } else { 2381 if (!(offset == con.limit 2382 || offset+1 == con.limit && isEOLChar( target .setIndex( offset ) ) 2383 || offset+2 == con.limit && target .setIndex( offset ) == CARRIAGE_RETURN 2384 && target .setIndex( offset+1 ) == LINE_FEED)) 2385 return -1; 2386 } 2387 break; 2388 2389 case 'A': 2390 if (offset != con.start) return -1; 2391 break; 2392 2393 case 'Z': 2394 if (!(offset == con.limit 2395 || offset+1 == con.limit && isEOLChar( target .setIndex( offset ) ) 2396 || offset+2 == con.limit && target .setIndex( offset ) == CARRIAGE_RETURN 2397 && target .setIndex( offset+1 ) == LINE_FEED)) 2398 return -1; 2399 break; 2400 2401 case 'z': 2402 if (offset != con.limit) return -1; 2403 break; 2404 2405 case 'b': 2406 if (con.length == 0) return -1; 2407 { 2408 int after = getWordType(target, con.start, con.limit, offset, opts); 2409 if (after == WT_IGNORE) return -1; 2410 int before = getPreviousWordType(target, con.start, con.limit, offset, opts); 2411 if (after == before) return -1; 2412 } 2413 break; 2414 2415 case 'B': 2416 if (con.length == 0) 2417 go = true; 2418 else { 2419 int after = getWordType(target, con.start, con.limit, offset, opts); 2420 go = after == WT_IGNORE 2421 || after == getPreviousWordType(target, con.start, con.limit, offset, opts); 2422 } 2423 if (!go) return -1; 2424 break; 2425 2426 case '<': 2427 if (con.length == 0 || offset == con.limit) return -1; 2428 if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER 2429 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) 2430 return -1; 2431 break; 2432 2433 case '>': 2434 if (con.length == 0 || offset == con.start) return -1; 2435 if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER 2436 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) 2437 return -1; 2438 break; 2439 } op = op.next; 2441 break; 2442 2443 case Op.BACKREFERENCE: 2444 { 2445 int refno = op.getData(); 2446 if (refno <= 0 || refno >= this.nofparen) 2447 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+refno); 2448 if (con.match.getBeginning(refno) < 0 2449 || con.match.getEnd(refno) < 0) 2450 return -1; int o2 = con.match.getBeginning(refno); 2452 int literallen = con.match.getEnd(refno)-o2; 2453 if (!isSet(opts, IGNORE_CASE)) { 2454 if (dx > 0) { 2455 if (!regionMatches(target, offset, con.limit, o2, literallen)) 2456 return -1; 2457 offset += literallen; 2458 } else { 2459 if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) 2460 return -1; 2461 offset -= literallen; 2462 } 2463 } else { 2464 if (dx > 0) { 2465 if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) 2466 return -1; 2467 offset += literallen; 2468 } else { 2469 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 2470 o2, literallen)) 2471 return -1; 2472 offset -= literallen; 2473 } 2474 } 2475 } 2476 op = op.next; 2477 break; 2478 case Op.STRING: 2479 { 2480 String literal = op.getString(); 2481 int literallen = literal.length(); 2482 if (!isSet(opts, IGNORE_CASE)) { 2483 if (dx > 0) { 2484 if (!regionMatches(target, offset, con.limit, literal, literallen)) 2485 return -1; 2486 offset += literallen; 2487 } else { 2488 if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) 2489 return -1; 2490 offset -= literallen; 2491 } 2492 } else { 2493 if (dx > 0) { 2494 if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) 2495 return -1; 2496 offset += literallen; 2497 } else { 2498 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 2499 literal, literallen)) 2500 return -1; 2501 offset -= literallen; 2502 } 2503 } 2504 } 2505 op = op.next; 2506 break; 2507 2508 case Op.CLOSURE: 2509 { 2510 2514 int id = op.getData(); 2515 if (id >= 0) { 2516 int previousOffset = con.offsets[id]; 2517 if (previousOffset < 0 || previousOffset != offset) { 2518 con.offsets[id] = offset; 2519 } else { 2520 con.offsets[id] = -1; 2521 op = op.next; 2522 break; 2523 } 2524 } 2525 2526 int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); 2527 if (id >= 0) con.offsets[id] = -1; 2528 if (ret >= 0) return ret; 2529 op = op.next; 2530 } 2531 break; 2532 2533 case Op.QUESTION: 2534 { 2535 int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); 2536 if (ret >= 0) return ret; 2537 op = op.next; 2538 } 2539 break; 2540 2541 case Op.NONGREEDYCLOSURE: 2542 case Op.NONGREEDYQUESTION: 2543 { 2544 int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); 2545 if (ret >= 0) return ret; 2546 op = op.getChild(); 2547 } 2548 break; 2549 2550 case Op.UNION: 2551 for (int i = 0; i < op.size(); i ++) { 2552 int ret = this. matchCharacterIterator (con, op.elementAt(i), offset, dx, opts); 2553 if (DEBUG) { 2554 System.err.println("UNION: "+i+", ret="+ret); 2555 } 2556 if (ret >= 0) return ret; 2557 } 2558 return -1; 2559 2560 case Op.CAPTURE: 2561 int refno = op.getData(); 2562 if (con.match != null && refno > 0) { 2563 int save = con.match.getBeginning(refno); 2564 con.match.setBeginning(refno, offset); 2565 int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); 2566 if (ret < 0) con.match.setBeginning(refno, save); 2567 return ret; 2568 } else if (con.match != null && refno < 0) { 2569 int index = -refno; 2570 int save = con.match.getEnd(index); 2571 con.match.setEnd(index, offset); 2572 int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); 2573 if (ret < 0) con.match.setEnd(index, save); 2574 return ret; 2575 } 2576 op = op.next; 2577 break; 2578 2579 case Op.LOOKAHEAD: 2580 if (0 > this. matchCharacterIterator (con, op.getChild(), offset, 1, opts)) return -1; 2581 op = op.next; 2582 break; 2583 case Op.NEGATIVELOOKAHEAD: 2584 if (0 <= this. matchCharacterIterator (con, op.getChild(), offset, 1, opts)) return -1; 2585 op = op.next; 2586 break; 2587 case Op.LOOKBEHIND: 2588 if (0 > this. matchCharacterIterator (con, op.getChild(), offset, -1, opts)) return -1; 2589 op = op.next; 2590 break; 2591 case Op.NEGATIVELOOKBEHIND: 2592 if (0 <= this. matchCharacterIterator (con, op.getChild(), offset, -1, opts)) return -1; 2593 op = op.next; 2594 break; 2595 2596 case Op.INDEPENDENT: 2597 { 2598 int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); 2599 if (ret < 0) return ret; 2600 offset = ret; 2601 op = op.next; 2602 } 2603 break; 2604 2605 case Op.MODIFIER: 2606 { 2607 int localopts = opts; 2608 localopts |= op.getData(); 2609 localopts &= ~op.getData2(); 2610 int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, localopts); 2612 if (ret < 0) return ret; 2613 offset = ret; 2614 op = op.next; 2615 } 2616 break; 2617 2618 case Op.CONDITION: 2619 { 2620 Op.ConditionOp cop = (Op.ConditionOp)op; 2621 boolean matchp = false; 2622 if (cop.refNumber > 0) { 2623 if (cop.refNumber >= this.nofparen) 2624 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+cop.refNumber); 2625 matchp = con.match.getBeginning(cop.refNumber) >= 0 2626 && con.match.getEnd(cop.refNumber) >= 0; 2627 } else { 2628 matchp = 0 <= this. matchCharacterIterator (con, cop.condition, offset, dx, opts); 2629 } 2630 2631 if (matchp) { 2632 op = cop.yes; 2633 } else if (cop.no != null) { 2634 op = cop.no; 2635 } else { 2636 op = cop.next; 2637 } 2638 } 2639 break; 2640 2641 default: 2642 throw new RuntimeException ("Unknown operation type: "+op.type); 2643 } } } 2646 2647 private static final int getPreviousWordType(CharacterIterator target, int begin, int end, 2648 int offset, int opts) { 2649 int ret = getWordType(target, begin, end, --offset, opts); 2650 while (ret == WT_IGNORE) 2651 ret = getWordType(target, begin, end, --offset, opts); 2652 return ret; 2653 } 2654 2655 private static final int getWordType(CharacterIterator target, int begin, int end, 2656 int offset, int opts) { 2657 if (offset < begin || offset >= end) return WT_OTHER; 2658 return getWordType0( target .setIndex( offset ) , opts); 2659 } 2660 2661 2662 2663 private static final boolean regionMatches(CharacterIterator target, int offset, int limit, 2664 String part, int partlen) { 2665 if (offset < 0) return false; 2666 if (limit-offset < partlen) 2667 return false; 2668 int i = 0; 2669 while (partlen-- > 0) { 2670 if ( target .setIndex( offset++ ) != part.charAt(i++)) 2671 return false; 2672 } 2673 return true; 2674 } 2675 2676 private static final boolean regionMatches(CharacterIterator target, int offset, int limit, 2677 int offset2, int partlen) { 2678 if (offset < 0) return false; 2679 if (limit-offset < partlen) 2680 return false; 2681 int i = offset2; 2682 while (partlen-- > 0) { 2683 if ( target .setIndex( offset++ ) != target .setIndex( i++ ) ) 2684 return false; 2685 } 2686 return true; 2687 } 2688 2689 2692 private static final boolean regionMatchesIgnoreCase(CharacterIterator target, int offset, int limit, 2693 String part, int partlen) { 2694 if (offset < 0) return false; 2695 if (limit-offset < partlen) 2696 return false; 2697 int i = 0; 2698 while (partlen-- > 0) { 2699 char ch1 = target .setIndex( offset++ ) ; 2700 char ch2 = part.charAt(i++); 2701 if (ch1 == ch2) 2702 continue; 2703 char uch1 = Character.toUpperCase(ch1); 2704 char uch2 = Character.toUpperCase(ch2); 2705 if (uch1 == uch2) 2706 continue; 2707 if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) 2708 return false; 2709 } 2710 return true; 2711 } 2712 2713 private static final boolean regionMatchesIgnoreCase(CharacterIterator target, int offset, int limit, 2714 int offset2, int partlen) { 2715 if (offset < 0) return false; 2716 if (limit-offset < partlen) 2717 return false; 2718 int i = offset2; 2719 while (partlen-- > 0) { 2720 char ch1 = target .setIndex( offset++ ) ; 2721 char ch2 = target .setIndex( i++ ) ; 2722 if (ch1 == ch2) 2723 continue; 2724 char uch1 = Character.toUpperCase(ch1); 2725 char uch2 = Character.toUpperCase(ch2); 2726 if (uch1 == uch2) 2727 continue; 2728 if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) 2729 return false; 2730 } 2731 return true; 2732 } 2733 2734 2735 2736 2737 2739 2743 String regex; 2744 2747 int options; 2748 2749 2753 int nofparen; 2754 2758 Token tokentree; 2759 2760 boolean hasBackReferences = false; 2761 2762 transient int minlength; 2763 transient Op operations = null; 2764 transient int numberOfClosures; 2765 transient Context context = null; 2766 transient RangeToken firstChar = null; 2767 2768 transient String fixedString = null; 2769 transient int fixedStringOptions; 2770 transient BMPattern fixedStringTable = null; 2771 transient boolean fixedStringOnly = false; 2772 2773 2774 static final class Context { 2775 CharacterIterator ciTarget; 2776 String strTarget; 2777 char[] charTarget; 2778 int start; 2779 int limit; 2780 int length; 2781 Match match; 2782 boolean inuse = false; 2783 int[] offsets; 2784 2785 Context() { 2786 } 2787 2788 private void resetCommon(int nofclosures) { 2789 this.length = this.limit-this.start; 2790 this.inuse = true; 2791 this.match = null; 2792 if (this.offsets == null || this.offsets.length != nofclosures) 2793 this.offsets = new int[nofclosures]; 2794 for (int i = 0; i < nofclosures; i ++) this.offsets[i] = -1; 2795 } 2796 void reset(CharacterIterator target, int start, int limit, int nofclosures) { 2797 this.ciTarget = target; 2798 this.start = start; 2799 this.limit = limit; 2800 this.resetCommon(nofclosures); 2801 } 2802 void reset(String target, int start, int limit, int nofclosures) { 2803 this.strTarget = target; 2804 this.start = start; 2805 this.limit = limit; 2806 this.resetCommon(nofclosures); 2807 } 2808 void reset(char[] target, int start, int limit, int nofclosures) { 2809 this.charTarget = target; 2810 this.start = start; 2811 this.limit = limit; 2812 this.resetCommon(nofclosures); 2813 } 2814 } 2815 2816 2819 void prepare() { 2820 if (Op.COUNT) Op.nofinstances = 0; 2821 this.compile(this.tokentree); 2822 2829 if (Op.COUNT) System.err.println("DEBUG: The number of operations: "+Op.nofinstances); 2830 2831 this.minlength = this.tokentree.getMinLength(); 2832 2833 this.firstChar = null; 2834 if (!isSet(this.options, PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) 2835 && !isSet(this.options, XMLSCHEMA_MODE)) { 2836 RangeToken firstChar = Token.createRange(); 2837 int fresult = this.tokentree.analyzeFirstCharacter(firstChar, this.options); 2838 if (fresult == Token.FC_TERMINAL) { 2839 firstChar.compactRanges(); 2840 this.firstChar = firstChar; 2841 if (DEBUG) 2842 System.err.println("DEBUG: Use the first character optimization: "+firstChar); 2843 } 2844 } 2845 2846 if (this.operations != null 2847 && (this.operations.type == Op.STRING || this.operations.type == Op.CHAR) 2848 && this.operations.next == null) { 2849 if (DEBUG) 2850 System.err.print(" *** Only fixed string! *** "); 2851 this.fixedStringOnly = true; 2852 if (this.operations.type == Op.STRING) 2853 this.fixedString = this.operations.getString(); 2854 else if (this.operations.getData() >= 0x10000) { this.fixedString = REUtil.decomposeToSurrogates(this.operations.getData()); 2856 } else { 2857 char[] ac = new char[1]; 2858 ac[0] = (char)this.operations.getData(); 2859 this.fixedString = new String (ac); 2860 } 2861 this.fixedStringOptions = this.options; 2862 this.fixedStringTable = new BMPattern(this.fixedString, 256, 2863 isSet(this.fixedStringOptions, IGNORE_CASE)); 2864 } else if (!isSet(this.options, PROHIBIT_FIXED_STRING_OPTIMIZATION) 2865 && !isSet(this.options, XMLSCHEMA_MODE)) { 2866 Token.FixedStringContainer container = new Token.FixedStringContainer(); 2867 this.tokentree.findFixedString(container, this.options); 2868 this.fixedString = container.token == null ? null : container.token.getString(); 2869 this.fixedStringOptions = container.options; 2870 if (this.fixedString != null && this.fixedString.length() < 2) 2871 this.fixedString = null; 2872 if (this.fixedString != null) { 2874 this.fixedStringTable = new BMPattern(this.fixedString, 256, 2875 isSet(this.fixedStringOptions, IGNORE_CASE)); 2876 if (DEBUG) { 2877 System.err.println("DEBUG: The longest fixed string: "+this.fixedString.length() 2878 +"/" +"/"+REUtil.createOptionString(this.fixedStringOptions)); 2880 System.err.print("String: "); 2881 REUtil.dumpString(this.fixedString); 2882 } 2883 } 2884 } 2885 } 2886 2887 2897 2898 2901 static final int IGNORE_CASE = 1<<1; 2902 2903 2906 static final int SINGLE_LINE = 1<<2; 2907 2908 2911 static final int MULTIPLE_LINES = 1<<3; 2912 2913 2916 static final int EXTENDED_COMMENT = 1<<4; 2917 2918 2925 static final int USE_UNICODE_CATEGORY = 1<<5; 2927 2939 static final int UNICODE_WORD_BOUNDARY = 1<<6; 2941 2944 static final int PROHIBIT_HEAD_CHARACTER_OPTIMIZATION = 1<<7; 2945 2948 static final int PROHIBIT_FIXED_STRING_OPTIMIZATION = 1<<8; 2949 2952 static final int XMLSCHEMA_MODE = 1<<9; 2953 2956 static final int SPECIAL_COMMA = 1<<10; 2957 2958 2959 private static final boolean isSet(int options, int flag) { 2960 return (options & flag) == flag; 2961 } 2962 2963 2969 public RegularExpression(String regex) throws ParseException { 2970 this.setPattern(regex, null); 2971 } 2972 2973 2980 public RegularExpression(String regex, String options) throws ParseException { 2981 this.setPattern(regex, options); 2982 } 2983 2984 RegularExpression(String regex, Token tok, int parens, boolean hasBackReferences, int options) { 2985 this.regex = regex; 2986 this.tokentree = tok; 2987 this.nofparen = parens; 2988 this.options = options; 2989 this.hasBackReferences = hasBackReferences; 2990 } 2991 2992 2995 public void setPattern(String newPattern) throws ParseException { 2996 this.setPattern(newPattern, this.options); 2997 } 2998 2999 private void setPattern(String newPattern, int options) throws ParseException { 3000 this.regex = newPattern; 3001 this.options = options; 3002 RegexParser rp = RegularExpression.isSet(this.options, RegularExpression.XMLSCHEMA_MODE) 3003 ? new ParserForXMLSchema() : new RegexParser(); 3004 this.tokentree = rp.parse(this.regex, this.options); 3005 this.nofparen = rp.parennumber; 3006 this.hasBackReferences = rp.hasBackReferences; 3007 3008 this.operations = null; 3009 this.context = null; 3010 } 3011 3014 public void setPattern(String newPattern, String options) throws ParseException { 3015 this.setPattern(newPattern, REUtil.parseOptions(options)); 3016 } 3017 3018 3021 public String getPattern() { 3022 return this.regex; 3023 } 3024 3025 3028 public String toString() { 3029 return this.tokentree.toString(this.options); 3030 } 3031 3032 3040 public String getOptions() { 3041 return REUtil.createOptionString(this.options); 3042 } 3043 3044 3047 public boolean equals(Object obj) { 3048 if (obj == null) return false; 3049 if (!(obj instanceof RegularExpression)) 3050 return false; 3051 RegularExpression r = (RegularExpression)obj; 3052 return this.regex.equals(r.regex) && this.options == r.options; 3053 } 3054 3055 boolean equals(String pattern, int options) { 3056 return this.regex.equals(pattern) && this.options == options; 3057 } 3058 3059 3062 public int hashCode() { 3063 return (this.regex+"/"+this.getOptions()).hashCode(); 3064 } 3065 3066 3071 public int getNumberOfGroups() { 3072 return this.nofparen; 3073 } 3074 3075 3077 private static final int WT_IGNORE = 0; 3078 private static final int WT_LETTER = 1; 3079 private static final int WT_OTHER = 2; 3080 private static final int getWordType0(char ch, int opts) { 3081 if (!isSet(opts, UNICODE_WORD_BOUNDARY)) { 3082 if (isSet(opts, USE_UNICODE_CATEGORY)) { 3083 return (Token.getRange("IsWord", true).match(ch)) ? WT_LETTER : WT_OTHER; 3084 } 3085 return isWordChar(ch) ? WT_LETTER : WT_OTHER; 3086 } 3087 3088 switch (Character.getType(ch)) { 3089 case Character.UPPERCASE_LETTER: case Character.LOWERCASE_LETTER: case Character.TITLECASE_LETTER: case Character.MODIFIER_LETTER: case Character.OTHER_LETTER: case Character.LETTER_NUMBER: case Character.DECIMAL_DIGIT_NUMBER: case Character.OTHER_NUMBER: case Character.COMBINING_SPACING_MARK: return WT_LETTER; 3099 3100 case Character.FORMAT: case Character.NON_SPACING_MARK: case Character.ENCLOSING_MARK: return WT_IGNORE; 3104 3105 case Character.CONTROL: switch (ch) { 3107 case '\t': 3108 case '\n': 3109 case '\u000B': 3110 case '\f': 3111 case '\r': 3112 return WT_OTHER; 3113 default: 3114 return WT_IGNORE; 3115 } 3116 3117 default: 3118 return WT_OTHER; 3119 } 3120 } 3121 3122 3124 static final int LINE_FEED = 0x000A; 3125 static final int CARRIAGE_RETURN = 0x000D; 3126 static final int LINE_SEPARATOR = 0x2028; 3127 static final int PARAGRAPH_SEPARATOR = 0x2029; 3128 3129 private static final boolean isEOLChar(int ch) { 3130 return ch == LINE_FEED || ch == CARRIAGE_RETURN || ch == LINE_SEPARATOR 3131 || ch == PARAGRAPH_SEPARATOR; 3132 } 3133 3134 private static final boolean isWordChar(int ch) { if (ch == '_') return true; 3136 if (ch < '0') return false; 3137 if (ch > 'z') return false; 3138 if (ch <= '9') return true; 3139 if (ch < 'A') return false; 3140 if (ch <= 'Z') return true; 3141 if (ch < 'a') return false; 3142 return true; 3143 } 3144 3145 private static final boolean matchIgnoreCase(int chardata, int ch) { 3146 if (chardata == ch) return true; 3147 if (chardata > 0xffff || ch > 0xffff) return false; 3148 char uch1 = Character.toUpperCase((char)chardata); 3149 char uch2 = Character.toUpperCase((char)ch); 3150 if (uch1 == uch2) return true; 3151 return Character.toLowerCase(uch1) == Character.toLowerCase(uch2); 3152 } 3153} 3154 | Popular Tags |