1 57 58 package com.sun.org.apache.xerces.internal.impl.xpath.regex; 59 60 import java.text.CharacterIterator ; 61 62 522 public class RegularExpression implements java.io.Serializable { 523 static final boolean DEBUG = false; 524 525 528 private synchronized void compile(Token tok) { 529 if (this.operations != null) 530 return; 531 this.numberOfClosures = 0; 532 this.operations = this.compile(tok, null, false); 533 } 534 535 538 private Op compile(Token tok, Op next, boolean reverse) { 539 Op ret; 540 switch (tok.type) { 541 case Token.DOT: 542 ret = Op.createDot(); 543 ret.next = next; 544 break; 545 546 case Token.CHAR: 547 ret = Op.createChar(tok.getChar()); 548 ret.next = next; 549 break; 550 551 case Token.ANCHOR: 552 ret = Op.createAnchor(tok.getChar()); 553 ret.next = next; 554 break; 555 556 case Token.RANGE: 557 case Token.NRANGE: 558 ret = Op.createRange(tok); 559 ret.next = next; 560 break; 561 562 case Token.CONCAT: 563 ret = next; 564 if (!reverse) { 565 for (int i = tok.size()-1; i >= 0; i --) { 566 ret = compile(tok.getChild(i), ret, false); 567 } 568 } else { 569 for (int i = 0; i < tok.size(); i ++) { 570 ret = compile(tok.getChild(i), ret, true); 571 } 572 } 573 break; 574 575 case Token.UNION: 576 Op.UnionOp uni = Op.createUnion(tok.size()); 577 for (int i = 0; i < tok.size(); i ++) { 578 uni.addElement(compile(tok.getChild(i), next, reverse)); 579 } 580 ret = uni; break; 582 583 case Token.CLOSURE: 584 case Token.NONGREEDYCLOSURE: 585 Token child = tok.getChild(0); 586 int min = tok.getMin(); 587 int max = tok.getMax(); 588 if (min >= 0 && min == max) { ret = next; 590 for (int i = 0; i < min; i ++) { 591 ret = compile(child, ret, reverse); 592 } 593 break; 594 } 595 if (min > 0 && max > 0) 596 max -= min; 597 if (max > 0) { 598 ret = next; 600 for (int i = 0; i < max; i ++) { 601 Op.ChildOp q = Op.createQuestion(tok.type == Token.NONGREEDYCLOSURE); 602 q.next = next; 603 q.setChild(compile(child, ret, reverse)); 604 ret = q; 605 } 606 } else { 607 Op.ChildOp op; 608 if (tok.type == Token.NONGREEDYCLOSURE) { 609 op = Op.createNonGreedyClosure(); 610 } else { if (child.getMinLength() == 0) 612 op = Op.createClosure(this.numberOfClosures++); 613 else 614 op = Op.createClosure(-1); 615 } 616 op.next = next; 617 op.setChild(compile(child, op, reverse)); 618 ret = op; 619 } 620 if (min > 0) { 621 for (int i = 0; i < min; i ++) { 622 ret = compile(child, ret, reverse); 623 } 624 } 625 break; 626 627 case Token.EMPTY: 628 ret = next; 629 break; 630 631 case Token.STRING: 632 ret = Op.createString(tok.getString()); 633 ret.next = next; 634 break; 635 636 case Token.BACKREFERENCE: 637 ret = Op.createBackReference(tok.getReferenceNumber()); 638 ret.next = next; 639 break; 640 641 case Token.PAREN: 642 if (tok.getParenNumber() == 0) { 643 ret = compile(tok.getChild(0), next, reverse); 644 } else if (reverse) { 645 next = Op.createCapture(tok.getParenNumber(), next); 646 next = compile(tok.getChild(0), next, reverse); 647 ret = Op.createCapture(-tok.getParenNumber(), next); 648 } else { 649 next = Op.createCapture(-tok.getParenNumber(), next); 650 next = compile(tok.getChild(0), next, reverse); 651 ret = Op.createCapture(tok.getParenNumber(), next); 652 } 653 break; 654 655 case Token.LOOKAHEAD: 656 ret = Op.createLook(Op.LOOKAHEAD, next, compile(tok.getChild(0), null, false)); 657 break; 658 case Token.NEGATIVELOOKAHEAD: 659 ret = Op.createLook(Op.NEGATIVELOOKAHEAD, next, compile(tok.getChild(0), null, false)); 660 break; 661 case Token.LOOKBEHIND: 662 ret = Op.createLook(Op.LOOKBEHIND, next, compile(tok.getChild(0), null, true)); 663 break; 664 case Token.NEGATIVELOOKBEHIND: 665 ret = Op.createLook(Op.NEGATIVELOOKBEHIND, next, compile(tok.getChild(0), null, true)); 666 break; 667 668 case Token.INDEPENDENT: 669 ret = Op.createIndependent(next, compile(tok.getChild(0), null, reverse)); 670 break; 671 672 case Token.MODIFIERGROUP: 673 ret = Op.createModifier(next, compile(tok.getChild(0), null, reverse), 674 ((Token.ModifierToken)tok).getOptions(), 675 ((Token.ModifierToken)tok).getOptionsMask()); 676 break; 677 678 case Token.CONDITION: 679 Token.ConditionToken ctok = (Token.ConditionToken)tok; 680 int ref = ctok.refNumber; 681 Op condition = ctok.condition == null ? null : compile(ctok.condition, null, reverse); 682 Op yes = compile(ctok.yes, next, reverse); 683 Op no = ctok.no == null ? null : compile(ctok.no, next, reverse); 684 ret = Op.createCondition(next, ref, condition, yes, no); 685 break; 686 687 default: 688 throw new RuntimeException ("Unknown token type: "+tok.type); 689 } return ret; 691 } 692 693 694 696 701 public boolean matches(char[] target) { 702 return this.matches(target, 0, target .length , (Match)null); 703 } 704 705 713 public boolean matches(char[] target, int start, int end) { 714 return this.matches(target, start, end, (Match)null); 715 } 716 717 723 public boolean matches(char[] target, Match match) { 724 return this.matches(target, 0, target .length , match); 725 } 726 727 728 737 public boolean matches(char[] target, int start, int end, Match match) { 738 739 synchronized (this) { 740 if (this.operations == null) 741 this.prepare(); 742 if (this.context == null) 743 this.context = new Context(); 744 } 745 Context con = null; 746 synchronized (this.context) { 747 con = this.context.inuse ? new Context() : this.context; 748 con.reset(target, start, end, this.numberOfClosures); 749 } 750 if (match != null) { 751 match.setNumberOfGroups(this.nofparen); 752 match.setSource(target); 753 } else if (this.hasBackReferences) { 754 match = new Match(); 755 match.setNumberOfGroups(this.nofparen); 756 } 759 con.match = match; 760 761 if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) { 762 int matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options); 763 if (matchEnd == con.limit) { 765 if (con.match != null) { 766 con.match.setBeginning(0, con.start); 767 con.match.setEnd(0, matchEnd); 768 } 769 con.inuse = false; 770 return true; 771 } 772 return false; 773 } 774 775 779 if (this.fixedStringOnly) { 780 int o = this.fixedStringTable.matches(target, con.start, con.limit); 782 if (o >= 0) { 783 if (con.match != null) { 784 con.match.setBeginning(0, o); 785 con.match.setEnd(0, o+this.fixedString.length()); 786 } 787 con.inuse = false; 788 return true; 789 } 790 con.inuse = false; 791 return false; 792 } 793 794 799 if (this.fixedString != null) { 800 int o = this.fixedStringTable.matches(target, con.start, con.limit); 801 if (o < 0) { 802 con.inuse = false; 804 return false; 805 } 806 } 807 808 int limit = con.limit-this.minlength; 809 int matchStart; 810 int matchEnd = -1; 811 812 815 if (this.operations != null 816 && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { 817 if (isSet(this.options, SINGLE_LINE)) { 818 matchStart = con.start; 819 matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options); 820 } else { 821 boolean previousIsEOL = true; 822 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 823 int ch = target [ matchStart ] ; 824 if (isEOLChar(ch)) { 825 previousIsEOL = true; 826 } else { 827 if (previousIsEOL) { 828 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 829 matchStart, 1, this.options))) 830 break; 831 } 832 previousIsEOL = false; 833 } 834 } 835 } 836 } 837 838 841 else if (this.firstChar != null) { 842 RangeToken range = this.firstChar; 844 if (RegularExpression.isSet(this.options, IGNORE_CASE)) { 845 range = this.firstChar.getCaseInsensitiveToken(); 846 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 847 int ch = target [ matchStart ] ; 848 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { 849 ch = REUtil.composeFromSurrogates(ch, target [ matchStart+1 ] ); 850 if (!range.match(ch)) continue; 851 } else { 852 if (!range.match(ch)) { 853 char ch1 = Character.toUpperCase((char)ch); 854 if (!range.match(ch1)) 855 if (!range.match(Character.toLowerCase(ch1))) 856 continue; 857 } 858 } 859 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 860 matchStart, 1, this.options))) 861 break; 862 } 863 } else { 864 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 865 int ch = target [ matchStart ] ; 866 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) 867 ch = REUtil.composeFromSurrogates(ch, target [ matchStart+1 ] ); 868 if (!range.match(ch)) continue; 869 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 870 matchStart, 1, this.options))) 871 break; 872 } 873 } 874 } 875 876 879 else { 880 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 881 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, matchStart, 1, this.options))) 882 break; 883 } 884 } 885 886 if (matchEnd >= 0) { 887 if (con.match != null) { 888 con.match.setBeginning(0, matchStart); 889 con.match.setEnd(0, matchEnd); 890 } 891 con.inuse = false; 892 return true; 893 } else { 894 con.inuse = false; 895 return false; 896 } 897 } 898 899 902 private int matchCharArray (Context con, Op op, int offset, int dx, int opts) { 903 904 char[] target = con.charTarget; 905 906 907 while (true) { 908 if (op == null) 909 return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; 910 if (offset > con.limit || offset < con.start) 911 return -1; 912 switch (op.type) { 913 case Op.CHAR: 914 if (isSet(opts, IGNORE_CASE)) { 915 int ch = op.getData(); 916 if (dx > 0) { 917 if (offset >= con.limit || !matchIgnoreCase(ch, target [ offset ] )) 918 return -1; 919 offset ++; 920 } else { 921 int o1 = offset-1; 922 if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target [ o1 ] )) 923 return -1; 924 offset = o1; 925 } 926 } else { 927 int ch = op.getData(); 928 if (dx > 0) { 929 if (offset >= con.limit || ch != target [ offset ] ) 930 return -1; 931 offset ++; 932 } else { 933 int o1 = offset-1; 934 if (o1 >= con.limit || o1 < 0 || ch != target [ o1 ] ) 935 return -1; 936 offset = o1; 937 } 938 } 939 op = op.next; 940 break; 941 942 case Op.DOT: 943 if (dx > 0) { 944 if (offset >= con.limit) 945 return -1; 946 int ch = target [ offset ] ; 947 if (isSet(opts, SINGLE_LINE)) { 948 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 949 offset ++; 950 } else { 951 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 952 ch = REUtil.composeFromSurrogates(ch, target [ ++offset ] ); 953 if (isEOLChar(ch)) 954 return -1; 955 } 956 offset ++; 957 } else { 958 int o1 = offset-1; 959 if (o1 >= con.limit || o1 < 0) 960 return -1; 961 int ch = target [ o1 ] ; 962 if (isSet(opts, SINGLE_LINE)) { 963 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 964 o1 --; 965 } else { 966 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 967 ch = REUtil.composeFromSurrogates( target [ --o1 ] , ch); 968 if (!isEOLChar(ch)) 969 return -1; 970 } 971 offset = o1; 972 } 973 op = op.next; 974 break; 975 976 case Op.RANGE: 977 case Op.NRANGE: 978 if (dx > 0) { 979 if (offset >= con.limit) 980 return -1; 981 int ch = target [ offset ] ; 982 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 983 ch = REUtil.composeFromSurrogates(ch, target [ ++offset ] ); 984 RangeToken tok = op.getToken(); 985 if (isSet(opts, IGNORE_CASE)) { 986 tok = tok.getCaseInsensitiveToken(); 987 if (!tok.match(ch)) { 988 if (ch >= 0x10000) return -1; 989 char uch; 990 if (!tok.match(uch = Character.toUpperCase((char)ch)) 991 && !tok.match(Character.toLowerCase(uch))) 992 return -1; 993 } 994 } else { 995 if (!tok.match(ch)) return -1; 996 } 997 offset ++; 998 } else { 999 int o1 = offset-1; 1000 if (o1 >= con.limit || o1 < 0) 1001 return -1; 1002 int ch = target [ o1 ] ; 1003 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 1004 ch = REUtil.composeFromSurrogates( target [ --o1 ] , ch); 1005 RangeToken tok = op.getToken(); 1006 if (isSet(opts, IGNORE_CASE)) { 1007 tok = tok.getCaseInsensitiveToken(); 1008 if (!tok.match(ch)) { 1009 if (ch >= 0x10000) return -1; 1010 char uch; 1011 if (!tok.match(uch = Character.toUpperCase((char)ch)) 1012 && !tok.match(Character.toLowerCase(uch))) 1013 return -1; 1014 } 1015 } else { 1016 if (!tok.match(ch)) return -1; 1017 } 1018 offset = o1; 1019 } 1020 op = op.next; 1021 break; 1022 1023 case Op.ANCHOR: 1024 boolean go = false; 1025 switch (op.getData()) { 1026 case '^': 1027 if (isSet(opts, MULTIPLE_LINES)) { 1028 if (!(offset == con.start 1029 || offset > con.start && isEOLChar( target [ offset-1 ] ))) 1030 return -1; 1031 } else { 1032 if (offset != con.start) 1033 return -1; 1034 } 1035 break; 1036 1037 case '@': if (!(offset == con.start 1040 || offset > con.start && isEOLChar( target [ offset-1 ] ))) 1041 return -1; 1042 break; 1043 1044 case '$': 1045 if (isSet(opts, MULTIPLE_LINES)) { 1046 if (!(offset == con.limit 1047 || offset < con.limit && isEOLChar( target [ offset ] ))) 1048 return -1; 1049 } else { 1050 if (!(offset == con.limit 1051 || offset+1 == con.limit && isEOLChar( target [ offset ] ) 1052 || offset+2 == con.limit && target [ offset ] == CARRIAGE_RETURN 1053 && target [ offset+1 ] == LINE_FEED)) 1054 return -1; 1055 } 1056 break; 1057 1058 case 'A': 1059 if (offset != con.start) return -1; 1060 break; 1061 1062 case 'Z': 1063 if (!(offset == con.limit 1064 || offset+1 == con.limit && isEOLChar( target [ offset ] ) 1065 || offset+2 == con.limit && target [ offset ] == CARRIAGE_RETURN 1066 && target [ offset+1 ] == LINE_FEED)) 1067 return -1; 1068 break; 1069 1070 case 'z': 1071 if (offset != con.limit) return -1; 1072 break; 1073 1074 case 'b': 1075 if (con.length == 0) return -1; 1076 { 1077 int after = getWordType(target, con.start, con.limit, offset, opts); 1078 if (after == WT_IGNORE) return -1; 1079 int before = getPreviousWordType(target, con.start, con.limit, offset, opts); 1080 if (after == before) return -1; 1081 } 1082 break; 1083 1084 case 'B': 1085 if (con.length == 0) 1086 go = true; 1087 else { 1088 int after = getWordType(target, con.start, con.limit, offset, opts); 1089 go = after == WT_IGNORE 1090 || after == getPreviousWordType(target, con.start, con.limit, offset, opts); 1091 } 1092 if (!go) return -1; 1093 break; 1094 1095 case '<': 1096 if (con.length == 0 || offset == con.limit) return -1; 1097 if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER 1098 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) 1099 return -1; 1100 break; 1101 1102 case '>': 1103 if (con.length == 0 || offset == con.start) return -1; 1104 if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER 1105 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) 1106 return -1; 1107 break; 1108 } op = op.next; 1110 break; 1111 1112 case Op.BACKREFERENCE: 1113 { 1114 int refno = op.getData(); 1115 if (refno <= 0 || refno >= this.nofparen) 1116 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+refno); 1117 if (con.match.getBeginning(refno) < 0 1118 || con.match.getEnd(refno) < 0) 1119 return -1; int o2 = con.match.getBeginning(refno); 1121 int literallen = con.match.getEnd(refno)-o2; 1122 if (!isSet(opts, IGNORE_CASE)) { 1123 if (dx > 0) { 1124 if (!regionMatches(target, offset, con.limit, o2, literallen)) 1125 return -1; 1126 offset += literallen; 1127 } else { 1128 if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) 1129 return -1; 1130 offset -= literallen; 1131 } 1132 } else { 1133 if (dx > 0) { 1134 if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) 1135 return -1; 1136 offset += literallen; 1137 } else { 1138 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 1139 o2, literallen)) 1140 return -1; 1141 offset -= literallen; 1142 } 1143 } 1144 } 1145 op = op.next; 1146 break; 1147 case Op.STRING: 1148 { 1149 String literal = op.getString(); 1150 int literallen = literal.length(); 1151 if (!isSet(opts, IGNORE_CASE)) { 1152 if (dx > 0) { 1153 if (!regionMatches(target, offset, con.limit, literal, literallen)) 1154 return -1; 1155 offset += literallen; 1156 } else { 1157 if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) 1158 return -1; 1159 offset -= literallen; 1160 } 1161 } else { 1162 if (dx > 0) { 1163 if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) 1164 return -1; 1165 offset += literallen; 1166 } else { 1167 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 1168 literal, literallen)) 1169 return -1; 1170 offset -= literallen; 1171 } 1172 } 1173 } 1174 op = op.next; 1175 break; 1176 1177 case Op.CLOSURE: 1178 { 1179 1183 int id = op.getData(); 1184 if (id >= 0) { 1185 int previousOffset = con.offsets[id]; 1186 if (previousOffset < 0 || previousOffset != offset) { 1187 con.offsets[id] = offset; 1188 } else { 1189 con.offsets[id] = -1; 1190 op = op.next; 1191 break; 1192 } 1193 } 1194 1195 int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); 1196 if (id >= 0) con.offsets[id] = -1; 1197 if (ret >= 0) return ret; 1198 op = op.next; 1199 } 1200 break; 1201 1202 case Op.QUESTION: 1203 { 1204 int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); 1205 if (ret >= 0) return ret; 1206 op = op.next; 1207 } 1208 break; 1209 1210 case Op.NONGREEDYCLOSURE: 1211 case Op.NONGREEDYQUESTION: 1212 { 1213 int ret = this. matchCharArray (con, op.next, offset, dx, opts); 1214 if (ret >= 0) return ret; 1215 op = op.getChild(); 1216 } 1217 break; 1218 1219 case Op.UNION: 1220 for (int i = 0; i < op.size(); i ++) { 1221 int ret = this. matchCharArray (con, op.elementAt(i), offset, dx, opts); 1222 if (DEBUG) { 1223 System.err.println("UNION: "+i+", ret="+ret); 1224 } 1225 if (ret >= 0) return ret; 1226 } 1227 return -1; 1228 1229 case Op.CAPTURE: 1230 int refno = op.getData(); 1231 if (con.match != null && refno > 0) { 1232 int save = con.match.getBeginning(refno); 1233 con.match.setBeginning(refno, offset); 1234 int ret = this. matchCharArray (con, op.next, offset, dx, opts); 1235 if (ret < 0) con.match.setBeginning(refno, save); 1236 return ret; 1237 } else if (con.match != null && refno < 0) { 1238 int index = -refno; 1239 int save = con.match.getEnd(index); 1240 con.match.setEnd(index, offset); 1241 int ret = this. matchCharArray (con, op.next, offset, dx, opts); 1242 if (ret < 0) con.match.setEnd(index, save); 1243 return ret; 1244 } 1245 op = op.next; 1246 break; 1247 1248 case Op.LOOKAHEAD: 1249 if (0 > this. matchCharArray (con, op.getChild(), offset, 1, opts)) return -1; 1250 op = op.next; 1251 break; 1252 case Op.NEGATIVELOOKAHEAD: 1253 if (0 <= this. matchCharArray (con, op.getChild(), offset, 1, opts)) return -1; 1254 op = op.next; 1255 break; 1256 case Op.LOOKBEHIND: 1257 if (0 > this. matchCharArray (con, op.getChild(), offset, -1, opts)) return -1; 1258 op = op.next; 1259 break; 1260 case Op.NEGATIVELOOKBEHIND: 1261 if (0 <= this. matchCharArray (con, op.getChild(), offset, -1, opts)) return -1; 1262 op = op.next; 1263 break; 1264 1265 case Op.INDEPENDENT: 1266 { 1267 int ret = this. matchCharArray (con, op.getChild(), offset, dx, opts); 1268 if (ret < 0) return ret; 1269 offset = ret; 1270 op = op.next; 1271 } 1272 break; 1273 1274 case Op.MODIFIER: 1275 { 1276 int localopts = opts; 1277 localopts |= op.getData(); 1278 localopts &= ~op.getData2(); 1279 int ret = this. matchCharArray (con, op.getChild(), offset, dx, localopts); 1281 if (ret < 0) return ret; 1282 offset = ret; 1283 op = op.next; 1284 } 1285 break; 1286 1287 case Op.CONDITION: 1288 { 1289 Op.ConditionOp cop = (Op.ConditionOp)op; 1290 boolean matchp = false; 1291 if (cop.refNumber > 0) { 1292 if (cop.refNumber >= this.nofparen) 1293 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+cop.refNumber); 1294 matchp = con.match.getBeginning(cop.refNumber) >= 0 1295 && con.match.getEnd(cop.refNumber) >= 0; 1296 } else { 1297 matchp = 0 <= this. matchCharArray (con, cop.condition, offset, dx, opts); 1298 } 1299 1300 if (matchp) { 1301 op = cop.yes; 1302 } else if (cop.no != null) { 1303 op = cop.no; 1304 } else { 1305 op = cop.next; 1306 } 1307 } 1308 break; 1309 1310 default: 1311 throw new RuntimeException ("Unknown operation type: "+op.type); 1312 } } } 1315 1316 private static final int getPreviousWordType(char[] target, int begin, int end, 1317 int offset, int opts) { 1318 int ret = getWordType(target, begin, end, --offset, opts); 1319 while (ret == WT_IGNORE) 1320 ret = getWordType(target, begin, end, --offset, opts); 1321 return ret; 1322 } 1323 1324 private static final int getWordType(char[] target, int begin, int end, 1325 int offset, int opts) { 1326 if (offset < begin || offset >= end) return WT_OTHER; 1327 return getWordType0( target [ offset ] , opts); 1328 } 1329 1330 1331 1332 private static final boolean regionMatches(char[] target, int offset, int limit, 1333 String part, int partlen) { 1334 if (offset < 0) return false; 1335 if (limit-offset < partlen) 1336 return false; 1337 int i = 0; 1338 while (partlen-- > 0) { 1339 if ( target [ offset++ ] != part.charAt(i++)) 1340 return false; 1341 } 1342 return true; 1343 } 1344 1345 private static final boolean regionMatches(char[] target, int offset, int limit, 1346 int offset2, int partlen) { 1347 if (offset < 0) return false; 1348 if (limit-offset < partlen) 1349 return false; 1350 int i = offset2; 1351 while (partlen-- > 0) { 1352 if ( target [ offset++ ] != target [ i++ ] ) 1353 return false; 1354 } 1355 return true; 1356 } 1357 1358 1361 private static final boolean regionMatchesIgnoreCase(char[] target, int offset, int limit, 1362 String part, int partlen) { 1363 if (offset < 0) return false; 1364 if (limit-offset < partlen) 1365 return false; 1366 int i = 0; 1367 while (partlen-- > 0) { 1368 char ch1 = target [ offset++ ] ; 1369 char ch2 = part.charAt(i++); 1370 if (ch1 == ch2) 1371 continue; 1372 char uch1 = Character.toUpperCase(ch1); 1373 char uch2 = Character.toUpperCase(ch2); 1374 if (uch1 == uch2) 1375 continue; 1376 if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) 1377 return false; 1378 } 1379 return true; 1380 } 1381 1382 private static final boolean regionMatchesIgnoreCase(char[] target, int offset, int limit, 1383 int offset2, int partlen) { 1384 if (offset < 0) return false; 1385 if (limit-offset < partlen) 1386 return false; 1387 int i = offset2; 1388 while (partlen-- > 0) { 1389 char ch1 = target [ offset++ ] ; 1390 char ch2 = target [ i++ ] ; 1391 if (ch1 == ch2) 1392 continue; 1393 char uch1 = Character.toUpperCase(ch1); 1394 char uch2 = Character.toUpperCase(ch2); 1395 if (uch1 == uch2) 1396 continue; 1397 if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) 1398 return false; 1399 } 1400 return true; 1401 } 1402 1403 1404 1405 1406 1411 public boolean matches(String target) { 1412 return this.matches(target, 0, target .length() , (Match)null); 1413 } 1414 1415 1423 public boolean matches(String target, int start, int end) { 1424 return this.matches(target, start, end, (Match)null); 1425 } 1426 1427 1433 public boolean matches(String target, Match match) { 1434 return this.matches(target, 0, target .length() , match); 1435 } 1436 1437 1446 public boolean matches(String target, int start, int end, Match match) { 1447 1448 synchronized (this) { 1449 if (this.operations == null) 1450 this.prepare(); 1451 if (this.context == null) 1452 this.context = new Context(); 1453 } 1454 Context con = null; 1455 synchronized (this.context) { 1456 con = this.context.inuse ? new Context() : this.context; 1457 con.reset(target, start, end, this.numberOfClosures); 1458 } 1459 if (match != null) { 1460 match.setNumberOfGroups(this.nofparen); 1461 match.setSource(target); 1462 } else if (this.hasBackReferences) { 1463 match = new Match(); 1464 match.setNumberOfGroups(this.nofparen); 1465 } 1468 con.match = match; 1469 1470 if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) { 1471 if (DEBUG) { 1472 System.err.println("target string="+target); 1473 } 1474 int matchEnd = this. matchString (con, this.operations, con.start, 1, this.options); 1475 if (DEBUG) { 1476 System.err.println("matchEnd="+matchEnd); 1477 System.err.println("con.limit="+con.limit); 1478 } 1479 if (matchEnd == con.limit) { 1480 if (con.match != null) { 1481 con.match.setBeginning(0, con.start); 1482 con.match.setEnd(0, matchEnd); 1483 } 1484 con.inuse = false; 1485 return true; 1486 } 1487 return false; 1488 } 1489 1490 1494 if (this.fixedStringOnly) { 1495 int o = this.fixedStringTable.matches(target, con.start, con.limit); 1497 if (o >= 0) { 1498 if (con.match != null) { 1499 con.match.setBeginning(0, o); 1500 con.match.setEnd(0, o+this.fixedString.length()); 1501 } 1502 con.inuse = false; 1503 return true; 1504 } 1505 con.inuse = false; 1506 return false; 1507 } 1508 1509 1514 if (this.fixedString != null) { 1515 int o = this.fixedStringTable.matches(target, con.start, con.limit); 1516 if (o < 0) { 1517 con.inuse = false; 1519 return false; 1520 } 1521 } 1522 1523 int limit = con.limit-this.minlength; 1524 int matchStart; 1525 int matchEnd = -1; 1526 1527 1530 if (this.operations != null 1531 && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { 1532 if (isSet(this.options, SINGLE_LINE)) { 1533 matchStart = con.start; 1534 matchEnd = this. matchString (con, this.operations, con.start, 1, this.options); 1535 } else { 1536 boolean previousIsEOL = true; 1537 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 1538 int ch = target .charAt( matchStart ) ; 1539 if (isEOLChar(ch)) { 1540 previousIsEOL = true; 1541 } else { 1542 if (previousIsEOL) { 1543 if (0 <= (matchEnd = this. matchString (con, this.operations, 1544 matchStart, 1, this.options))) 1545 break; 1546 } 1547 previousIsEOL = false; 1548 } 1549 } 1550 } 1551 } 1552 1553 1556 else if (this.firstChar != null) { 1557 RangeToken range = this.firstChar; 1559 if (RegularExpression.isSet(this.options, IGNORE_CASE)) { 1560 range = this.firstChar.getCaseInsensitiveToken(); 1561 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 1562 int ch = target .charAt( matchStart ) ; 1563 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { 1564 ch = REUtil.composeFromSurrogates(ch, target .charAt( matchStart+1 ) ); 1565 if (!range.match(ch)) continue; 1566 } else { 1567 if (!range.match(ch)) { 1568 char ch1 = Character.toUpperCase((char)ch); 1569 if (!range.match(ch1)) 1570 if (!range.match(Character.toLowerCase(ch1))) 1571 continue; 1572 } 1573 } 1574 if (0 <= (matchEnd = this. matchString (con, this.operations, 1575 matchStart, 1, this.options))) 1576 break; 1577 } 1578 } else { 1579 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 1580 int ch = target .charAt( matchStart ) ; 1581 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) 1582 ch = REUtil.composeFromSurrogates(ch, target .charAt( matchStart+1 ) ); 1583 if (!range.match(ch)) continue; 1584 if (0 <= (matchEnd = this. matchString (con, this.operations, 1585 matchStart, 1, this.options))) 1586 break; 1587 } 1588 } 1589 } 1590 1591 1594 else { 1595 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 1596 if (0 <= (matchEnd = this. matchString (con, this.operations, matchStart, 1, this.options))) 1597 break; 1598 } 1599 } 1600 1601 if (matchEnd >= 0) { 1602 if (con.match != null) { 1603 con.match.setBeginning(0, matchStart); 1604 con.match.setEnd(0, matchEnd); 1605 } 1606 con.inuse = false; 1607 return true; 1608 } else { 1609 con.inuse = false; 1610 return false; 1611 } 1612 } 1613 1614 1617 private int matchString (Context con, Op op, int offset, int dx, int opts) { 1618 1619 1620 1621 1622 String target = con.strTarget; 1623 1624 1625 1626 1627 while (true) { 1628 if (op == null) 1629 return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; 1630 if (offset > con.limit || offset < con.start) 1631 return -1; 1632 switch (op.type) { 1633 case Op.CHAR: 1634 if (isSet(opts, IGNORE_CASE)) { 1635 int ch = op.getData(); 1636 if (dx > 0) { 1637 if (offset >= con.limit || !matchIgnoreCase(ch, target .charAt( offset ) )) 1638 return -1; 1639 offset ++; 1640 } else { 1641 int o1 = offset-1; 1642 if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target .charAt( o1 ) )) 1643 return -1; 1644 offset = o1; 1645 } 1646 } else { 1647 int ch = op.getData(); 1648 if (dx > 0) { 1649 if (offset >= con.limit || ch != target .charAt( offset ) ) 1650 return -1; 1651 offset ++; 1652 } else { 1653 int o1 = offset-1; 1654 if (o1 >= con.limit || o1 < 0 || ch != target .charAt( o1 ) ) 1655 return -1; 1656 offset = o1; 1657 } 1658 } 1659 op = op.next; 1660 break; 1661 1662 case Op.DOT: 1663 if (dx > 0) { 1664 if (offset >= con.limit) 1665 return -1; 1666 int ch = target .charAt( offset ) ; 1667 if (isSet(opts, SINGLE_LINE)) { 1668 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 1669 offset ++; 1670 } else { 1671 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 1672 ch = REUtil.composeFromSurrogates(ch, target .charAt( ++offset ) ); 1673 if (isEOLChar(ch)) 1674 return -1; 1675 } 1676 offset ++; 1677 } else { 1678 int o1 = offset-1; 1679 if (o1 >= con.limit || o1 < 0) 1680 return -1; 1681 int ch = target .charAt( o1 ) ; 1682 if (isSet(opts, SINGLE_LINE)) { 1683 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 1684 o1 --; 1685 } else { 1686 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 1687 ch = REUtil.composeFromSurrogates( target .charAt( --o1 ) , ch); 1688 if (!isEOLChar(ch)) 1689 return -1; 1690 } 1691 offset = o1; 1692 } 1693 op = op.next; 1694 break; 1695 1696 case Op.RANGE: 1697 case Op.NRANGE: 1698 if (dx > 0) { 1699 if (offset >= con.limit) 1700 return -1; 1701 int ch = target .charAt( offset ) ; 1702 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 1703 ch = REUtil.composeFromSurrogates(ch, target .charAt( ++offset ) ); 1704 RangeToken tok = op.getToken(); 1705 if (isSet(opts, IGNORE_CASE)) { 1706 tok = tok.getCaseInsensitiveToken(); 1707 if (!tok.match(ch)) { 1708 if (ch >= 0x10000) return -1; 1709 char uch; 1710 if (!tok.match(uch = Character.toUpperCase((char)ch)) 1711 && !tok.match(Character.toLowerCase(uch))) 1712 return -1; 1713 } 1714 } else { 1715 if (!tok.match(ch)) return -1; 1716 } 1717 offset ++; 1718 } else { 1719 int o1 = offset-1; 1720 if (o1 >= con.limit || o1 < 0) 1721 return -1; 1722 int ch = target .charAt( o1 ) ; 1723 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 1724 ch = REUtil.composeFromSurrogates( target .charAt( --o1 ) , ch); 1725 RangeToken tok = op.getToken(); 1726 if (isSet(opts, IGNORE_CASE)) { 1727 tok = tok.getCaseInsensitiveToken(); 1728 if (!tok.match(ch)) { 1729 if (ch >= 0x10000) return -1; 1730 char uch; 1731 if (!tok.match(uch = Character.toUpperCase((char)ch)) 1732 && !tok.match(Character.toLowerCase(uch))) 1733 return -1; 1734 } 1735 } else { 1736 if (!tok.match(ch)) return -1; 1737 } 1738 offset = o1; 1739 } 1740 op = op.next; 1741 break; 1742 1743 case Op.ANCHOR: 1744 boolean go = false; 1745 switch (op.getData()) { 1746 case '^': 1747 if (isSet(opts, MULTIPLE_LINES)) { 1748 if (!(offset == con.start 1749 || offset > con.start && isEOLChar( target .charAt( offset-1 ) ))) 1750 return -1; 1751 } else { 1752 if (offset != con.start) 1753 return -1; 1754 } 1755 break; 1756 1757 case '@': if (!(offset == con.start 1760 || offset > con.start && isEOLChar( target .charAt( offset-1 ) ))) 1761 return -1; 1762 break; 1763 1764 case '$': 1765 if (isSet(opts, MULTIPLE_LINES)) { 1766 if (!(offset == con.limit 1767 || offset < con.limit && isEOLChar( target .charAt( offset ) ))) 1768 return -1; 1769 } else { 1770 if (!(offset == con.limit 1771 || offset+1 == con.limit && isEOLChar( target .charAt( offset ) ) 1772 || offset+2 == con.limit && target .charAt( offset ) == CARRIAGE_RETURN 1773 && target .charAt( offset+1 ) == LINE_FEED)) 1774 return -1; 1775 } 1776 break; 1777 1778 case 'A': 1779 if (offset != con.start) return -1; 1780 break; 1781 1782 case 'Z': 1783 if (!(offset == con.limit 1784 || offset+1 == con.limit && isEOLChar( target .charAt( offset ) ) 1785 || offset+2 == con.limit && target .charAt( offset ) == CARRIAGE_RETURN 1786 && target .charAt( offset+1 ) == LINE_FEED)) 1787 return -1; 1788 break; 1789 1790 case 'z': 1791 if (offset != con.limit) return -1; 1792 break; 1793 1794 case 'b': 1795 if (con.length == 0) return -1; 1796 { 1797 int after = getWordType(target, con.start, con.limit, offset, opts); 1798 if (after == WT_IGNORE) return -1; 1799 int before = getPreviousWordType(target, con.start, con.limit, offset, opts); 1800 if (after == before) return -1; 1801 } 1802 break; 1803 1804 case 'B': 1805 if (con.length == 0) 1806 go = true; 1807 else { 1808 int after = getWordType(target, con.start, con.limit, offset, opts); 1809 go = after == WT_IGNORE 1810 || after == getPreviousWordType(target, con.start, con.limit, offset, opts); 1811 } 1812 if (!go) return -1; 1813 break; 1814 1815 case '<': 1816 if (con.length == 0 || offset == con.limit) return -1; 1817 if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER 1818 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) 1819 return -1; 1820 break; 1821 1822 case '>': 1823 if (con.length == 0 || offset == con.start) return -1; 1824 if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER 1825 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) 1826 return -1; 1827 break; 1828 } op = op.next; 1830 break; 1831 1832 case Op.BACKREFERENCE: 1833 { 1834 int refno = op.getData(); 1835 if (refno <= 0 || refno >= this.nofparen) 1836 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+refno); 1837 if (con.match.getBeginning(refno) < 0 1838 || con.match.getEnd(refno) < 0) 1839 return -1; int o2 = con.match.getBeginning(refno); 1841 int literallen = con.match.getEnd(refno)-o2; 1842 if (!isSet(opts, IGNORE_CASE)) { 1843 if (dx > 0) { 1844 if (!regionMatches(target, offset, con.limit, o2, literallen)) 1845 return -1; 1846 offset += literallen; 1847 } else { 1848 if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) 1849 return -1; 1850 offset -= literallen; 1851 } 1852 } else { 1853 if (dx > 0) { 1854 if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) 1855 return -1; 1856 offset += literallen; 1857 } else { 1858 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 1859 o2, literallen)) 1860 return -1; 1861 offset -= literallen; 1862 } 1863 } 1864 } 1865 op = op.next; 1866 break; 1867 case Op.STRING: 1868 { 1869 String literal = op.getString(); 1870 int literallen = literal.length(); 1871 if (!isSet(opts, IGNORE_CASE)) { 1872 if (dx > 0) { 1873 if (!regionMatches(target, offset, con.limit, literal, literallen)) 1874 return -1; 1875 offset += literallen; 1876 } else { 1877 if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) 1878 return -1; 1879 offset -= literallen; 1880 } 1881 } else { 1882 if (dx > 0) { 1883 if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) 1884 return -1; 1885 offset += literallen; 1886 } else { 1887 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 1888 literal, literallen)) 1889 return -1; 1890 offset -= literallen; 1891 } 1892 } 1893 } 1894 op = op.next; 1895 break; 1896 1897 case Op.CLOSURE: 1898 { 1899 1903 int id = op.getData(); 1904 if (id >= 0) { 1905 int previousOffset = con.offsets[id]; 1906 if (previousOffset < 0 || previousOffset != offset) { 1907 con.offsets[id] = offset; 1908 } else { 1909 con.offsets[id] = -1; 1910 op = op.next; 1911 break; 1912 } 1913 } 1914 int ret = this. matchString (con, op.getChild(), offset, dx, opts); 1915 if (id >= 0) con.offsets[id] = -1; 1916 if (ret >= 0) return ret; 1917 op = op.next; 1918 } 1919 break; 1920 1921 case Op.QUESTION: 1922 { 1923 int ret = this. matchString (con, op.getChild(), offset, dx, opts); 1924 if (ret >= 0) return ret; 1925 op = op.next; 1926 } 1927 break; 1928 1929 case Op.NONGREEDYCLOSURE: 1930 case Op.NONGREEDYQUESTION: 1931 { 1932 int ret = this. matchString (con, op.next, offset, dx, opts); 1933 if (ret >= 0) return ret; 1934 op = op.getChild(); 1935 } 1936 break; 1937 1938 case Op.UNION: 1939 for (int i = 0; i < op.size(); i ++) { 1940 int ret = this. matchString (con, op.elementAt(i), offset, dx, opts); 1941 if (DEBUG) { 1942 System.err.println("UNION: "+i+", ret="+ret); 1943 } 1944 if (ret >= 0) return ret; 1945 } 1946 return -1; 1947 1948 case Op.CAPTURE: 1949 int refno = op.getData(); 1950 if (con.match != null && refno > 0) { 1951 int save = con.match.getBeginning(refno); 1952 con.match.setBeginning(refno, offset); 1953 int ret = this. matchString (con, op.next, offset, dx, opts); 1954 if (ret < 0) con.match.setBeginning(refno, save); 1955 return ret; 1956 } else if (con.match != null && refno < 0) { 1957 int index = -refno; 1958 int save = con.match.getEnd(index); 1959 con.match.setEnd(index, offset); 1960 int ret = this. matchString (con, op.next, offset, dx, opts); 1961 if (ret < 0) con.match.setEnd(index, save); 1962 return ret; 1963 } 1964 op = op.next; 1965 break; 1966 1967 case Op.LOOKAHEAD: 1968 if (0 > this. matchString (con, op.getChild(), offset, 1, opts)) return -1; 1969 op = op.next; 1970 break; 1971 case Op.NEGATIVELOOKAHEAD: 1972 if (0 <= this. matchString (con, op.getChild(), offset, 1, opts)) return -1; 1973 op = op.next; 1974 break; 1975 case Op.LOOKBEHIND: 1976 if (0 > this. matchString (con, op.getChild(), offset, -1, opts)) return -1; 1977 op = op.next; 1978 break; 1979 case Op.NEGATIVELOOKBEHIND: 1980 if (0 <= this. matchString (con, op.getChild(), offset, -1, opts)) return -1; 1981 op = op.next; 1982 break; 1983 1984 case Op.INDEPENDENT: 1985 { 1986 int ret = this. matchString (con, op.getChild(), offset, dx, opts); 1987 if (ret < 0) return ret; 1988 offset = ret; 1989 op = op.next; 1990 } 1991 break; 1992 1993 case Op.MODIFIER: 1994 { 1995 int localopts = opts; 1996 localopts |= op.getData(); 1997 localopts &= ~op.getData2(); 1998 int ret = this. matchString (con, op.getChild(), offset, dx, localopts); 2000 if (ret < 0) return ret; 2001 offset = ret; 2002 op = op.next; 2003 } 2004 break; 2005 2006 case Op.CONDITION: 2007 { 2008 Op.ConditionOp cop = (Op.ConditionOp)op; 2009 boolean matchp = false; 2010 if (cop.refNumber > 0) { 2011 if (cop.refNumber >= this.nofparen) 2012 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+cop.refNumber); 2013 matchp = con.match.getBeginning(cop.refNumber) >= 0 2014 && con.match.getEnd(cop.refNumber) >= 0; 2015 } else { 2016 matchp = 0 <= this. matchString (con, cop.condition, offset, dx, opts); 2017 } 2018 2019 if (matchp) { 2020 op = cop.yes; 2021 } else if (cop.no != null) { 2022 op = cop.no; 2023 } else { 2024 op = cop.next; 2025 } 2026 } 2027 break; 2028 2029 default: 2030 throw new RuntimeException ("Unknown operation type: "+op.type); 2031 } } } 2034 2035 private static final int getPreviousWordType(String target, int begin, int end, 2036 int offset, int opts) { 2037 int ret = getWordType(target, begin, end, --offset, opts); 2038 while (ret == WT_IGNORE) 2039 ret = getWordType(target, begin, end, --offset, opts); 2040 return ret; 2041 } 2042 2043 private static final int getWordType(String target, int begin, int end, 2044 int offset, int opts) { 2045 if (offset < begin || offset >= end) return WT_OTHER; 2046 return getWordType0( target .charAt( offset ) , opts); 2047 } 2048 2049 2050 private static final boolean regionMatches(String text, int offset, int limit, 2051 String part, int partlen) { 2052 if (limit-offset < partlen) return false; 2053 return text.regionMatches(offset, part, 0, partlen); 2054 } 2055 2056 private static final boolean regionMatches(String text, int offset, int limit, 2057 int offset2, int partlen) { 2058 if (limit-offset < partlen) return false; 2059 return text.regionMatches(offset, text, offset2, partlen); 2060 } 2061 2062 private static final boolean regionMatchesIgnoreCase(String text, int offset, int limit, 2063 String part, int partlen) { 2064 return text.regionMatches(true, offset, part, 0, partlen); 2065 } 2066 2067 private static final boolean regionMatchesIgnoreCase(String text, int offset, int limit, 2068 int offset2, int partlen) { 2069 if (limit-offset < partlen) return false; 2070 return text.regionMatches(true, offset, text, offset2, partlen); 2071 } 2072 2073 2074 2075 2076 2077 2078 2079 2084 public boolean matches(CharacterIterator target) { 2085 return this.matches(target, (Match)null); 2086 } 2087 2088 2089 2095 public boolean matches(CharacterIterator target, Match match) { 2096 int start = target.getBeginIndex(); 2097 int end = target.getEndIndex(); 2098 2099 2100 2101 synchronized (this) { 2102 if (this.operations == null) 2103 this.prepare(); 2104 if (this.context == null) 2105 this.context = new Context(); 2106 } 2107 Context con = null; 2108 synchronized (this.context) { 2109 con = this.context.inuse ? new Context() : this.context; 2110 con.reset(target, start, end, this.numberOfClosures); 2111 } 2112 if (match != null) { 2113 match.setNumberOfGroups(this.nofparen); 2114 match.setSource(target); 2115 } else if (this.hasBackReferences) { 2116 match = new Match(); 2117 match.setNumberOfGroups(this.nofparen); 2118 } 2121 con.match = match; 2122 2123 if (RegularExpression.isSet(this.options, XMLSCHEMA_MODE)) { 2124 int matchEnd = this. matchCharacterIterator (con, this.operations, con.start, 1, this.options); 2125 if (matchEnd == con.limit) { 2127 if (con.match != null) { 2128 con.match.setBeginning(0, con.start); 2129 con.match.setEnd(0, matchEnd); 2130 } 2131 con.inuse = false; 2132 return true; 2133 } 2134 return false; 2135 } 2136 2137 2141 if (this.fixedStringOnly) { 2142 int o = this.fixedStringTable.matches(target, con.start, con.limit); 2144 if (o >= 0) { 2145 if (con.match != null) { 2146 con.match.setBeginning(0, o); 2147 con.match.setEnd(0, o+this.fixedString.length()); 2148 } 2149 con.inuse = false; 2150 return true; 2151 } 2152 con.inuse = false; 2153 return false; 2154 } 2155 2156 2161 if (this.fixedString != null) { 2162 int o = this.fixedStringTable.matches(target, con.start, con.limit); 2163 if (o < 0) { 2164 con.inuse = false; 2166 return false; 2167 } 2168 } 2169 2170 int limit = con.limit-this.minlength; 2171 int matchStart; 2172 int matchEnd = -1; 2173 2174 2177 if (this.operations != null 2178 && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { 2179 if (isSet(this.options, SINGLE_LINE)) { 2180 matchStart = con.start; 2181 matchEnd = this. matchCharacterIterator (con, this.operations, con.start, 1, this.options); 2182 } else { 2183 boolean previousIsEOL = true; 2184 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 2185 int ch = target .setIndex( matchStart ) ; 2186 if (isEOLChar(ch)) { 2187 previousIsEOL = true; 2188 } else { 2189 if (previousIsEOL) { 2190 if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, 2191 matchStart, 1, this.options))) 2192 break; 2193 } 2194 previousIsEOL = false; 2195 } 2196 } 2197 } 2198 } 2199 2200 2203 else if (this.firstChar != null) { 2204 RangeToken range = this.firstChar; 2206 if (RegularExpression.isSet(this.options, IGNORE_CASE)) { 2207 range = this.firstChar.getCaseInsensitiveToken(); 2208 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 2209 int ch = target .setIndex( matchStart ) ; 2210 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { 2211 ch = REUtil.composeFromSurrogates(ch, target .setIndex( matchStart+1 ) ); 2212 if (!range.match(ch)) continue; 2213 } else { 2214 if (!range.match(ch)) { 2215 char ch1 = Character.toUpperCase((char)ch); 2216 if (!range.match(ch1)) 2217 if (!range.match(Character.toLowerCase(ch1))) 2218 continue; 2219 } 2220 } 2221 if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, 2222 matchStart, 1, this.options))) 2223 break; 2224 } 2225 } else { 2226 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 2227 int ch = target .setIndex( matchStart ) ; 2228 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) 2229 ch = REUtil.composeFromSurrogates(ch, target .setIndex( matchStart+1 ) ); 2230 if (!range.match(ch)) continue; 2231 if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, 2232 matchStart, 1, this.options))) 2233 break; 2234 } 2235 } 2236 } 2237 2238 2241 else { 2242 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 2243 if (0 <= (matchEnd = this. matchCharacterIterator (con, this.operations, matchStart, 1, this.options))) 2244 break; 2245 } 2246 } 2247 2248 if (matchEnd >= 0) { 2249 if (con.match != null) { 2250 con.match.setBeginning(0, matchStart); 2251 con.match.setEnd(0, matchEnd); 2252 } 2253 con.inuse = false; 2254 return true; 2255 } else { 2256 con.inuse = false; 2257 return false; 2258 } 2259 } 2260 2261 2264 private int matchCharacterIterator (Context con, Op op, int offset, int dx, int opts) { 2265 2266 2267 CharacterIterator target = con.ciTarget; 2268 2269 2270 2271 2272 2273 2274 while (true) { 2275 if (op == null) 2276 return isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset; 2277 if (offset > con.limit || offset < con.start) 2278 return -1; 2279 switch (op.type) { 2280 case Op.CHAR: 2281 if (isSet(opts, IGNORE_CASE)) { 2282 int ch = op.getData(); 2283 if (dx > 0) { 2284 if (offset >= con.limit || !matchIgnoreCase(ch, target .setIndex( offset ) )) 2285 return -1; 2286 offset ++; 2287 } else { 2288 int o1 = offset-1; 2289 if (o1 >= con.limit || o1 < 0 || !matchIgnoreCase(ch, target .setIndex( o1 ) )) 2290 return -1; 2291 offset = o1; 2292 } 2293 } else { 2294 int ch = op.getData(); 2295 if (dx > 0) { 2296 if (offset >= con.limit || ch != target .setIndex( offset ) ) 2297 return -1; 2298 offset ++; 2299 } else { 2300 int o1 = offset-1; 2301 if (o1 >= con.limit || o1 < 0 || ch != target .setIndex( o1 ) ) 2302 return -1; 2303 offset = o1; 2304 } 2305 } 2306 op = op.next; 2307 break; 2308 2309 case Op.DOT: 2310 if (dx > 0) { 2311 if (offset >= con.limit) 2312 return -1; 2313 int ch = target .setIndex( offset ) ; 2314 if (isSet(opts, SINGLE_LINE)) { 2315 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 2316 offset ++; 2317 } else { 2318 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 2319 ch = REUtil.composeFromSurrogates(ch, target .setIndex( ++offset ) ); 2320 if (isEOLChar(ch)) 2321 return -1; 2322 } 2323 offset ++; 2324 } else { 2325 int o1 = offset-1; 2326 if (o1 >= con.limit || o1 < 0) 2327 return -1; 2328 int ch = target .setIndex( o1 ) ; 2329 if (isSet(opts, SINGLE_LINE)) { 2330 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 2331 o1 --; 2332 } else { 2333 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 2334 ch = REUtil.composeFromSurrogates( target .setIndex( --o1 ) , ch); 2335 if (!isEOLChar(ch)) 2336 return -1; 2337 } 2338 offset = o1; 2339 } 2340 op = op.next; 2341 break; 2342 2343 case Op.RANGE: 2344 case Op.NRANGE: 2345 if (dx > 0) { 2346 if (offset >= con.limit) 2347 return -1; 2348 int ch = target .setIndex( offset ) ; 2349 if (REUtil.isHighSurrogate(ch) && offset+1 < con.limit) 2350 ch = REUtil.composeFromSurrogates(ch, target .setIndex( ++offset ) ); 2351 RangeToken tok = op.getToken(); 2352 if (isSet(opts, IGNORE_CASE)) { 2353 tok = tok.getCaseInsensitiveToken(); 2354 if (!tok.match(ch)) { 2355 if (ch >= 0x10000) return -1; 2356 char uch; 2357 if (!tok.match(uch = Character.toUpperCase((char)ch)) 2358 && !tok.match(Character.toLowerCase(uch))) 2359 return -1; 2360 } 2361 } else { 2362 if (!tok.match(ch)) return -1; 2363 } 2364 offset ++; 2365 } else { 2366 int o1 = offset-1; 2367 if (o1 >= con.limit || o1 < 0) 2368 return -1; 2369 int ch = target .setIndex( o1 ) ; 2370 if (REUtil.isLowSurrogate(ch) && o1-1 >= 0) 2371 ch = REUtil.composeFromSurrogates( target .setIndex( --o1 ) , ch); 2372 RangeToken tok = op.getToken(); 2373 if (isSet(opts, IGNORE_CASE)) { 2374 tok = tok.getCaseInsensitiveToken(); 2375 if (!tok.match(ch)) { 2376 if (ch >= 0x10000) return -1; 2377 char uch; 2378 if (!tok.match(uch = Character.toUpperCase((char)ch)) 2379 && !tok.match(Character.toLowerCase(uch))) 2380 return -1; 2381 } 2382 } else { 2383 if (!tok.match(ch)) return -1; 2384 } 2385 offset = o1; 2386 } 2387 op = op.next; 2388 break; 2389 2390 case Op.ANCHOR: 2391 boolean go = false; 2392 switch (op.getData()) { 2393 case '^': 2394 if (isSet(opts, MULTIPLE_LINES)) { 2395 if (!(offset == con.start 2396 || offset > con.start && isEOLChar( target .setIndex( offset-1 ) ))) 2397 return -1; 2398 } else { 2399 if (offset != con.start) 2400 return -1; 2401 } 2402 break; 2403 2404 case '@': if (!(offset == con.start 2407 || offset > con.start && isEOLChar( target .setIndex( offset-1 ) ))) 2408 return -1; 2409 break; 2410 2411 case '$': 2412 if (isSet(opts, MULTIPLE_LINES)) { 2413 if (!(offset == con.limit 2414 || offset < con.limit && isEOLChar( target .setIndex( offset ) ))) 2415 return -1; 2416 } else { 2417 if (!(offset == con.limit 2418 || offset+1 == con.limit && isEOLChar( target .setIndex( offset ) ) 2419 || offset+2 == con.limit && target .setIndex( offset ) == CARRIAGE_RETURN 2420 && target .setIndex( offset+1 ) == LINE_FEED)) 2421 return -1; 2422 } 2423 break; 2424 2425 case 'A': 2426 if (offset != con.start) return -1; 2427 break; 2428 2429 case 'Z': 2430 if (!(offset == con.limit 2431 || offset+1 == con.limit && isEOLChar( target .setIndex( offset ) ) 2432 || offset+2 == con.limit && target .setIndex( offset ) == CARRIAGE_RETURN 2433 && target .setIndex( offset+1 ) == LINE_FEED)) 2434 return -1; 2435 break; 2436 2437 case 'z': 2438 if (offset != con.limit) return -1; 2439 break; 2440 2441 case 'b': 2442 if (con.length == 0) return -1; 2443 { 2444 int after = getWordType(target, con.start, con.limit, offset, opts); 2445 if (after == WT_IGNORE) return -1; 2446 int before = getPreviousWordType(target, con.start, con.limit, offset, opts); 2447 if (after == before) return -1; 2448 } 2449 break; 2450 2451 case 'B': 2452 if (con.length == 0) 2453 go = true; 2454 else { 2455 int after = getWordType(target, con.start, con.limit, offset, opts); 2456 go = after == WT_IGNORE 2457 || after == getPreviousWordType(target, con.start, con.limit, offset, opts); 2458 } 2459 if (!go) return -1; 2460 break; 2461 2462 case '<': 2463 if (con.length == 0 || offset == con.limit) return -1; 2464 if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER 2465 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) 2466 return -1; 2467 break; 2468 2469 case '>': 2470 if (con.length == 0 || offset == con.start) return -1; 2471 if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER 2472 || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) 2473 return -1; 2474 break; 2475 } op = op.next; 2477 break; 2478 2479 case Op.BACKREFERENCE: 2480 { 2481 int refno = op.getData(); 2482 if (refno <= 0 || refno >= this.nofparen) 2483 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+refno); 2484 if (con.match.getBeginning(refno) < 0 2485 || con.match.getEnd(refno) < 0) 2486 return -1; int o2 = con.match.getBeginning(refno); 2488 int literallen = con.match.getEnd(refno)-o2; 2489 if (!isSet(opts, IGNORE_CASE)) { 2490 if (dx > 0) { 2491 if (!regionMatches(target, offset, con.limit, o2, literallen)) 2492 return -1; 2493 offset += literallen; 2494 } else { 2495 if (!regionMatches(target, offset-literallen, con.limit, o2, literallen)) 2496 return -1; 2497 offset -= literallen; 2498 } 2499 } else { 2500 if (dx > 0) { 2501 if (!regionMatchesIgnoreCase(target, offset, con.limit, o2, literallen)) 2502 return -1; 2503 offset += literallen; 2504 } else { 2505 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 2506 o2, literallen)) 2507 return -1; 2508 offset -= literallen; 2509 } 2510 } 2511 } 2512 op = op.next; 2513 break; 2514 case Op.STRING: 2515 { 2516 String literal = op.getString(); 2517 int literallen = literal.length(); 2518 if (!isSet(opts, IGNORE_CASE)) { 2519 if (dx > 0) { 2520 if (!regionMatches(target, offset, con.limit, literal, literallen)) 2521 return -1; 2522 offset += literallen; 2523 } else { 2524 if (!regionMatches(target, offset-literallen, con.limit, literal, literallen)) 2525 return -1; 2526 offset -= literallen; 2527 } 2528 } else { 2529 if (dx > 0) { 2530 if (!regionMatchesIgnoreCase(target, offset, con.limit, literal, literallen)) 2531 return -1; 2532 offset += literallen; 2533 } else { 2534 if (!regionMatchesIgnoreCase(target, offset-literallen, con.limit, 2535 literal, literallen)) 2536 return -1; 2537 offset -= literallen; 2538 } 2539 } 2540 } 2541 op = op.next; 2542 break; 2543 2544 case Op.CLOSURE: 2545 { 2546 2550 int id = op.getData(); 2551 if (id >= 0) { 2552 int previousOffset = con.offsets[id]; 2553 if (previousOffset < 0 || previousOffset != offset) { 2554 con.offsets[id] = offset; 2555 } else { 2556 con.offsets[id] = -1; 2557 op = op.next; 2558 break; 2559 } 2560 } 2561 2562 int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); 2563 if (id >= 0) con.offsets[id] = -1; 2564 if (ret >= 0) return ret; 2565 op = op.next; 2566 } 2567 break; 2568 2569 case Op.QUESTION: 2570 { 2571 int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); 2572 if (ret >= 0) return ret; 2573 op = op.next; 2574 } 2575 break; 2576 2577 case Op.NONGREEDYCLOSURE: 2578 case Op.NONGREEDYQUESTION: 2579 { 2580 int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); 2581 if (ret >= 0) return ret; 2582 op = op.getChild(); 2583 } 2584 break; 2585 2586 case Op.UNION: 2587 for (int i = 0; i < op.size(); i ++) { 2588 int ret = this. matchCharacterIterator (con, op.elementAt(i), offset, dx, opts); 2589 if (DEBUG) { 2590 System.err.println("UNION: "+i+", ret="+ret); 2591 } 2592 if (ret >= 0) return ret; 2593 } 2594 return -1; 2595 2596 case Op.CAPTURE: 2597 int refno = op.getData(); 2598 if (con.match != null && refno > 0) { 2599 int save = con.match.getBeginning(refno); 2600 con.match.setBeginning(refno, offset); 2601 int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); 2602 if (ret < 0) con.match.setBeginning(refno, save); 2603 return ret; 2604 } else if (con.match != null && refno < 0) { 2605 int index = -refno; 2606 int save = con.match.getEnd(index); 2607 con.match.setEnd(index, offset); 2608 int ret = this. matchCharacterIterator (con, op.next, offset, dx, opts); 2609 if (ret < 0) con.match.setEnd(index, save); 2610 return ret; 2611 } 2612 op = op.next; 2613 break; 2614 2615 case Op.LOOKAHEAD: 2616 if (0 > this. matchCharacterIterator (con, op.getChild(), offset, 1, opts)) return -1; 2617 op = op.next; 2618 break; 2619 case Op.NEGATIVELOOKAHEAD: 2620 if (0 <= this. matchCharacterIterator (con, op.getChild(), offset, 1, opts)) return -1; 2621 op = op.next; 2622 break; 2623 case Op.LOOKBEHIND: 2624 if (0 > this. matchCharacterIterator (con, op.getChild(), offset, -1, opts)) return -1; 2625 op = op.next; 2626 break; 2627 case Op.NEGATIVELOOKBEHIND: 2628 if (0 <= this. matchCharacterIterator (con, op.getChild(), offset, -1, opts)) return -1; 2629 op = op.next; 2630 break; 2631 2632 case Op.INDEPENDENT: 2633 { 2634 int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, opts); 2635 if (ret < 0) return ret; 2636 offset = ret; 2637 op = op.next; 2638 } 2639 break; 2640 2641 case Op.MODIFIER: 2642 { 2643 int localopts = opts; 2644 localopts |= op.getData(); 2645 localopts &= ~op.getData2(); 2646 int ret = this. matchCharacterIterator (con, op.getChild(), offset, dx, localopts); 2648 if (ret < 0) return ret; 2649 offset = ret; 2650 op = op.next; 2651 } 2652 break; 2653 2654 case Op.CONDITION: 2655 { 2656 Op.ConditionOp cop = (Op.ConditionOp)op; 2657 boolean matchp = false; 2658 if (cop.refNumber > 0) { 2659 if (cop.refNumber >= this.nofparen) 2660 throw new RuntimeException ("Internal Error: Reference number must be more than zero: "+cop.refNumber); 2661 matchp = con.match.getBeginning(cop.refNumber) >= 0 2662 && con.match.getEnd(cop.refNumber) >= 0; 2663 } else { 2664 matchp = 0 <= this. matchCharacterIterator (con, cop.condition, offset, dx, opts); 2665 } 2666 2667 if (matchp) { 2668 op = cop.yes; 2669 } else if (cop.no != null) { 2670 op = cop.no; 2671 } else { 2672 op = cop.next; 2673 } 2674 } 2675 break; 2676 2677 default: 2678 throw new RuntimeException ("Unknown operation type: "+op.type); 2679 } } } 2682 2683 private static final int getPreviousWordType(CharacterIterator target, int begin, int end, 2684 int offset, int opts) { 2685 int ret = getWordType(target, begin, end, --offset, opts); 2686 while (ret == WT_IGNORE) 2687 ret = getWordType(target, begin, end, --offset, opts); 2688 return ret; 2689 } 2690 2691 private static final int getWordType(CharacterIterator target, int begin, int end, 2692 int offset, int opts) { 2693 if (offset < begin || offset >= end) return WT_OTHER; 2694 return getWordType0( target .setIndex( offset ) , opts); 2695 } 2696 2697 2698 2699 private static final boolean regionMatches(CharacterIterator target, int offset, int limit, 2700 String part, int partlen) { 2701 if (offset < 0) return false; 2702 if (limit-offset < partlen) 2703 return false; 2704 int i = 0; 2705 while (partlen-- > 0) { 2706 if ( target .setIndex( offset++ ) != part.charAt(i++)) 2707 return false; 2708 } 2709 return true; 2710 } 2711 2712 private static final boolean regionMatches(CharacterIterator target, int offset, int limit, 2713 int offset2, int partlen) { 2714 if (offset < 0) return false; 2715 if (limit-offset < partlen) 2716 return false; 2717 int i = offset2; 2718 while (partlen-- > 0) { 2719 if ( target .setIndex( offset++ ) != target .setIndex( i++ ) ) 2720 return false; 2721 } 2722 return true; 2723 } 2724 2725 2728 private static final boolean regionMatchesIgnoreCase(CharacterIterator target, int offset, int limit, 2729 String part, int partlen) { 2730 if (offset < 0) return false; 2731 if (limit-offset < partlen) 2732 return false; 2733 int i = 0; 2734 while (partlen-- > 0) { 2735 char ch1 = target .setIndex( offset++ ) ; 2736 char ch2 = part.charAt(i++); 2737 if (ch1 == ch2) 2738 continue; 2739 char uch1 = Character.toUpperCase(ch1); 2740 char uch2 = Character.toUpperCase(ch2); 2741 if (uch1 == uch2) 2742 continue; 2743 if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) 2744 return false; 2745 } 2746 return true; 2747 } 2748 2749 private static final boolean regionMatchesIgnoreCase(CharacterIterator target, int offset, int limit, 2750 int offset2, int partlen) { 2751 if (offset < 0) return false; 2752 if (limit-offset < partlen) 2753 return false; 2754 int i = offset2; 2755 while (partlen-- > 0) { 2756 char ch1 = target .setIndex( offset++ ) ; 2757 char ch2 = target .setIndex( i++ ) ; 2758 if (ch1 == ch2) 2759 continue; 2760 char uch1 = Character.toUpperCase(ch1); 2761 char uch2 = Character.toUpperCase(ch2); 2762 if (uch1 == uch2) 2763 continue; 2764 if (Character.toLowerCase(uch1) != Character.toLowerCase(uch2)) 2765 return false; 2766 } 2767 return true; 2768 } 2769 2770 2771 2772 2773 2775 2779 String regex; 2780 2783 int options; 2784 2785 2789 int nofparen; 2790 2794 Token tokentree; 2795 2796 boolean hasBackReferences = false; 2797 2798 transient int minlength; 2799 transient Op operations = null; 2800 transient int numberOfClosures; 2801 transient Context context = null; 2802 transient RangeToken firstChar = null; 2803 2804 transient String fixedString = null; 2805 transient int fixedStringOptions; 2806 transient BMPattern fixedStringTable = null; 2807 transient boolean fixedStringOnly = false; 2808 2809 2810 static final class Context { 2811 CharacterIterator ciTarget; 2812 String strTarget; 2813 char[] charTarget; 2814 int start; 2815 int limit; 2816 int length; 2817 Match match; 2818 boolean inuse = false; 2819 int[] offsets; 2820 2821 Context() { 2822 } 2823 2824 private void resetCommon(int nofclosures) { 2825 this.length = this.limit-this.start; 2826 this.inuse = true; 2827 this.match = null; 2828 if (this.offsets == null || this.offsets.length != nofclosures) 2829 this.offsets = new int[nofclosures]; 2830 for (int i = 0; i < nofclosures; i ++) this.offsets[i] = -1; 2831 } 2832 void reset(CharacterIterator target, int start, int limit, int nofclosures) { 2833 this.ciTarget = target; 2834 this.start = start; 2835 this.limit = limit; 2836 this.resetCommon(nofclosures); 2837 } 2838 void reset(String target, int start, int limit, int nofclosures) { 2839 this.strTarget = target; 2840 this.start = start; 2841 this.limit = limit; 2842 this.resetCommon(nofclosures); 2843 } 2844 void reset(char[] target, int start, int limit, int nofclosures) { 2845 this.charTarget = target; 2846 this.start = start; 2847 this.limit = limit; 2848 this.resetCommon(nofclosures); 2849 } 2850 } 2851 2852 2855 void prepare() { 2856 if (Op.COUNT) Op.nofinstances = 0; 2857 this.compile(this.tokentree); 2858 2865 if (Op.COUNT) System.err.println("DEBUG: The number of operations: "+Op.nofinstances); 2866 2867 this.minlength = this.tokentree.getMinLength(); 2868 2869 this.firstChar = null; 2870 if (!isSet(this.options, PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) 2871 && !isSet(this.options, XMLSCHEMA_MODE)) { 2872 RangeToken firstChar = Token.createRange(); 2873 int fresult = this.tokentree.analyzeFirstCharacter(firstChar, this.options); 2874 if (fresult == Token.FC_TERMINAL) { 2875 firstChar.compactRanges(); 2876 this.firstChar = firstChar; 2877 if (DEBUG) 2878 System.err.println("DEBUG: Use the first character optimization: "+firstChar); 2879 } 2880 } 2881 2882 if (this.operations != null 2883 && (this.operations.type == Op.STRING || this.operations.type == Op.CHAR) 2884 && this.operations.next == null) { 2885 if (DEBUG) 2886 System.err.print(" *** Only fixed string! *** "); 2887 this.fixedStringOnly = true; 2888 if (this.operations.type == Op.STRING) 2889 this.fixedString = this.operations.getString(); 2890 else if (this.operations.getData() >= 0x10000) { this.fixedString = REUtil.decomposeToSurrogates(this.operations.getData()); 2892 } else { 2893 char[] ac = new char[1]; 2894 ac[0] = (char)this.operations.getData(); 2895 this.fixedString = new String (ac); 2896 } 2897 this.fixedStringOptions = this.options; 2898 this.fixedStringTable = new BMPattern(this.fixedString, 256, 2899 isSet(this.fixedStringOptions, IGNORE_CASE)); 2900 } else if (!isSet(this.options, PROHIBIT_FIXED_STRING_OPTIMIZATION) 2901 && !isSet(this.options, XMLSCHEMA_MODE)) { 2902 Token.FixedStringContainer container = new Token.FixedStringContainer(); 2903 this.tokentree.findFixedString(container, this.options); 2904 this.fixedString = container.token == null ? null : container.token.getString(); 2905 this.fixedStringOptions = container.options; 2906 if (this.fixedString != null && this.fixedString.length() < 2) 2907 this.fixedString = null; 2908 if (this.fixedString != null) { 2910 this.fixedStringTable = new BMPattern(this.fixedString, 256, 2911 isSet(this.fixedStringOptions, IGNORE_CASE)); 2912 if (DEBUG) { 2913 System.err.println("DEBUG: The longest fixed string: "+this.fixedString.length() 2914 +"/" +"/"+REUtil.createOptionString(this.fixedStringOptions)); 2916 System.err.print("String: "); 2917 REUtil.dumpString(this.fixedString); 2918 } 2919 } 2920 } 2921 } 2922 2923 2933 2934 2937 static final int IGNORE_CASE = 1<<1; 2938 2939 2942 static final int SINGLE_LINE = 1<<2; 2943 2944 2947 static final int MULTIPLE_LINES = 1<<3; 2948 2949 2952 static final int EXTENDED_COMMENT = 1<<4; 2953 2954 2961 static final int USE_UNICODE_CATEGORY = 1<<5; 2963 2975 static final int UNICODE_WORD_BOUNDARY = 1<<6; 2977 2980 static final int PROHIBIT_HEAD_CHARACTER_OPTIMIZATION = 1<<7; 2981 2984 static final int PROHIBIT_FIXED_STRING_OPTIMIZATION = 1<<8; 2985 2988 static final int XMLSCHEMA_MODE = 1<<9; 2989 2992 static final int SPECIAL_COMMA = 1<<10; 2993 2994 2995 private static final boolean isSet(int options, int flag) { 2996 return (options & flag) == flag; 2997 } 2998 2999 3005 public RegularExpression(String regex) throws ParseException { 3006 this.setPattern(regex, null); 3007 } 3008 3009 3016 public RegularExpression(String regex, String options) throws ParseException { 3017 this.setPattern(regex, options); 3018 } 3019 3020 RegularExpression(String regex, Token tok, int parens, boolean hasBackReferences, int options) { 3021 this.regex = regex; 3022 this.tokentree = tok; 3023 this.nofparen = parens; 3024 this.options = options; 3025 this.hasBackReferences = hasBackReferences; 3026 } 3027 3028 3031 public void setPattern(String newPattern) throws ParseException { 3032 this.setPattern(newPattern, this.options); 3033 } 3034 3035 private void setPattern(String newPattern, int options) throws ParseException { 3036 this.regex = newPattern; 3037 this.options = options; 3038 RegexParser rp = RegularExpression.isSet(this.options, RegularExpression.XMLSCHEMA_MODE) 3039 ? new ParserForXMLSchema() : new RegexParser(); 3040 this.tokentree = rp.parse(this.regex, this.options); 3041 this.nofparen = rp.parennumber; 3042 this.hasBackReferences = rp.hasBackReferences; 3043 3044 this.operations = null; 3045 this.context = null; 3046 } 3047 3050 public void setPattern(String newPattern, String options) throws ParseException { 3051 this.setPattern(newPattern, REUtil.parseOptions(options)); 3052 } 3053 3054 3057 public String getPattern() { 3058 return this.regex; 3059 } 3060 3061 3064 public String toString() { 3065 return this.tokentree.toString(this.options); 3066 } 3067 3068 3076 public String getOptions() { 3077 return REUtil.createOptionString(this.options); 3078 } 3079 3080 3083 public boolean equals(Object obj) { 3084 if (obj == null) return false; 3085 if (!(obj instanceof RegularExpression)) 3086 return false; 3087 RegularExpression r = (RegularExpression)obj; 3088 return this.regex.equals(r.regex) && this.options == r.options; 3089 } 3090 3091 boolean equals(String pattern, int options) { 3092 return this.regex.equals(pattern) && this.options == options; 3093 } 3094 3095 3098 public int hashCode() { 3099 return (this.regex+"/"+this.getOptions()).hashCode(); 3100 } 3101 3102 3107 public int getNumberOfGroups() { 3108 return this.nofparen; 3109 } 3110 3111 3113 private static final int WT_IGNORE = 0; 3114 private static final int WT_LETTER = 1; 3115 private static final int WT_OTHER = 2; 3116 private static final int getWordType0(char ch, int opts) { 3117 if (!isSet(opts, UNICODE_WORD_BOUNDARY)) { 3118 if (isSet(opts, USE_UNICODE_CATEGORY)) { 3119 return (Token.getRange("IsWord", true).match(ch)) ? WT_LETTER : WT_OTHER; 3120 } 3121 return isWordChar(ch) ? WT_LETTER : WT_OTHER; 3122 } 3123 3124 switch (Character.getType(ch)) { 3125 case Character.UPPERCASE_LETTER: case Character.LOWERCASE_LETTER: case Character.TITLECASE_LETTER: case Character.MODIFIER_LETTER: case Character.OTHER_LETTER: case Character.LETTER_NUMBER: case Character.DECIMAL_DIGIT_NUMBER: case Character.OTHER_NUMBER: case Character.COMBINING_SPACING_MARK: return WT_LETTER; 3135 3136 case Character.FORMAT: case Character.NON_SPACING_MARK: case Character.ENCLOSING_MARK: return WT_IGNORE; 3140 3141 case Character.CONTROL: switch (ch) { 3143 case '\t': 3144 case '\n': 3145 case '\u000B': 3146 case '\f': 3147 case '\r': 3148 return WT_OTHER; 3149 default: 3150 return WT_IGNORE; 3151 } 3152 3153 default: 3154 return WT_OTHER; 3155 } 3156 } 3157 3158 3160 static final int LINE_FEED = 0x000A; 3161 static final int CARRIAGE_RETURN = 0x000D; 3162 static final int LINE_SEPARATOR = 0x2028; 3163 static final int PARAGRAPH_SEPARATOR = 0x2029; 3164 3165 private static final boolean isEOLChar(int ch) { 3166 return ch == LINE_FEED || ch == CARRIAGE_RETURN || ch == LINE_SEPARATOR 3167 || ch == PARAGRAPH_SEPARATOR; 3168 } 3169 3170 private static final boolean isWordChar(int ch) { if (ch == '_') return true; 3172 if (ch < '0') return false; 3173 if (ch > 'z') return false; 3174 if (ch <= '9') return true; 3175 if (ch < 'A') return false; 3176 if (ch <= 'Z') return true; 3177 if (ch < 'a') return false; 3178 return true; 3179 } 3180 3181 private static final boolean matchIgnoreCase(int chardata, int ch) { 3182 if (chardata == ch) return true; 3183 if (chardata > 0xffff || ch > 0xffff) return false; 3184 char uch1 = Character.toUpperCase((char)chardata); 3185 char uch2 = Character.toUpperCase((char)ch); 3186 if (uch1 == uch2) return true; 3187 return Character.toLowerCase(uch1) == Character.toLowerCase(uch2); 3188 } 3189} 3190 | Popular Tags |