1 29 30 package com.caucho.quercus.lib; 31 32 import com.caucho.quercus.QuercusException; 33 import com.caucho.quercus.QuercusRuntimeException; 34 import com.caucho.quercus.annotation.Optional; 35 import com.caucho.quercus.annotation.Reference; 36 import com.caucho.quercus.env.*; 37 import com.caucho.quercus.module.AbstractQuercusModule; 38 import com.caucho.util.L10N; 39 import com.caucho.util.LruCache; 40 41 import java.io.IOException ; 42 import java.util.ArrayList ; 43 import java.util.HashMap ; 44 import java.util.Iterator ; 45 import java.util.Map ; 46 import java.util.regex.Matcher ; 47 import java.util.regex.Pattern ; 48 49 52 public class RegexpModule 53 extends AbstractQuercusModule 54 { 55 private static final L10N L = new L10N(RegexpModule.class); 56 57 private static final int REGEXP_EVAL = 0x01; 58 59 public static final int PREG_PATTERN_ORDER = 0x01; 60 public static final int PREG_SET_ORDER = 0x02; 61 public static final int PREG_OFFSET_CAPTURE = 0x04; 62 63 public static final int PREG_SPLIT_NO_EMPTY = 0x01; 64 public static final int PREG_SPLIT_DELIM_CAPTURE = 0x02; 65 public static final int PREG_SPLIT_OFFSET_CAPTURE = 0x04; 66 67 public static final int PREG_GREP_INVERT = 1; 68 69 public static final boolean [] PREG_QUOTE = new boolean[256]; 70 71 private static final LruCache<StringValue, Pattern > _patternCache 72 = new LruCache<StringValue, Pattern >(1024); 73 74 private static final LruCache<StringValue, ArrayList <Replacement>> _replacementCache 75 = new LruCache<StringValue, ArrayList <Replacement>>(1024); 76 77 private static final HashMap <String , Value> _constMap 78 = new HashMap <String , Value>(); 79 80 public String []getLoadedExtensions() 81 { 82 return new String [] { "pcre" }; 83 } 84 85 90 public static Value ereg(Env env, 91 StringValue pattern, 92 StringValue string, 93 @Optional @Reference Value regsV) 94 { 95 return ereg(env, pattern, string, regsV, 0); 96 } 97 98 103 public static Value eregi(Env env, 104 StringValue pattern, 105 StringValue string, 106 @Optional @Reference Value regsV) 107 { 108 return ereg(env, pattern, string, regsV, Pattern.CASE_INSENSITIVE); 109 } 110 111 116 private static Value ereg(Env env, 117 StringValue rawPattern, 118 StringValue string, 119 Value regsV, 120 int flags) 121 { 122 String cleanPattern = cleanRegexp(rawPattern, false); 123 124 Pattern pattern = Pattern.compile(cleanPattern, flags); 125 Matcher matcher = pattern.matcher(string); 126 127 if (! (matcher.find())) { 128 return BooleanValue.FALSE; 129 } 130 131 if (regsV != null && ! (regsV instanceof NullValue)) { 132 ArrayValue regs = new ArrayValueImpl(); 133 regsV.set(regs); 134 135 regs.put(LongValue.ZERO, new StringValueImpl(matcher.group())); 136 int count = matcher.groupCount(); 137 138 for (int i = 1; i <= count; i++) { 139 String group = matcher.group(i); 140 141 Value value; 142 if (group == null) 143 value = BooleanValue.FALSE; 144 else 145 value = new StringValueImpl(group); 146 147 regs.put(new LongValue(i), value); 148 } 149 150 int len = matcher.end() - matcher.start(); 151 152 if (len == 0) 153 return LongValue.ONE; 154 else 155 return new LongValue(len); 156 } 157 else { 158 return LongValue.ONE; 159 } 160 } 161 162 173 public static int preg_match(Env env, 174 StringValue patternString, 175 StringValue string, 176 @Optional @Reference Value matchRef, 177 @Optional int flags, 178 @Optional int offset) 179 { 180 if (patternString.length() < 2) { 181 env.warning(L.l("Regexp pattern must have opening and closing delimiters")); 182 return 0; 183 } 184 185 Pattern pattern = compileRegexp(patternString); 186 Matcher matcher = pattern.matcher(string); 187 188 ArrayValue regs; 189 190 if (matchRef instanceof DefaultValue) 191 regs = null; 192 else 193 regs = new ArrayValueImpl(); 194 195 if (! (matcher.find(offset))) { 196 matchRef.set(regs); 197 return 0; 198 } 199 200 boolean isOffsetCapture = (flags & PREG_OFFSET_CAPTURE) != 0; 201 202 if (regs != null) { 203 if (isOffsetCapture) { 204 ArrayValueImpl part = new ArrayValueImpl(); 205 part.append(new StringValueImpl(matcher.group())); 206 part.append(new LongValue(matcher.start())); 207 208 regs.put(LongValue.ZERO, part); 209 } 210 else 211 regs.put(LongValue.ZERO, new StringValueImpl(matcher.group())); 212 213 int count = matcher.groupCount(); 214 215 for (int i = 1; i <= count; i++) { 216 String group = matcher.group(i); 217 218 if (group == null) 219 continue; 220 221 if (isOffsetCapture) { 222 for (int j = regs.getSize(); j < i; j++) { 225 ArrayValue part = new ArrayValueImpl(); 226 227 part.append(StringValue.EMPTY); 228 part.append(LongValue.MINUS_ONE); 229 230 regs.put(new LongValue(j), part); 231 } 232 233 ArrayValueImpl part = new ArrayValueImpl(); 234 part.append(new StringValueImpl(group)); 235 part.append(new LongValue(matcher.start(i))); 236 237 regs.put(new LongValue(i), part); 238 } 239 else { 240 for (int j = regs.getSize(); j < i; j++) { 243 regs.put(new LongValue(j), StringValue.EMPTY); 244 } 245 246 regs.put(new LongValue(i), new StringValueImpl(group)); 247 } 248 } 249 250 matchRef.set(regs); 251 } 252 253 return 1; 254 } 255 256 261 public static int preg_match_all(Env env, 262 StringValue patternString, 263 StringValue subject, 264 @Reference Value matchRef, 265 @Optional("PREG_PATTERN_ORDER") int flags, 266 @Optional int offset) 267 { 268 if (patternString.length() < 2) { 269 env.warning(L.l("Pattern must have at least opening and closing delimiters")); 270 return 0; 271 } 272 273 if (((flags & PREG_PATTERN_ORDER) != 0) && ((flags & PREG_SET_ORDER) != 0)) { 274 env.warning((L.l("Cannot combine PREG_PATTER_ORDER and PREG_SET_ORDER"))); 275 return 0; 276 } 277 278 Pattern pattern = compileRegexp(patternString); 279 280 ArrayValue matches; 281 282 if (matchRef instanceof ArrayValue) 283 matches = (ArrayValue) matchRef; 284 else 285 matches = new ArrayValueImpl(); 286 287 matches.clear(); 288 289 matchRef.set(matches); 290 291 if ((flags & PREG_PATTERN_ORDER) != 0) { 292 return pregMatchAllPatternOrder(env, pattern, subject, 293 matches, flags, offset); 294 } 295 else if ((flags & PREG_SET_ORDER) != 0) { 296 return pregMatchAllSetOrder(env, pattern, subject, 297 matches, flags, offset); 298 } 299 else 300 throw new UnsupportedOperationException (); 301 } 302 303 308 public static int pregMatchAllPatternOrder(Env env, 309 Pattern pattern, 310 StringValue subject, 311 ArrayValue matches, 312 int flags, 313 int offset) 314 { 315 Matcher matcher = pattern.matcher(subject); 316 317 int groupCount = matcher.groupCount(); 318 319 ArrayValue []matchList = new ArrayValue[groupCount + 1]; 320 321 for (int j = 0; j <= groupCount; j++) { 322 ArrayValue values = new ArrayValueImpl(); 323 matches.put(values); 324 matchList[j] = values; 325 } 326 327 if (! (matcher.find())) { 328 return 0; 329 } 330 331 int count = 0; 332 333 do { 334 count++; 335 336 for (int j = 0; j <= groupCount; j++) { 337 ArrayValue values = matchList[j]; 338 339 int start = matcher.start(j); 340 int end = matcher.end(j); 341 342 StringValue groupValue = subject.substring(start, end); 343 344 Value result = NullValue.NULL; 345 346 if (groupValue != null) { 347 if ((flags & PREG_OFFSET_CAPTURE) != 0) { 348 result = new ArrayValueImpl(); 349 result.put(groupValue); 350 result.put(LongValue.create(start)); 351 } else { 352 result = groupValue; 353 } 354 } 355 356 values.put(result); 357 } 358 } while (matcher.find()); 359 360 return count; 361 } 362 363 368 private static int pregMatchAllSetOrder(Env env, 369 Pattern pattern, 370 StringValue subject, 371 ArrayValue matches, 372 int flags, 373 int offset) 374 { 375 Matcher matcher = pattern.matcher(subject); 376 377 if (! (matcher.find())) { 378 return 0; 379 } 380 381 int count = 0; 382 383 do { 384 count++; 385 386 ArrayValue matchResult = new ArrayValueImpl(); 387 matches.put(matchResult); 388 389 for (int j = 0; j <= matcher.groupCount(); j++) { 390 int start = matcher.start(j); 391 int end = matcher.end(j); 392 393 StringValue groupValue = subject.substring(start, end); 394 395 Value result = NullValue.NULL; 396 397 if (groupValue != null) { 398 if ((flags & PREG_OFFSET_CAPTURE) != 0) { 399 result = new ArrayValueImpl(); 400 result.put(groupValue); 401 result.put(LongValue.create(start)); 402 } else { 403 result = groupValue; 404 } 405 } 406 matchResult.put(result); 407 } 408 } while (matcher.find()); 409 410 return count; 411 } 412 413 416 public static String preg_quote(String string, 417 @Optional String delim) 418 { 419 StringBuilder sb = new StringBuilder (); 420 421 boolean []extra = null; 422 423 if (delim != null && ! delim.equals("")) { 424 extra = new boolean[256]; 425 426 for (int i = 0; i < delim.length(); i++) 427 extra[delim.charAt(i)] = true; 428 } 429 430 int length = string.length(); 431 for (int i = 0; i < length; i++) { 432 char ch = string.charAt(i); 433 434 if (ch >= 256) 435 sb.append(ch); 436 else if (PREG_QUOTE[ch]) { 437 sb.append('\\'); 438 sb.append(ch); 439 } 440 else if (extra != null && extra[ch]) { 441 sb.append('\\'); 442 sb.append(ch); 443 } 444 else 445 sb.append(ch); 446 } 447 448 return sb.toString(); 449 } 450 451 462 public static Value preg_replace(Env env, 463 Value pattern, 464 Value replacement, 465 Value subject, 466 @Optional("-1") long limit, 467 @Optional @Reference Value count) 468 { 469 if (subject instanceof ArrayValue) { 470 ArrayValue result = new ArrayValueImpl(); 471 472 for (Value value : ((ArrayValue) subject).values()) { 473 result.put(pregReplace(env, 474 pattern, 475 replacement, 476 value.toStringValue(), 477 limit, 478 count)); 479 } 480 481 return result; 482 483 } 484 else if (subject.isset()) { 485 return pregReplace(env, pattern, replacement, subject.toStringValue(), 486 limit, count); 487 } else 488 return StringValue.EMPTY; 489 490 } 491 492 495 private static Value pregReplace(Env env, 496 Value patternValue, 497 Value replacement, 498 StringValue subject, 499 @Optional("-1") long limit, 500 Value countV) 501 { 502 StringValue string = subject; 503 504 if (limit < 0) 505 limit = Long.MAX_VALUE; 506 507 if (patternValue.isArray() && replacement.isArray()) { 508 ArrayValue patternArray = (ArrayValue) patternValue; 509 ArrayValue replacementArray = (ArrayValue) replacement; 510 511 Iterator <Value> patternIter = patternArray.values().iterator(); 512 Iterator <Value> replacementIter = replacementArray.values().iterator(); 513 514 while (patternIter.hasNext() && replacementIter.hasNext()) { 515 string = pregReplaceString(env, 516 patternIter.next().toStringValue(), 517 replacementIter.next().toStringValue(), 518 string, 519 limit, 520 countV); 521 } 522 } else if (patternValue.isArray()) { 523 ArrayValue patternArray = (ArrayValue) patternValue; 524 525 for (Value value : patternArray.values()) { 526 string = pregReplaceString(env, 527 value.toStringValue(), 528 replacement.toStringValue(), 529 string, 530 limit, 531 countV); 532 } 533 } else { 534 return pregReplaceString(env, 535 patternValue.toStringValue(), 536 replacement.toStringValue(), 537 string, 538 limit, 539 countV); 540 } 541 542 return string; 543 } 544 545 555 private static StringValue pregReplaceCallbackImpl(Env env, 556 StringValue patternString, 557 Callback fun, 558 StringValue subject, 559 long limit, 560 Value countV) 561 { 562 563 long numberOfMatches = 0; 564 565 if (limit < 0) 566 limit = Long.MAX_VALUE; 567 568 Pattern pattern = compileRegexp(patternString); 569 570 Matcher matcher = pattern.matcher(subject); 571 572 StringBuilderValue result = new StringBuilderValue(); 573 int tail = 0; 574 575 while (matcher.find() && numberOfMatches < limit) { 576 if ((countV != null) && (countV instanceof Var)) { 578 long count = ((Var) countV).getRawValue().toLong(); 579 countV.set(LongValue.create(count + 1)); 580 } 581 582 if (tail < matcher.start()) 583 result.append(subject.substring(tail, matcher.start())); 584 585 ArrayValue regs = new ArrayValueImpl(); 586 587 for (int i = 0; i <= matcher.groupCount(); i++) { 588 String group = matcher.group(i); 589 590 if (group != null) 591 regs.put(new StringValueImpl(group)); 592 else 593 regs.put(StringValue.EMPTY); 594 } 595 596 Value replacement = fun.call(env, regs); 597 598 result.append(replacement); 599 600 tail = matcher.end(); 601 602 numberOfMatches++; 603 } 604 605 if (tail < subject.length()) 606 result.append(subject.substring(tail)); 607 608 return result; 609 } 610 611 614 private static StringValue pregReplaceString(Env env, 615 StringValue patternString, 616 StringValue replacement, 617 StringValue subject, 618 long limit, 619 Value countV) 620 { 621 Pattern pattern = compileRegexp(patternString); 622 623 int patternFlags = regexpFlags(patternString); 625 boolean isEval = (patternFlags & REGEXP_EVAL) != 0; 626 627 ArrayList <Replacement> replacementProgram 628 = _replacementCache.get(replacement); 629 630 if (replacementProgram == null) { 631 replacementProgram = compileReplacement(env, replacement, isEval); 632 _replacementCache.put(replacement, replacementProgram); 633 } 634 635 return pregReplaceStringImpl(env, 636 pattern, 637 replacementProgram, 638 subject, 639 limit, 640 countV, 641 isEval); 642 } 643 644 647 public static Value ereg_replace(Env env, 648 StringValue patternString, 649 StringValue replacement, 650 StringValue subject) 651 { 652 Pattern pattern = Pattern.compile(cleanRegexp(patternString, false)); 653 654 ArrayList <Replacement> replacementProgram 655 = _replacementCache.get(replacement); 656 657 if (replacementProgram == null) { 658 replacementProgram = compileReplacement(env, replacement, false); 659 _replacementCache.put(replacement, replacementProgram); 660 } 661 662 return pregReplaceStringImpl(env, 663 pattern, 664 replacementProgram, 665 subject, 666 -1, 667 NullValue.NULL, 668 false); 669 } 670 671 674 public static Value eregi_replace(Env env, 675 StringValue patternString, 676 StringValue replacement, 677 StringValue subject) 678 { 679 Pattern pattern = Pattern.compile(cleanRegexp(patternString, false), 680 Pattern.CASE_INSENSITIVE); 681 682 ArrayList <Replacement> replacementProgram 683 = _replacementCache.get(replacement); 684 685 if (replacementProgram == null) { 686 replacementProgram = compileReplacement(env, replacement, false); 687 _replacementCache.put(replacement, replacementProgram); 688 } 689 690 return pregReplaceStringImpl(env, pattern, replacementProgram, 691 subject, -1, NullValue.NULL, false); 692 } 693 694 697 private static StringValue pregReplaceStringImpl(Env env, 698 Pattern pattern, 699 ArrayList <Replacement> replacementList, 700 StringValue subject, 701 long limit, 702 Value countV, 703 boolean isEval) 704 { 705 if (limit < 0) 706 limit = Long.MAX_VALUE; 707 708 int length = subject.length(); 709 710 Matcher matcher = pattern.matcher(subject); 711 712 StringBuilderValue result = null; 713 int tail = 0; 714 715 int replacementLen = replacementList.size(); 716 717 while (matcher.find() && limit-- > 0) { 718 if (result == null) 719 result = new StringBuilderValue(); 720 721 if ((countV != null) && (countV instanceof Var)) { 723 countV.set(LongValue.create(countV.toLong() + 1)); 724 } 725 726 if (tail < matcher.start()) 728 result.append(subject, tail, matcher.start()); 729 730 if (isEval) { 733 StringBuilderValue evalString = new StringBuilderValue(); 734 735 for (int i = 0; i < replacementLen; i++) { 736 Replacement replacement = replacementList.get(i); 737 738 replacement.eval(evalString, subject, matcher); 739 } 740 741 try { 742 result.append(env.evalCode(evalString.toString())); 743 } catch (IOException e) { 744 throw new QuercusException(e); 745 } 746 } else { 747 for (int i = 0; i < replacementLen; i++) { 748 Replacement replacement = replacementList.get(i); 749 750 replacement.eval(result, subject, matcher); 751 } 752 } 753 754 tail = matcher.end(); 755 } 756 757 if (result == null) 758 return subject; 759 760 if (tail < length) 761 result.append(subject, tail, length); 762 763 return result; 764 } 765 766 777 public static Value preg_replace_callback(Env env, 778 Value pattern, 779 Callback fun, 780 Value subject, 781 @Optional("-1") long limit, 782 @Optional @Reference Value count) 783 { 784 if (subject instanceof ArrayValue) { 785 ArrayValue result = new ArrayValueImpl(); 786 787 for (Value value : ((ArrayValue) subject).values()) { 788 result.put(pregReplaceCallback(env, 789 pattern.toStringValue(), 790 fun, 791 value.toStringValue(), 792 limit, 793 count)); 794 } 795 796 return result; 797 798 } else if (subject instanceof StringValue) { 799 return pregReplaceCallback(env, 800 pattern.toStringValue(), 801 fun, 802 subject.toStringValue(), 803 limit, 804 count); 805 } else { 806 return NullValue.NULL; 807 } 808 } 809 810 813 private static Value pregReplaceCallback(Env env, 814 Value patternValue, 815 Callback fun, 816 StringValue subject, 817 @Optional("-1") long limit, 818 @Optional @Reference Value countV) 819 { 820 if (limit < 0) 821 limit = Long.MAX_VALUE; 822 823 if (patternValue.isArray()) { 824 ArrayValue patternArray = (ArrayValue) patternValue; 825 826 for (Value value : patternArray.values()) { 827 subject = pregReplaceCallbackImpl(env, 828 value.toStringValue(), 829 fun, 830 subject, 831 limit, 832 countV); 833 } 834 835 return subject; 836 837 } else if (patternValue instanceof StringValue) { 838 return pregReplaceCallbackImpl(env, 839 patternValue.toStringValue(), 840 fun, 841 subject, 842 limit, 843 countV); 844 } else { 845 return NullValue.NULL; 846 } 847 } 848 849 856 public static Value preg_split(Env env, 857 StringValue patternString, 858 StringValue string, 859 @Optional("-1") long limit, 860 @Optional int flags) 861 { 862 if (limit <= 0) 863 limit = Long.MAX_VALUE; 864 865 Pattern pattern = compileRegexp(patternString); 866 Matcher matcher = pattern.matcher(string); 867 868 ArrayValue result = new ArrayValueImpl(); 869 870 int head = 0; 871 long count = 0; 872 873 boolean allowEmpty = (flags & PREG_SPLIT_NO_EMPTY) == 0; 874 boolean isCaptureOffset = (flags & PREG_SPLIT_OFFSET_CAPTURE) != 0; 875 boolean isCaptureDelim = (flags & PREG_SPLIT_DELIM_CAPTURE) != 0; 876 877 while (matcher.find()) { 878 int startPosition = head; 879 StringValue unmatched; 880 881 if (count == limit - 1) { 883 unmatched = string.substring(head); 884 head = string.length(); 885 } 886 else { 887 unmatched = string.substring(head, matcher.start()); 888 head = matcher.end(); 889 } 890 891 if (unmatched.length() != 0 || allowEmpty) { 893 if (isCaptureOffset) { 894 ArrayValue part = new ArrayValueImpl(); 895 896 part.put(unmatched); 897 part.put(LongValue.create(startPosition)); 898 899 result.put(part); 900 } 901 else { 902 result.put(unmatched); 903 } 904 905 count++; 906 } 907 908 if (count == limit) 909 break; 910 911 if (isCaptureDelim) { 913 for (int i = 1; i <= matcher.groupCount(); i++) { 914 int start = matcher.start(i); 915 int end = matcher.end(i); 916 917 if ((start != -1 && end - start > 0) || allowEmpty) { 918 919 StringValue groupValue; 920 if (start < 0) 921 groupValue = StringValue.EMPTY; 922 else 923 groupValue = string.substring(start, end); 924 925 if (isCaptureOffset) { 926 ArrayValue part = new ArrayValueImpl(); 927 928 part.put(groupValue); 929 part.put(LongValue.create(startPosition)); 930 931 result.put(part); 932 } 933 else 934 result.put(groupValue); 935 } 936 } 937 } 938 } 939 940 if (count < limit && (head < string.length() || allowEmpty)) { 942 if (isCaptureOffset) { 943 ArrayValue part = new ArrayValueImpl(); 944 945 part.put(string.substring(head)); 946 part.put(LongValue.create(head)); 947 948 result.put(part); 949 } 950 else 951 result.put(string.substring(head)); 952 } 953 954 return result; 955 956 1027 } 1028 1029 1032 public static String sql_regcase(String string) 1033 { 1034 StringBuilder sb = new StringBuilder (); 1035 1036 int len = string.length(); 1037 for (int i = 0; i < len; i++) { 1038 char ch = string.charAt(i); 1039 1040 if (Character.isLowerCase(ch)) { 1041 sb.append('['); 1042 sb.append(Character.toUpperCase(ch)); 1043 sb.append(ch); 1044 sb.append(']'); 1045 } 1046 else if (Character.isUpperCase(ch)) { 1047 sb.append('['); 1048 sb.append(ch); 1049 sb.append(Character.toLowerCase(ch)); 1050 sb.append(']'); 1051 } 1052 else 1053 sb.append(ch); 1054 } 1055 1056 return sb.toString(); 1057 } 1058 1059 1064 public static Value split(Env env, 1065 StringValue patternString, 1066 StringValue string, 1067 @Optional("-1") long limit) 1068 { 1069 if (limit < 0) 1070 limit = Long.MAX_VALUE; 1071 1072 String cleanRegexp = cleanRegexp(patternString, false); 1073 1074 Pattern pattern = Pattern.compile(cleanRegexp); 1075 1076 ArrayValue result = new ArrayValueImpl(); 1077 1078 Matcher matcher = pattern.matcher(string); 1079 long count = 0; 1080 int head = 0; 1081 1082 while ((matcher.find()) && (count < limit)) { 1083 StringValue value; 1084 if (count == limit - 1) { 1085 value = string.substring(head); 1086 head = string.length(); 1087 } else { 1088 value = string.substring(head, matcher.start()); 1089 head = matcher.end(); 1090 } 1091 1092 result.put(value); 1093 1094 count++; 1095 } 1096 1097 if ((head <= string.length() && (count != limit))) { 1098 result.put(string.substring(head)); 1099 } 1100 1101 return result; 1102 } 1103 1104 1114 public static ArrayValue preg_grep(Env env, 1115 StringValue patternString, 1116 ArrayValue input, 1117 @Optional("0") int flag) 1118 { 1119 1121 Pattern pattern = compileRegexp(patternString); 1122 1123 Matcher matcher = null; 1124 1125 ArrayValue matchArray = new ArrayValueImpl(); 1126 1127 for (Map.Entry <Value, Value> entry : input.entrySet()) { 1128 Value entryValue = entry.getValue(); 1129 Value entryKey = entry.getKey(); 1130 1131 matcher = pattern.matcher(entryValue.toString()); 1132 1133 boolean found = matcher.find(); 1134 1135 if (!found && (flag == PREG_GREP_INVERT)) 1136 matchArray.append(entryKey, entryValue); 1137 else if (found && (flag != PREG_GREP_INVERT)) 1138 matchArray.append(entryKey, entryValue); 1139 } 1140 1141 return matchArray; 1142 } 1143 1144 1153 public static ArrayValue spliti(Env env, 1154 StringValue patternString, 1155 StringValue string, 1156 @Optional("-1") long limit) 1157 { 1158 if (limit < 0) 1159 limit = Long.MAX_VALUE; 1160 1161 1163 String cleanRegexp = cleanRegexp(patternString, false); 1164 1165 Pattern pattern = Pattern.compile(cleanRegexp, Pattern.CASE_INSENSITIVE); 1166 1167 ArrayValue result = new ArrayValueImpl(); 1168 1169 Matcher matcher = pattern.matcher(string); 1170 long count = 0; 1171 int head = 0; 1172 1173 while ((matcher.find()) && (count < limit)) { 1174 StringValue value; 1175 if (count == limit - 1) { 1176 value = string.substring(head); 1177 head = string.length(); 1178 } else { 1179 value = string.substring(head, matcher.start()); 1180 head = matcher.end(); 1181 } 1182 1183 result.put(value); 1184 1185 count++; 1186 } 1187 1188 if ((head <= string.length()) && (count != limit)) { 1189 result.put(string.substring(head)); 1190 } 1191 1192 return result; 1193 } 1194 1195 private static Pattern compileRegexp(StringValue rawRegexp) 1196 { 1197 Pattern pattern = _patternCache.get(rawRegexp); 1198 1199 if (pattern != null) 1200 return pattern; 1201 1202 if (rawRegexp.length() < 2) { 1203 throw new IllegalStateException (L.l( 1204 "Can't find delimiters in regexp '{0}'.", 1205 rawRegexp)); 1206 } 1207 1208 char delim = rawRegexp.charAt(0); 1209 1210 if (delim == '{') 1211 delim = '}'; 1212 else if (delim == '[') 1213 delim = ']'; 1214 else if (delim == '(') 1215 delim = ')'; 1216 1217 int tail = rawRegexp.lastIndexOf(delim); 1218 1219 if (tail <= 0) 1220 throw new IllegalStateException (L.l( 1221 "Can't find second {0} in regexp '{1}'.", 1222 String.valueOf((char) delim), 1223 rawRegexp)); 1224 1225 int len = rawRegexp.length(); 1226 1227 int flags = 0; 1228 boolean isExt = false; 1229 boolean isGreedy = true; 1230 1231 for (int i = tail + 1; i < len; i++) { 1232 char ch = rawRegexp.charAt(i); 1233 1234 switch (ch) { 1235 case 'i': 1236 flags |= Pattern.CASE_INSENSITIVE; 1237 break; 1238 case 's': 1239 flags |= Pattern.DOTALL; 1240 break; 1241 case 'x': 1242 flags |= Pattern.COMMENTS; 1243 break; 1244 case 'm': 1245 flags |= Pattern.MULTILINE; 1246 break; 1247 case 'U': 1248 isGreedy = false; 1249 break; 1250 } 1251 } 1252 1253 StringValue regexp = rawRegexp.substring(1, tail); 1254 1255 String cleanRegexp = cleanRegexp(regexp, (flags & Pattern.COMMENTS) != 0); 1256 1257 if (! isGreedy) 1258 cleanRegexp = toNonGreedy(cleanRegexp); 1259 1260 pattern = Pattern.compile(cleanRegexp, flags); 1261 1262 _patternCache.put(rawRegexp, pattern); 1263 1264 return pattern; 1265 } 1266 1267 private static int regexpFlags(StringValue rawRegexp) 1268 { 1269 char delim = rawRegexp.charAt(0); 1270 if (delim == '{') 1271 delim = '}'; 1272 else if (delim == '[') 1273 delim = ']'; 1274 1275 int tail = rawRegexp.lastIndexOf(delim); 1276 1277 if (tail <= 0) 1278 throw new IllegalStateException (L.l( 1279 "Can't find second {0} in regexp '{1}'.", 1280 String.valueOf((char) delim), 1281 rawRegexp)); 1282 1283 int len = rawRegexp.length(); 1284 1285 int flags = 0; 1286 1287 for (int i = tail + 1; i < len; i++) { 1288 char ch = rawRegexp.charAt(i); 1289 1290 switch (ch) { 1291 case 'e': 1292 flags |= REGEXP_EVAL; 1293 break; 1294 } 1295 } 1296 1297 return flags; 1298 } 1299 1300 private static ArrayList <Replacement> 1301 compileReplacement(Env env, StringValue replacement, boolean isEval) 1302 { 1303 ArrayList <Replacement> program = new ArrayList <Replacement>(); 1304 StringBuilder text = new StringBuilder (); 1305 1306 for (int i = 0; i < replacement.length(); i++) { 1307 char ch = replacement.charAt(i); 1308 1309 if ((ch == '\\' || ch == '$') && i + 1 < replacement.length()) { 1310 char digit; 1311 1312 if ('0' <= (digit = replacement.charAt(i + 1)) && digit <= '9') { 1313 int group = digit - '0'; 1314 i++; 1315 1316 if (i + 1 < replacement.length() && 1317 '0' <= (digit = replacement.charAt(i + 1)) && digit <= '9') { 1318 group = 10 * group + digit - '0'; 1319 i++; 1320 } 1321 1322 if (text.length() > 0) 1323 program.add(new TextReplacement(text)); 1324 1325 if (isEval) 1326 program.add(new GroupEscapeReplacement(group)); 1327 else 1328 program.add(new GroupReplacement(group)); 1329 1330 text.setLength(0); 1331 } 1332 else if (ch == '\\') { 1333 i++; 1334 1335 if (digit != '\\') { 1336 text.append('\\'); 1337 } 1338 text.append(digit); 1339 } else if (digit == '{') { 1342 i += 2; 1343 1344 int group = 0; 1345 1346 while (i < replacement.length() && 1347 '0' <= (digit = replacement.charAt(i)) && digit <= '9') { 1348 group = 10 * group + digit - '0'; 1349 1350 i++; 1351 } 1352 1353 if (digit != '}') { 1354 env.warning(L.l("bad regexp {0}", replacement)); 1355 throw new QuercusException("bad regexp"); 1356 } 1357 1358 if (text.length() > 0) 1359 program.add(new TextReplacement(text)); 1360 1361 if (isEval) 1362 program.add(new GroupEscapeReplacement(group)); 1363 else 1364 program.add(new GroupReplacement(group)); 1365 1366 text.setLength(0); 1367 } 1368 else 1369 text.append(ch); 1370 } 1371 else 1372 text.append(ch); 1373 } 1374 1375 if (text.length() > 0) 1376 program.add(new TextReplacement(text)); 1377 1378 return program; 1379 } 1380 1381 private static final String [] POSIX_CLASSES = { 1382 "[:alnum:]", "[:alpha:]", "[:blank:]", "[:cntrl:]", 1383 "[:digit:]", "[:graph:]", "[:lower:]", "[:print:]", 1384 "[:punct:]", "[:space:]", "[:upper:]", "[:xdigit:]" 1385 }; 1386 1387 private static final String [] REGEXP_CLASSES = { 1388 "\\p{Alnum}", "\\p{Alpha}", "\\p{Blank}", "\\p{Cntrl}", 1389 "\\p{Digit}", "\\p{Graph}", "\\p{Lower}", "\\p{Print}", 1390 "\\p{Punct}", "\\p{Space}", "\\p{Upper}", "\\p{XDigit}" 1391 }; 1392 1393 1397 private static String cleanRegexp(StringValue regexp, boolean isComments) 1399 { 1400 int len = regexp.length(); 1401 1402 StringBuilder sb = new StringBuilder (); 1403 char quote = 0; 1404 1405 for (int i = 0; i < len; i++) { 1406 char ch = regexp.charAt(i); 1407 1408 switch (ch) { 1409 case '\\': 1410 sb.append(ch); 1411 1412 if (i + 1 < len) { 1413 i++; 1414 1415 ch = regexp.charAt(i); 1416 1417 if (ch == '0' || 1418 '1' <= ch && ch <= '3' && i + 1 < len && '0' <= regexp.charAt(i + 1) && ch <= '7') { 1419 1421 sb.append('0'); 1423 sb.append(ch); 1424 } 1425 else if (ch == 'x' && i + 1 < len && regexp.charAt(i + 1) == '{') { 1426 int tail = regexp.indexOf('}', i + 1); 1427 1428 if (tail > 0) { 1429 StringValue hex = regexp.substring(i + 2, tail); 1430 1431 if (hex.length() == 1) 1432 sb.append("x0" + hex); 1433 else if (hex.length() == 2) 1434 sb.append("x" + hex); 1435 else if (hex.length() == 4) 1436 sb.append("u" + hex); 1437 else 1438 throw new QuercusRuntimeException(L.l("illegal hex escape")); 1439 1440 i = tail; 1441 } 1442 else { 1443 sb.append("\\x"); 1444 } 1445 } 1446 else 1447 sb.append(ch); 1448 } 1449 break; 1450 1451 case '[': 1452 if (quote == '[') { 1453 if (i + 1 < len && regexp.charAt(i + 1) == ':') { 1454 String test = regexp.substring(i).toString(); 1455 boolean hasMatch = false; 1456 1457 for (int j = 0; j < POSIX_CLASSES.length; j++) { 1458 if (test.startsWith(POSIX_CLASSES[j])) { 1459 hasMatch = true; 1460 1461 sb.append(REGEXP_CLASSES[j]); 1462 1463 i += POSIX_CLASSES[j].length() - 1; 1464 } 1465 } 1466 1467 if (! hasMatch) 1468 sb.append("\\["); 1469 } 1470 else 1471 sb.append("\\["); 1472 } 1473 else if (i + 1 < len && regexp.charAt(i + 1) == '[' 1474 && ! (i + 2 < len && regexp.charAt(i + 2) == ':')) { 1475 sb.append("[\\["); 1478 i += 1; 1479 } 1480 else if (i + 2 < len && 1481 regexp.charAt(i + 1) == '^' && 1482 regexp.charAt(i + 2) == ']') { 1483 sb.append("[^\\]"); 1484 i += 2; 1485 } 1486 else 1487 sb.append('['); 1488 1489 if (quote == 0) 1490 quote = '['; 1491 break; 1492 1493 case '#': 1494 if (quote == '[' && isComments) 1495 sb.append("\\#"); 1496 else 1497 sb.append(ch); 1498 break; 1499 1500 case ']': 1501 sb.append(ch); 1502 1503 if (quote == '[') 1504 quote = 0; 1505 break; 1506 1507 case '{': 1508 if (i + 1 < len && 1509 ('0' <= (ch = regexp.charAt(i + 1)) && ch <= '9' || ch == ',')) { 1510 sb.append("{"); 1511 for (i++; 1512 i < len && 1513 ('0' <= (ch = regexp.charAt(i)) && ch <= '9' || ch == ','); 1514 i++) { 1515 sb.append(ch); 1516 } 1517 1518 if (i < len) 1519 sb.append(regexp.charAt(i)); 1520 } 1521 else { 1522 sb.append("\\{"); 1523 } 1524 break; 1525 1526 case '}': 1527 sb.append("\\}"); 1528 break; 1529 1530 default: 1531 sb.append(ch); 1532 } 1533 } 1534 1535 return sb.toString(); 1536 } 1537 1538 1541 private static String toNonGreedy(String regexp) 1542 { 1543 int len = regexp.length(); 1544 1545 StringBuilder sb = new StringBuilder (); 1546 char quote = 0; 1547 1548 for (int i = 0; i < len; i++) { 1549 char ch = regexp.charAt(i); 1550 1551 switch (ch) { 1552 case '\\': 1553 sb.append(ch); 1554 1555 if (i + 1 < len) { 1556 sb.append(regexp.charAt(i + 1)); 1557 i++; 1558 } 1559 break; 1560 1561 case '[': 1562 sb.append(ch); 1563 1564 if (quote == 0) 1565 quote = ch; 1566 break; 1567 1568 case ']': 1569 sb.append(ch); 1570 1571 if (quote == '[') 1572 quote = 0; 1573 break; 1574 1575 case '(': 1577 sb.append(ch); 1578 1579 if (i + 1 < len) { 1580 ch = regexp.charAt(i + 1); 1581 1582 if (ch == '?') { 1583 sb.append(ch); 1584 i++; 1585 } 1586 } 1587 break; 1588 1589 case '*': 1590 case '?': 1591 case '+': 1592 sb.append(ch); 1593 1594 if (i + 1 < len && (ch = regexp.charAt(i + 1)) != '?') { 1595 sb.append('?'); 1596 } 1597 else { 1598 i++; 1600 } 1601 break; 1602 1603 default: 1604 sb.append(ch); 1605 } 1606 } 1607 1608 return sb.toString(); 1609 } 1610 1611 static class Replacement { 1612 void eval(StringBuilderValue sb, StringValue subject, Matcher matcher) 1613 { 1614 } 1615 } 1616 1617 static class TextReplacement 1618 extends Replacement 1619 { 1620 private char []_text; 1621 1622 TextReplacement(StringBuilder text) 1623 { 1624 int length = text.length(); 1625 1626 _text = new char[length]; 1627 1628 text.getChars(0, length, _text, 0); 1629 } 1630 1631 void eval(StringBuilderValue sb, StringValue subject, Matcher matcher) 1632 { 1633 sb.append(_text, 0, _text.length); 1634 } 1635 } 1636 1637 static class GroupReplacement 1638 extends Replacement 1639 { 1640 private int _group; 1641 1642 GroupReplacement(int group) 1643 { 1644 _group = group; 1645 } 1646 1647 void eval(StringBuilderValue sb, StringValue subject, Matcher matcher) 1648 { 1649 if (_group <= matcher.groupCount()) 1650 sb.append(subject.substring(matcher.start(_group), 1651 matcher.end(_group))); 1652 } 1653 } 1654 1655 static class GroupEscapeReplacement 1656 extends Replacement 1657 { 1658 private int _group; 1659 1660 GroupEscapeReplacement(int group) 1661 { 1662 _group = group; 1663 } 1664 1665 void eval(StringBuilderValue sb, StringValue subject, Matcher matcher) 1666 { 1667 if (_group <= matcher.groupCount()) { 1668 StringValue group = subject.substring(matcher.start(_group), 1669 matcher.end(_group));; 1670 int len = group.length(); 1671 1672 for (int i = 0; i < len; i++) { 1673 char ch = group.charAt(i); 1674 1675 if (ch == '\'') 1676 sb.append("\\\'"); 1677 else if (ch == '\"') 1678 sb.append("\\\""); 1679 else 1680 sb.append(ch); 1681 } 1682 } 1683 } 1684 } 1685 1686 static { 1687 PREG_QUOTE['\\'] = true; 1688 PREG_QUOTE['+'] = true; 1689 PREG_QUOTE['*'] = true; 1690 PREG_QUOTE['?'] = true; 1691 PREG_QUOTE['['] = true; 1692 PREG_QUOTE['^'] = true; 1693 PREG_QUOTE[']'] = true; 1694 PREG_QUOTE['$'] = true; 1695 PREG_QUOTE['('] = true; 1696 PREG_QUOTE[')'] = true; 1697 PREG_QUOTE['{'] = true; 1698 PREG_QUOTE['}'] = true; 1699 PREG_QUOTE['='] = true; 1700 PREG_QUOTE['!'] = true; 1701 PREG_QUOTE['<'] = true; 1702 PREG_QUOTE['>'] = true; 1703 PREG_QUOTE['|'] = true; 1704 PREG_QUOTE[':'] = true; 1705 } 1706} 1707 | Popular Tags |