1 package org.apache.oro.text.perl; 2 3 59 60 import java.util.*; 61 import org.apache.oro.text.regex.*; 62 import org.apache.oro.text.*; 63 import org.apache.oro.util.*; 64 65 158 public final class Perl5Util implements MatchResult { 159 160 private static final String __matchExpression = "m?(\\W)(.*)\\1([imsx]*)"; 161 162 163 private PatternCache __patternCache; 164 165 private Cache __expressionCache; 166 167 private Perl5Matcher __matcher = new Perl5Matcher(); 168 169 private Pattern __matchPattern; 170 171 private MatchResult __lastMatch; 172 173 178 private Object __originalInput; 179 180 184 private int __inputBeginOffset, __inputEndOffset; 185 186 187 private static final String __nullString = ""; 188 189 193 public static final int SPLIT_ALL = Util.SPLIT_ALL; 194 195 214 public Perl5Util(PatternCache cache) { 215 __patternCache = cache; 216 __expressionCache = new CacheLRU(cache.capacity()); 217 __compilePatterns(); 218 } 219 220 227 public Perl5Util() { 228 this(new PatternCacheLRU()); 229 } 230 231 235 private void __compilePatterns() { 236 Perl5Compiler compiler = new Perl5Compiler(); 237 238 try { 239 __matchPattern = 240 compiler.compile(__matchExpression, Perl5Compiler.SINGLELINE_MASK); 241 } catch(MalformedPatternException e) { 242 throw new RuntimeException (e.getMessage()); 245 } 246 } 247 248 259 private Pattern __parseMatchExpression(String pattern) 260 throws MalformedPerl5PatternException 261 { 262 int index, compileOptions; 263 String options, regex; 264 MatchResult result; 265 Object obj; 266 Pattern ret; 267 268 obj = __expressionCache.getElement(pattern); 269 270 try { 274 if(obj != null) 275 return (Pattern)obj; 276 } catch(ClassCastException e) { 277 } 279 280 if(!__matcher.matches(pattern, __matchPattern)) 281 throw new 282 MalformedPerl5PatternException("Invalid expression: " + 283 pattern); 284 285 result = __matcher.getMatch(); 286 287 regex = result.group(2); 288 compileOptions = Perl5Compiler.DEFAULT_MASK; 289 290 options = result.group(3); 291 292 if(options != null) { 293 index = options.length(); 294 295 while(index-- > 0) { 296 switch(options.charAt(index)) { 297 case 'i' : 298 compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK; 299 break; 300 case 'm' : compileOptions |= Perl5Compiler.MULTILINE_MASK; break; 301 case 's' : compileOptions |= Perl5Compiler.SINGLELINE_MASK; break; 302 case 'x' : compileOptions |= Perl5Compiler.EXTENDED_MASK; break; 303 default : 304 throw new 305 MalformedPerl5PatternException("Invalid options: " + options); 306 } 307 } 308 } 309 310 ret = __patternCache.getPattern(regex, compileOptions); 311 __expressionCache.addElement(pattern, ret); 312 313 return ret; 314 } 315 316 347 public synchronized boolean match(String pattern, char[] input) 348 throws MalformedPerl5PatternException 349 { 350 boolean result; 351 __parseMatchExpression(pattern); 352 353 result = __matcher.contains(input, __parseMatchExpression(pattern)); 354 355 if(result) { 356 __lastMatch = __matcher.getMatch(); 357 __originalInput = input; 358 __inputBeginOffset = 0; 359 __inputEndOffset = input.length; 360 } 361 362 return result; 363 } 364 365 366 397 public synchronized boolean match(String pattern, String input) 398 throws MalformedPerl5PatternException 399 { 400 return match(pattern, input.toCharArray()); 401 } 402 403 404 440 public synchronized boolean match(String pattern, PatternMatcherInput input) 441 throws MalformedPerl5PatternException 442 { 443 boolean result; 444 445 result = __matcher.contains(input, __parseMatchExpression(pattern)); 446 447 if(result) { 448 __lastMatch = __matcher.getMatch(); 449 __originalInput = input.getInput(); 450 __inputBeginOffset = input.getBeginOffset(); 451 __inputEndOffset = input.getEndOffset(); 452 } 453 454 return result; 455 } 456 457 458 469 public synchronized MatchResult getMatch() { 470 return __lastMatch; 471 } 472 473 474 545 public synchronized String substitute(String expression, String input) 548 throws MalformedPerl5PatternException 549 { 550 boolean backslash, finalDelimiter; 551 int index, compileOptions, numSubstitutions, numInterpolations; 552 int firstOffset, secondOffset, thirdOffset; 553 String result; 554 StringBuffer replacement; 555 Pattern compiledPattern; 556 char exp[], delimiter; 557 ParsedSubstitutionEntry entry; 558 Perl5Substitution substitution; 559 Object obj; 560 561 obj = __expressionCache.getElement(expression); 562 563 __nullTest: 564 if(obj != null) { 565 try { 570 entry = (ParsedSubstitutionEntry)obj; 571 } catch(ClassCastException e) { 572 break __nullTest; 573 } 574 575 result = Util.substitute(__matcher, entry._pattern, entry._substitution, 576 input, entry._numSubstitutions); 577 578 __lastMatch = __matcher.getMatch(); 579 580 return result; 581 } 582 583 exp = expression.toCharArray(); 584 585 if(exp.length < 4 || exp[0] != 's' || Character.isLetterOrDigit(exp[1]) 587 || exp[1] == '-') 588 throw new 589 MalformedPerl5PatternException("Invalid expression: " + expression); 590 delimiter = exp[1]; 591 firstOffset = 2; 592 secondOffset = thirdOffset = -1; 593 backslash = false; 594 595 for(index = firstOffset; index < exp.length; index++) { 597 if(exp[index] == '\\') 598 backslash = !backslash; 599 else if(exp[index] == delimiter && !backslash) { 600 secondOffset = index; 601 break; 602 } else if(backslash) 603 backslash = !backslash; 604 } 605 606 if(secondOffset == -1 || secondOffset == exp.length - 1) 607 throw new 608 MalformedPerl5PatternException("Invalid expression: " + expression); 609 610 612 backslash = false; 613 finalDelimiter = true; 614 replacement = new StringBuffer (exp.length - secondOffset); 615 for(index = secondOffset + 1; index < exp.length; index++) { 616 if(exp[index] == '\\') { 617 backslash = !backslash; 618 619 if(backslash && index + 1 < exp.length && exp[index + 1] == delimiter 624 && expression.lastIndexOf(delimiter, exp.length - 1) != (index + 1)) 625 { 626 finalDelimiter = false; 627 continue; 628 } 629 } else if(exp[index] == delimiter && finalDelimiter) { 630 thirdOffset = index; 631 break; 632 } else { 633 backslash = false; 634 finalDelimiter = true; 635 } 636 637 replacement.append(exp[index]); 638 } 639 640 if(thirdOffset == -1) 641 throw new 642 MalformedPerl5PatternException("Invalid expression: " + expression); 643 644 compileOptions = Perl5Compiler.DEFAULT_MASK; 645 numSubstitutions = 1; 646 647 if(delimiter != '\'') 649 numInterpolations = Perl5Substitution.INTERPOLATE_ALL; 650 else 651 numInterpolations = Perl5Substitution.INTERPOLATE_NONE; 652 653 for(index = thirdOffset + 1; index < exp.length; index++) { 655 switch(exp[index]) { 656 case 'i' : 657 compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK; 658 break; 659 case 'm' : compileOptions |= Perl5Compiler.MULTILINE_MASK; break; 660 case 's' : compileOptions |= Perl5Compiler.SINGLELINE_MASK; break; 661 case 'x' : compileOptions |= Perl5Compiler.EXTENDED_MASK; break; 662 case 'g' : numSubstitutions = Util.SUBSTITUTE_ALL; break; 663 case 'o' : numInterpolations = 1; break; 664 default : 665 throw new 666 MalformedPerl5PatternException("Invalid option: " + exp[index]); 667 } 668 } 669 670 compiledPattern = 671 __patternCache.getPattern(new String (exp, firstOffset, 672 secondOffset - firstOffset), 673 compileOptions); 674 substitution = 675 new Perl5Substitution(replacement.toString(), numInterpolations); 676 entry = new ParsedSubstitutionEntry(compiledPattern, substitution, 677 numSubstitutions); 678 __expressionCache.addElement(expression, entry); 679 680 result = Util.substitute(__matcher, compiledPattern, substitution, 681 input, numSubstitutions); 682 683 __lastMatch = __matcher.getMatch(); 684 685 return result; 686 } 687 688 689 748 public synchronized Vector split(String pattern, String input, int limit) 749 throws MalformedPerl5PatternException 750 { 751 int beginOffset, groups, index; 752 String group; 753 Vector results = new Vector(20); 754 MatchResult currentResult = null; 755 PatternMatcherInput pinput; 756 Pattern compiledPattern; 757 758 compiledPattern = __parseMatchExpression(pattern); 759 760 pinput = new PatternMatcherInput(input); 761 beginOffset = 0; 762 763 while(--limit != 0 && __matcher.contains(pinput, compiledPattern)) { 764 currentResult = __matcher.getMatch(); 765 766 results.addElement(input.substring(beginOffset, 767 currentResult.beginOffset(0))); 768 if((groups = currentResult.groups()) > 1) { 769 for(index = 1; index < groups; ++index) { 770 group = currentResult.group(index); 771 if(group != null && group.length() > 0) 772 results.addElement(group); 773 } 774 } 775 776 beginOffset = currentResult.endOffset(0); 777 } 778 779 results.addElement(input.substring(beginOffset, input.length())); 780 781 __lastMatch = currentResult; 783 784 return results; 785 } 786 787 793 public synchronized Vector split(String pattern, String input) 794 throws MalformedPerl5PatternException 795 { 796 return split(pattern, input, SPLIT_ALL); 797 } 798 799 806 public synchronized Vector split(String input) 807 throws MalformedPerl5PatternException 808 { 809 return split("/\\s+/", input); 810 } 811 812 816 821 public synchronized int length() { 822 return __lastMatch.length(); 823 } 824 825 831 public synchronized int groups() { 832 return __lastMatch.groups(); 833 } 834 835 836 847 public synchronized String group(int group) { 848 return __lastMatch.group(group); 849 } 850 851 863 public synchronized int begin(int group) { 864 return __lastMatch.begin(group); 865 } 866 867 868 878 public synchronized int end(int group) { 879 return __lastMatch.end(group); 880 } 881 882 883 893 public synchronized int beginOffset(int group) { 894 return __lastMatch.beginOffset(group); 895 } 896 897 908 public synchronized int endOffset(int group) { 909 return __lastMatch.endOffset(group); 910 } 911 912 917 public synchronized String toString() { 918 return __lastMatch.toString(); 919 } 920 921 922 927 public synchronized String preMatch() { 928 int begin; 929 930 if(__originalInput == null) 931 return __nullString; 932 933 begin = __lastMatch.beginOffset(0); 934 935 if(begin <= 0) 936 return __nullString; 937 938 if(__originalInput instanceof char[]) { 939 char[] input; 940 941 input = (char[])__originalInput; 942 943 if(begin > input.length) 946 begin = input.length; 947 948 return new String (input, __inputBeginOffset, begin); 949 } else if(__originalInput instanceof String ) { 950 String input; 951 952 input = (String )__originalInput; 953 954 if(begin > input.length()) 957 begin = input.length(); 958 959 return input.substring(__inputBeginOffset, begin); 960 } 961 962 return __nullString; 963 } 964 965 966 971 public synchronized String postMatch() { 972 int end; 973 974 if(__originalInput == null) 975 return __nullString; 976 977 end = __lastMatch.endOffset(0); 978 979 if(end < 0) 980 return __nullString; 981 982 if(__originalInput instanceof char[]) { 983 char[] input; 984 985 input = (char[])__originalInput; 986 if(end >= input.length) 989 return __nullString; 990 991 return new String (input, end, __inputEndOffset - end); 992 } else if(__originalInput instanceof String ) { 993 String input; 994 995 input = (String )__originalInput; 996 997 if(end >= input.length()) 1000 return __nullString; 1001 1002 return input.substring(end, __inputEndOffset); 1003 } 1004 1005 return __nullString; 1006 } 1007 1008 1009 1018 public synchronized char[] preMatchCharArray() { 1019 int begin; 1020 char[] result = null; 1021 1022 if(__originalInput == null) 1023 return null; 1024 1025 begin = __lastMatch.beginOffset(0); 1026 1027 if(begin <= 0) 1028 return null; 1029 1030 if(__originalInput instanceof char[]) { 1031 char[] input; 1032 1033 input = (char[])__originalInput; 1034 1035 if(begin >= input.length) 1038 begin = input.length; 1039 1040 result = new char[begin - __inputBeginOffset]; 1041 System.arraycopy(input, __inputBeginOffset, result, 0, result.length); 1042 } else if(__originalInput instanceof String ) { 1043 String input; 1044 1045 input = (String )__originalInput; 1046 1047 if(begin >= input.length()) 1050 begin = input.length(); 1051 1052 result = new char[begin - __inputBeginOffset]; 1053 input.getChars(__inputBeginOffset, begin, result, 0); 1054 } 1055 1056 return result; 1057 } 1058 1059 1060 1069 public synchronized char[] postMatchCharArray() { 1070 int end; 1071 char[] result = null; 1072 1073 if(__originalInput == null) 1074 return null; 1075 1076 end = __lastMatch.endOffset(0); 1077 1078 if(end < 0) 1079 return null; 1080 1081 if(__originalInput instanceof char[]) { 1082 int length; 1083 char[] input; 1084 1085 input = (char[])__originalInput; 1086 if(end >= input.length) 1089 return null; 1090 1091 length = __inputEndOffset - end; 1092 result = new char[length]; 1093 System.arraycopy(input, end, result, 0, length); 1094 } else if(__originalInput instanceof String ) { 1095 String input; 1096 1097 input = (String )__originalInput; 1098 1099 if(end >= __inputEndOffset) 1102 return null; 1103 1104 result = new char[__inputEndOffset - end]; 1105 input.getChars(end, __inputEndOffset, result, 0); 1106 } 1107 1108 return result; 1109 } 1110 1111} 1112 1113 | Popular Tags |