1 8 13 package jfun.parsec.pattern; 14 15 import java.util.regex.Matcher ; 16 17 18 24 public final class Patterns { 25 26 27 30 public static Pattern chars_ge(int l){ 31 return hasAtLeast(l); 32 } 33 36 public static Pattern chars_eq(int l){ 37 return hasExact(l); 38 } 39 45 public static Pattern hasAtLeast(final int l){ 46 return new Pattern(){ 47 public int match(final CharSequence src, final int len, 48 final int from){ 49 if(from+l > len) return Pattern.MISMATCH; 50 else return l; 51 } 52 }; 53 } 54 60 public static Pattern hasExact(final int l){ 61 return new Pattern(){ 62 public int match(final CharSequence src, final int len, 63 final int from){ 64 if(from+l != len) return Pattern.MISMATCH; 65 else return l; 66 } 67 }; 68 } 69 74 public static Pattern eof(){ 75 return hasExact(0); 76 } 77 84 public static Pattern isChar(final char c){ 85 return new Pattern(){ 86 public int match(final CharSequence src, final int len, 87 final int from){ 88 if(from>=len) return Pattern.MISMATCH; 89 else if(src.charAt(from) != c) return Pattern.MISMATCH; 90 else return 1; 91 } 92 }; 93 } 94 101 public static Pattern range(final char c1, final char c2){ 102 return new Pattern(){ 103 public int match(final CharSequence src, final int len, 104 final int from){ 105 if(from>=len) return Pattern.MISMATCH; 106 final char c = src.charAt(from); 107 if(c >= c1 && c <= c2) return 1; 108 else return Pattern.MISMATCH; 109 } 110 }; 111 } 112 119 public static Pattern notRange(final char c1, final char c2){ 120 return new Pattern(){ 121 public int match(final CharSequence src, final int len, 122 final int from){ 123 if(from>=len) return Pattern.MISMATCH; 124 final char c = src.charAt(from); 125 if(c >= c1 && c <= c2) return Pattern.MISMATCH; 126 else return 1; 127 } 128 }; 129 } 130 136 public static Pattern among(final char[] cs){ 137 return isChar(CharPredicates.among(cs)); 138 } 139 145 public static Pattern notAmong(final char[] cs){ 146 return isChar(CharPredicates.notAmong(cs)); 147 } 148 155 public static Pattern notChar(final char c){ 156 return new Pattern(){ 157 public int match(final CharSequence src, final int len, 158 final int from){ 159 if(from>=len) return Pattern.MISMATCH; 160 else if(src.charAt(from) == c) return Pattern.MISMATCH; 161 else return 1; 162 } 163 }; 164 } 165 172 public static Pattern isChar(final CharPredicate cp){ 173 return new Pattern(){ 174 public int match(final CharSequence src, final int len, 175 final int from){ 176 if(from>=len) return Pattern.MISMATCH; 177 else if(cp.isChar(src.charAt(from))) return 1; 178 else return Pattern.MISMATCH; 179 } 180 public String toString(){ 181 return ""+cp; 182 } 183 }; 184 } 185 191 public static Pattern isEscaped(){ 192 return new Pattern(){ 193 public int match(final CharSequence src, final int len, 194 final int from){ 195 if(from>=len-1) return Pattern.MISMATCH; 196 else if(src.charAt(from)=='\\') return 2; 197 else return Pattern.MISMATCH; 198 } 199 }; 200 } 201 202 208 public static Pattern isLineComment(final String open){ 209 return seq(isString(open), many(CharPredicates.notChar('\n'))); 210 } 211 215 public static Pattern isString(final String str){ 216 return new Pattern(){ 217 public int match(final CharSequence src, final int len, 218 final int from){ 219 if(len - from < str.length()) return MISMATCH; 220 return matchString(str, src, len, from); 221 } 222 public String toString(){ 223 return str; 224 } 225 }; 226 } 227 231 public static Pattern isStringCI(final String str){ 232 return new Pattern(){ 233 public int match(final CharSequence src, final int len, 234 final int from){ 235 if(len - from < str.length()) return MISMATCH; 236 return matchStringCI(str, src, len, from); 237 } 238 }; 239 } 240 245 public static Pattern notString(final String str){ 246 return new Pattern(){ 247 public int match(final CharSequence src, final int len, 248 final int from){ 249 if(from >= len) return MISMATCH; 250 if(matchString(str, src, len, from) == Pattern.MISMATCH) 251 return 1; 252 else return MISMATCH; 253 } 254 }; 255 } 256 261 public static Pattern notStringCI(final String str){ 262 return new Pattern(){ 263 public int match(final CharSequence src, final int len, 264 final int from){ 265 if(from >= len) return MISMATCH; 266 if(matchStringCI(str, src, len, from) == Pattern.MISMATCH) 267 return 1; 268 else return MISMATCH; 269 } 270 }; 271 } 272 273 private static boolean compareIgnoreCase(final char a, final char b){ 274 return Character.toLowerCase(a) == Character.toLowerCase(b); 275 } 276 277 private static int matchString(String str, 278 final CharSequence src, final int len, 279 final int from){ 280 final int slen = str.length(); 281 if(len - from < slen) return Pattern.MISMATCH; 282 for(int i=0; i<slen; i++){ 283 final char exp = str.charAt(i); 284 final char enc = src.charAt(from+i); 285 if(exp != enc){ 286 return Pattern.MISMATCH; 287 } 288 } 289 return slen; 290 } 291 292 private static int matchStringCI(String str, 293 final CharSequence src, final int len, 294 final int from){ 295 final int slen = str.length(); 296 if(len - from < slen) return Pattern.MISMATCH; 297 for(int i=0; i<slen; i++){ 298 final char exp = str.charAt(i); 299 final char enc = src.charAt(from+i); 300 if(!compareIgnoreCase(exp, enc)){ 301 return Pattern.MISMATCH; 302 } 303 } 304 return slen; 305 } 306 307 313 public static Pattern not(final Pattern pp){ 314 return new Pattern(){ 315 public int match(final CharSequence src, final int len, 316 final int from){ 317 if(pp.match(src,len,from)!=Pattern.MISMATCH) return Pattern.MISMATCH; 318 else return 0; 319 } 320 }; 321 } 322 328 public static Pattern peek(final Pattern pp){ 329 return new Pattern(){ 330 public int match(final CharSequence src, final int len, 331 final int from){ 332 if(pp.match(src,len,from)==Pattern.MISMATCH) return Pattern.MISMATCH; 333 else return 0; 334 } 335 }; 336 } 337 343 public static Pattern or(final Pattern pp1, final Pattern pp2){ 344 return new Pattern(){ 345 public int match(final CharSequence src, final int len, 346 final int from){ 347 final int l1 = pp1.match(src,len,from); 348 if(l1!=Pattern.MISMATCH) return l1; 349 else return pp2.match(src, len, from); 350 } 351 }; 352 } 353 354 361 public static Pattern and(final Pattern...pps){ 362 if(pps.length==0) return always(); 363 if(pps.length==1) return pps[0]; 364 return _and(pps); 365 } 366 376 public static Pattern seq(final Pattern pp1, final Pattern pp2){ 377 return new Pattern(){ 378 public int match(final CharSequence src, final int len, 379 final int from){ 380 final int l1 = pp1.match(src,len,from); 381 if(l1==Pattern.MISMATCH) return l1; 382 final int l2 = pp2.match(src, len, from+l1); 383 if(l2==Pattern.MISMATCH) return l2; 384 return l1+l2; 385 } 386 }; 387 } 388 394 public static Pattern or(final Pattern... pps){ 395 if(pps.length==0) return never(); 396 else if(pps.length==1) return pps[0]; 397 return _or(pps); 398 } 399 400 406 public static Pattern seq(final Pattern... pps){ 407 if(pps.length==0) return always(); 408 else if(pps.length==1) return pps[0]; 409 return _seq(pps); 410 } 411 412 419 public static Pattern repeat(final int n, final CharPredicate cp){ 420 if(n==0) return always(); 421 if(n==1) return isChar(cp); 422 return new Pattern(){ 423 public int match(final CharSequence src, final int len, 424 final int from){ 425 return match_repeat(n, cp, src, len, from, 0); 426 } 427 }; 428 } 429 435 public static Pattern repeat(final int n, final Pattern pp){ 436 if(n==0) return always(); 437 if(n==1) return pp; 438 return new Pattern(){ 439 public int match(final CharSequence src, final int len, 440 final int from){ 441 return match_repeat(n, pp, src, len, from, 0); 442 } 443 }; 444 } 445 private static int min(int a, int b){ 446 return a>b?b:a; 447 } 448 456 public static Pattern many(final int min, final CharPredicate cp){ 457 if(min<0) throw new IllegalArgumentException ("min<0"); 458 return new Pattern(){ 459 public int match(final CharSequence src, final int len, 460 final int from){ 461 final int minlen = match_repeat(min, cp, src, len, from, 0); 462 if(minlen==MISMATCH) return MISMATCH; 463 return match_many(cp, src, len, from+minlen, minlen); 464 } 465 }; 466 } 467 472 public static Pattern many(final CharPredicate cp){ 473 return new Pattern(){ 474 public int match(final CharSequence src, final int len, 475 final int from){ 476 return match_many(cp, src, len, from, 0); 477 } 478 }; 479 } 480 488 public static Pattern many(final int min, final Pattern pp){ 489 if(min<0) throw new IllegalArgumentException ("min<0"); 490 return new Pattern(){ 491 public int match(final CharSequence src, final int len, 492 final int from){ 493 final int minlen = match_repeat(min, pp, src, len, from, 0); 494 if(MISMATCH==minlen) return MISMATCH; 495 return match_many(pp, src, len, from+minlen, minlen); 496 } 497 }; 498 } 499 505 public static Pattern many(final Pattern pp){ 506 return new Pattern(){ 507 public int match(final CharSequence src, final int len, 508 final int from){ 509 return match_many(pp, src, len, from, 0); 510 } 511 }; 512 } 513 522 public static Pattern some(final int min, final int max, final CharPredicate cp){ 523 if(max<0 || min <0 || min > max) throw new IllegalArgumentException (); 524 if(max == 0) return always(); 525 return new Pattern(){ 526 public int match(final CharSequence src, final int len, 527 final int from){ 528 final int minlen = match_repeat(min, cp, src, len, from, 0); 529 if(minlen==MISMATCH) return MISMATCH; 530 return match_some(max-min, cp, src, len, from+minlen, minlen); 531 } 532 }; 533 } 534 541 public static Pattern some(final int max, final CharPredicate cp){ 542 if(max < 0) throw new IllegalArgumentException ("max<0"); 543 if(max == 0) return always(); 544 return new Pattern(){ 545 public int match(final CharSequence src, final int len, 546 final int from){ 547 return match_some(max, cp, src, len, from, 0); 548 } 549 }; 550 } 551 560 public static Pattern some(final int min, final int max, final Pattern pp){ 561 if(min<0 || max<0 || min>max) throw new IllegalArgumentException (); 562 if(max == 0) return always(); 563 return new Pattern(){ 564 public int match(final CharSequence src, final int len, 565 final int from){ 566 final int minlen = match_repeat(min, pp, src, len, from, 0); 567 if(MISMATCH==minlen) return MISMATCH; 568 return match_some(max-min, pp, src, len, from+minlen, minlen); 569 } 570 }; 571 } 572 579 public static Pattern some(final int max, final Pattern pp){ 580 if(max<0) throw new IllegalArgumentException ("max<0"); 581 if(max == 0) return always(); 582 return new Pattern(){ 583 public int match(final CharSequence src, final int len, 584 final int from){ 585 return match_some(max, pp, src, len, from, 0); 586 } 587 }; 588 } 589 596 public static Pattern longer(final Pattern p1, final Pattern p2){ 597 return longest(p1, p2); 598 } 599 605 public static Pattern longest(final Pattern... pps){ 606 if(pps.length==0) return never(); 607 if(pps.length==1) return pps[0]; 608 return new Pattern(){ 609 public int match(final CharSequence src, final int len, 610 final int from){ 611 int r = MISMATCH; 612 for(int i=0;i<pps.length;i++){ 613 final int l = pps[i].match(src,len,from); 614 if(l > r) r = l; 615 } 616 return r; 617 } 618 }; 619 } 620 627 public static Pattern shorter(final Pattern p1, final Pattern p2){ 628 return shortest(p1, p2); 629 } 630 636 public static Pattern shortest(final Pattern... pps){ 637 if(pps.length==0) return never(); 638 if(pps.length==1) return pps[0]; 639 return new Pattern(){ 640 public int match(final CharSequence src, final int len, 641 final int from){ 642 int r = MISMATCH; 643 for(int i=0;i<pps.length;i++){ 644 final int l = pps[i].match(src,len,from); 645 if(l != MISMATCH){ 646 if(r==MISMATCH || l < r) 647 r = l; 648 } 649 } 650 return r; 651 } 652 }; 653 } 654 663 public static Pattern ifelse(final Pattern cond, final Pattern yes, final Pattern no){ 664 return new Pattern(){ 665 public int match(final CharSequence src, final int len, 666 final int from){ 667 final int lc = cond.match(src, len, from); 668 if(lc==MISMATCH){ 669 return no.match(src, len, from); 670 } 671 else{ 672 final int ly = yes.match(src, len, from+lc); 673 if(ly==MISMATCH) return MISMATCH; 674 else return lc+ly; 675 } 676 } 677 }; 678 } 679 685 public static Pattern many1(final CharPredicate cp){ 686 return many(1, cp); 687 } 688 692 public static Pattern optional(final Pattern pp){ 693 return new Pattern(){ 694 public int match(final CharSequence src, final int len, 695 final int from){ 696 final int l= pp.match(src, len, from); 697 return (l==Pattern.MISMATCH)?0:l; 698 } 699 }; 700 } 701 702 706 public static Pattern never(){ 707 return _never; 708 } 709 713 public static Pattern always(){ 714 return _always; 715 } 716 717 718 719 725 public static Pattern isDecimalL(){ 726 final CharPredicate cp = CharPredicates.isDigit(); 727 return seq(many1(cp), 728 optional( 729 seq(isChar('.'), many(cp)) 730 ) 731 ); 732 } 733 737 public static Pattern isDecimalR(){ 738 return seq(isChar('.'), many1(CharPredicates.isDigit())); 739 } 740 744 public static Pattern isDecimal(){ 745 return or(isDecimalL(), isDecimalR()); 746 } 747 752 public static Pattern isWord(){ 753 757 return regex("[a-zA-Z_][0-9a-zA-Z_]*"); 758 } 759 763 public static Pattern isInteger(){ 764 return many1(CharPredicates.isDigit()); 765 } 766 770 public static Pattern isOctInteger(){ 771 return seq(isChar('0'), many(CharPredicates.range('0','7'))); 772 } 773 778 public static Pattern isDecInteger(){ 779 return seq(range('1', '9'), many(CharPredicates.isDigit())); 780 } 781 786 public static Pattern isHexInteger(){ 787 return seq(or(isString("0x"), isString("0X")), 788 many1(CharPredicates.isHexDigit())); 789 } 790 795 public static Pattern isExponential(){ 796 return seq( 797 among(new char[]{'e','E'}), 798 optional(isChar('-')), 799 isInteger() 800 ); 801 } 802 807 public static Pattern regex(final java.util.regex.Pattern p){ 808 return new Pattern(){ 809 public int match(final CharSequence src, final int len, 810 final int from){ 811 if(from > len) return Pattern.MISMATCH; 812 final Matcher matcher = p.matcher(src.subSequence(from, len)); 813 if(matcher.lookingAt()){ 814 return matcher.end(); 815 } 816 else return Pattern.MISMATCH; 817 } 818 }; 819 } 820 825 public static Pattern regex(String s){ 826 return regex(java.util.regex.Pattern.compile(s)); 827 } 828 829 834 public static Pattern regex_pattern(){ 835 return regex_pattern; 836 } 837 841 public static Pattern regex_modifiers(){ 842 return regex_modifiers; 843 } 844 private static final Pattern _never = new Pattern(){ 845 public int match(final CharSequence src, final int len, 846 final int from){ 847 return Pattern.MISMATCH; 848 } 849 }; 850 private static final Pattern _always = new Pattern(){ 851 public int match(final CharSequence src, final int len, 852 final int from){ 853 return 0; 854 } 855 }; 856 857 private static int match_repeat(final int n, final CharPredicate cp, 858 final CharSequence src, final int len, final int from, final int acc){ 859 final int tail = from + n; 860 if(tail > len) return Pattern.MISMATCH; 861 for(int i=from;i<tail;i++){ 862 if(!cp.isChar(src.charAt(i))) return Pattern.MISMATCH; 863 } 864 return n+acc; 865 } 866 private static int match_repeat(final int n, final Pattern pp, 867 final CharSequence src, final int len, final int from, final int acc){ 868 int end = from; 869 for(int i=0;i<n;i++){ 870 final int l = pp.match(src,len,end); 871 if(l==Pattern.MISMATCH) return Pattern.MISMATCH; 872 end += l; 873 } 874 return end-from+acc; 875 } 876 private static int match_some(final int max, final CharPredicate cp, 877 final CharSequence src, final int len, final int from, final int acc){ 878 final int k = min(max+from, len); 879 for(int i=from;i<k;i++){ 880 if(!cp.isChar(src.charAt(i))) return i-from+acc; 881 } 882 return k-from+acc; 883 } 884 private static int match_some(final int max, final Pattern pp, 885 final CharSequence src, final int len, final int from, final int acc){ 886 int begin = from; 887 for(int i=0;i<max;i++){ 888 final int l = pp.match(src, len, begin); 889 if(Pattern.MISMATCH==l) return begin-from+acc; 890 begin+=l; 891 } 892 return begin-from+acc; 893 } 894 private static int match_many(final CharPredicate cp, 895 final CharSequence src, final int len, final int from, final int acc){ 896 for(int i=from;i<len;i++){ 897 if(!cp.isChar(src.charAt(i))) return i-from+acc; 898 } 899 return len-from+acc; 900 } 901 private static int match_many(final Pattern pp, 902 final CharSequence src, final int len, final int from, final int acc){ 903 for(int i=from;;){ 904 final int l = pp.match(src,len,i); 905 if(Pattern.MISMATCH==l) return i-from+acc; 906 if(l==0) return i-from+acc; i += l; 909 } 910 } 911 private static Pattern _or(final Pattern[] pps){ 912 return new Pattern(){ 913 public int match(final CharSequence src, final int len, 914 final int from){ 915 for(int i=0;i<pps.length;i++){ 916 final int l = pps[i].match(src,len,from); 917 if(l!=Pattern.MISMATCH) return l; 918 } 919 return Pattern.MISMATCH; 920 } 921 }; 922 } 923 private static Pattern _seq(final Pattern[] pps){ 924 return new Pattern(){ 925 public int match(final CharSequence src, final int len, 926 final int from){ 927 int end = from; 928 for(int i=0;i<pps.length;i++){ 929 final int l = pps[i].match(src,len,end); 930 if(l==Pattern.MISMATCH) return l; 931 end += l; 932 } 933 return end-from; 934 } 935 }; 936 } 937 private static Pattern _and(final Pattern[] pps){ 938 return new Pattern(){ 939 public int match(final CharSequence src, final int len, 940 final int from){ 941 int ret = 0; 942 for(int i=0;i<pps.length;i++){ 943 final int l = pps[i].match(src,len,from); 944 if(l==MISMATCH) return MISMATCH; 945 if(l>ret) ret=l; 946 } 947 return ret; 948 } 949 }; 950 } 951 952 private static final jfun.parsec.pattern.Pattern getRegularExpressionPattern(){ 953 final jfun.parsec.pattern.Pattern quote = jfun.parsec.pattern.Patterns.isChar('/'); 954 final jfun.parsec.pattern.Pattern escape = jfun.parsec.pattern.Patterns.isChar('\\') 955 .seq(jfun.parsec.pattern.Patterns.hasAtLeast(1)); 956 final char[] not_allowed = {'/', '\n', '\r', '\\'}; 957 final jfun.parsec.pattern.Pattern content = jfun.parsec.pattern.Patterns.or( 958 escape, jfun.parsec.pattern.Patterns.notAmong(not_allowed) 959 ); 960 return quote.seq(content.many()).seq(quote); 961 } 962 private static final jfun.parsec.pattern.Pattern getModifiersPattern(){ 963 return jfun.parsec.pattern.Patterns.isChar(CharPredicates.isAlpha()).many(); 964 } 965 private static final Pattern regex_pattern = getRegularExpressionPattern(); 966 private static final Pattern regex_modifiers = getModifiersPattern(); 967 } 968 | Popular Tags |