1 8 13 package jfun.parsec; 14 15 import jfun.parsec.pattern.CharPredicate; 16 import jfun.parsec.pattern.CharPredicates; 17 import jfun.parsec.pattern.Pattern; 18 import jfun.parsec.pattern.Patterns; 19 20 21 31 public final class Scanners { 32 33 39 public static Parser<_> many(CharPredicate cp){return many("many", cp);} 40 46 public static Parser<_> many1(CharPredicate cp){return many1("many1", cp);} 47 52 public static Parser<_> many(Pattern pp){return many("many", pp);} 53 58 public static Parser<_> many1(Pattern pp){return many1("many1", pp);} 59 66 public static Parser<_> many(final String name, final CharPredicate cp){ 67 return isPattern(name, Patterns.many(cp), "many"); 68 } 69 76 public static Parser<_> many1(final String name, final CharPredicate cp){ 77 return isPattern(name, Patterns.many1(cp), "many1"); 78 } 79 85 public static Parser<_> many(final String name, final Pattern pp){ 86 return isPattern(name, pp.many(), "many"); 87 } 88 94 public static Parser<_> many1(final String name, final Pattern pp){ 95 return isPattern(name, pp.many1(), "many1"); 96 } 97 98 103 public static Parser<_> isString(final String str){ 104 return isString(str, str); 105 } 106 112 public static Parser<_> isString(final String str, final String err){ 113 return isString("=="+str, str, err); 114 } 115 122 public static Parser<_> isString(final String name, final String str, final String err){ 123 return isPattern(name, Patterns.isString(str), err); 124 } 125 129 public static Parser<_> isWhitespaces(){ 130 return isWhitespaces("isWhitespaces"); 131 } 132 137 public static Parser<_> isWhitespaces(final String name){ 138 return isWhitespaces(name, "whitespaces"); 139 } 140 146 public static Parser<_> isWhitespaces(final String name, final String err){ 147 return isPattern(name, Patterns.many1(CharPredicates.isWhitespace()), err); 148 } 149 155 public static Parser<_> isPattern(final Pattern pp, final String err){ 156 return isPattern("isPattern", pp, err); 157 } 158 165 public static Parser<_> isPattern(final String name, final Pattern pp, final String err){ 166 return new Parser<_>(name){ 167 boolean apply(final ParseContext ctxt){ 168 final int at = ctxt.getAt(); 169 final CharSequence src = ctxt.getSource(); 170 final int mlen = pp.match(src, src.length(), at); 171 if(mlen < 0){ 172 return setErrorExpecting(err, ctxt); 173 } 174 ctxt.next(mlen); 175 return nothing(ctxt); 176 } 177 }; 178 } 179 180 187 public static Parser<_> isStringCI(final String str, final String err){ 188 return isStringCI("is " + str, str, err); 189 } 190 197 public static Parser<_> isStringCI(final String name, final String str, final String err){ 198 return isPattern(name, Patterns.isStringCI(str), err); 199 } 200 205 public static Parser<_> isStringCI(final String str){ 206 return isStringCI(str, str); 207 } 208 213 public static Parser<_> anyChar(){ 214 return anyChar("any character"); 215 } 216 222 public static Parser<_> anyChar(final String err){ 223 return anyChar("anyChar", err); 224 } 225 232 public static Parser<_> anyChar(final String name, final String err){ 233 return new Parser<_>(name){ 234 boolean apply(final ParseContext ctxt){ 235 if(ctxt.isEof()){ 236 return setErrorExpecting(err, ctxt); 237 } 238 ctxt.next(); 239 return nothing(ctxt); 240 } 241 }; 242 } 243 244 249 public static Parser<_> isChar(final CharPredicate cp){ 250 return isChar("isChar", cp); 251 } 252 258 public static Parser<_> isChar(final String name, final CharPredicate cp){ 259 return isChar(name, cp, "" + cp); 260 } 261 267 public static Parser<_> isChar(final CharPredicate cp, final String err){ 268 return isChar("isChar", cp, err); 269 } 270 277 public static Parser<_> isChar(final String name, final CharPredicate cp, final String err){ 278 return new Parser<_>(name){ 279 boolean apply(final ParseContext ctxt){ 280 if(ctxt.isEof()){ 281 return setErrorExpecting(err, ctxt); 282 } 283 final char c = ctxt.peekChar(); 284 if(cp.isChar(c)){ 285 ctxt.next(); 286 return nothing(ctxt); 287 } 288 else{ 289 return setErrorExpecting(err, ctxt); 290 } 291 } 292 }; 293 } 294 295 301 public static Parser<_> isChar(final char ch, final String err){ 302 return isChar("=="+ch, ch, err); 303 } 304 311 public static Parser<_> isChar(final String name, final char ch, final String err){ 312 return isChar(name, CharPredicates.isChar(ch), err); 313 } 314 319 public static Parser<_> isChar(final char ch){ 320 return isChar("=="+ch, ch); 321 } 322 328 public static Parser<_> isChar(final String name, final char ch){ 329 return isChar(name, ch, CharEncoder.encode(ch)); 330 } 331 337 public static Parser<_> notChar(final char ch, final String err){ 338 return notChar("!="+ch, ch, err); 339 } 340 347 public static Parser<_> notChar(final String name, final char ch, final String err){ 348 return isChar(name, CharPredicates.notChar(ch), err); 349 } 350 355 public static Parser<_> notChar(final char ch){ 356 return notChar("!="+ch, ch); 357 } 358 364 public static Parser<_> notChar(final String name, final char ch){ 365 return notChar(name, ch, "^"+CharEncoder.encode(ch)); 366 } 367 368 private static char[] copy(final char[] chars){ 369 return (char[])chars.clone(); 370 } 371 private static StringBuffer toString(final StringBuffer buf, final char[] chars){ 372 buf.append('['); 373 if(chars.length >0){ 374 buf.append(CharEncoder.encode(chars[0])); 375 for(int i=1; i<chars.length; i++){ 376 buf.append(',').append(CharEncoder.encode(chars[i])); 377 } 378 } 379 buf.append(']'); 380 return buf; 381 } 382 private static Parser<_> _among(final String name, final char[] chars, final String err){ 383 return isChar(name, CharPredicates.among(chars), err); 384 } 385 private static Parser<_> _notAmong(final String name, final char[] chars, final String err){ 386 return isChar(name, CharPredicates.notAmong(chars), err); 387 } 388 396 public static Parser<_> among(final String name, final char[] chars, final String err){ 397 if(chars.length==0)return Parsers.zero(); 398 if(chars.length==1)return isChar(name, chars[0], err); 399 return _among(name, copy(chars), err); 400 } 401 408 public static Parser<_> among(final String name, final char[] chars){ 409 final StringBuffer buf = new StringBuffer (); 410 toString(buf, chars); 411 return among(name, chars, buf.toString()); 412 } 413 420 public static Parser<_> among(final char[] chars, final String err){ 421 return among("among", chars, err); 422 } 423 429 public static Parser<_> among(final char[] chars){ 430 return among("among", chars); 431 } 432 440 public static Parser<_> notAmong(final String name, final char[] chars, final String err){ 441 if(chars.length==0)return anyChar(); 442 if(chars.length==1)return notChar(name, chars[0], err); 443 return _notAmong(name, copy(chars), err); 444 } 445 452 public static Parser<_> notAmong(final String name, final char[] chars){ 453 final StringBuffer buf = new StringBuffer (); 454 buf.append("^"); 455 toString(buf, chars); 456 return notAmong(name, chars, buf.toString()); 457 } 458 465 public static Parser<_> notAmong(final char[] chars, final String err){ 466 return among("notAmong", chars, err); 467 } 468 474 public static Parser<_> notAmong(final char[] chars){ 475 return notAmong("notAmong", chars); 476 } 477 478 484 public static Parser<_> isLineComment(final String start){ 485 return isLineComment("isLineComment", start); 486 } 487 494 public static Parser<_> isLineComment(final String name, final String start){ 495 return isPattern(name, Patterns.isLineComment(start), start); 496 } 497 501 public static Parser<_> javaLineComment(){ 502 return isLineComment("//"); 503 } 504 508 public static Parser<_> sqlLineComment(){ 509 return isLineComment("--"); 510 } 511 515 public static Parser<_> javaBlockComment(){ 516 return Parsers.seq(isString("/*"), 518 519 p_javaBlockCommented(), isString("*/")); 520 } 522 526 public static Parser<_> haskellBlockComment(){ 527 return 528 Parsers.seq(isString("{-"), p_haskellBlockCommented(), isString("-}")); 530 } 531 535 public static Parser<_> haskellLineComment(){ 536 return isLineComment("--"); 537 } 538 544 public static Parser<_> isBlockComment(final String start, final String end){ 545 return isBlockComment("isBlockComment", start, end); 546 } 547 554 public static Parser<_> isBlockComment( 555 final String name, final String start, final String end){ 556 final Pattern opening = Patterns.isString(start).seq(Patterns.notString(end).many()); 557 return isPattern(opening, start).seq(name, isString(end)); 558 } 559 566 public static Parser<_> isBlockComment(final String open, final String close, 567 final Pattern commented){ 568 final Pattern opening = Patterns.isString(open) 569 .seq(Patterns.isString(close).not().seq(commented).many()); 570 return isPattern(opening, open).seq(isString(close)); 571 } 572 579 public static Parser<_> isBlockComment(final Parser<_> open, 580 final Parser<_> close, final Parser<?> commented){ 581 return isBlockComment("isBlockComment", open, close, commented); 582 } 583 591 public static Parser<_> isBlockComment(final String name, 592 final Parser<_> open, final Parser<_> close, final Parser<?> commented){ 593 return Parsers.seq(name, open, close.not().seq(commented).many(), close); 594 } 595 602 public static Parser<_> isNestableBlockComment(final String open, final String close){ 603 return isNestableBlockComment(open, close, anyChar()); 604 } 605 613 public static Parser<_> isNestableBlockComment(final String open, final String close, 614 final Parser<?> commented){ 615 return isNestableBlockComment(isString(open), isString(close), commented); 616 } 617 625 public static Parser<_> isNestableBlockComment( 626 final Parser<?> open, final Parser<?> close, 627 final Parser<?> commented){ 628 return isNestableBlockComment("isNestableBlockComment", 629 open, close, commented); 630 } 631 640 public static Parser<_> isNestableBlockComment( 641 final String name, final Parser<?> open, final Parser<?> close, 642 final Parser<?> commented){ 643 return new Parser<_>(name){ 644 boolean apply(final ParseContext ctxt){ 645 if(!open.parse(ctxt)) return false; 646 for(int level=1; level>0;){ 647 final int at = ctxt.getAt(); 648 if(close.parse(ctxt)){ 649 if(at == ctxt.getAt()) 650 throw new IllegalParserStateException("closing comment scanner not consuming input."); 651 level--; 652 continue; 653 } 654 if(at != ctxt.getAt()) return false; 655 if(open.parse(ctxt)){ 656 if(at == ctxt.getAt()) 657 throw new IllegalParserStateException("opening comment scanner not consuming input."); 658 level++; 659 continue; 660 } 661 if(at != ctxt.getAt()) return false; 662 if(commented.parse(ctxt)){ 663 if(at == ctxt.getAt()) 664 throw new IllegalParserStateException("commented scanner not consuming input."); 665 continue; 666 } 667 return false; 668 } 669 return nothing(ctxt); 670 } 671 }; 672 } 673 679 public static Parser<_> isSqlString(){ 680 return isSqlString("isSqlString"); 681 } 682 689 public static Parser<_> isSqlString(final String name){ 690 697 final Parser<_> q = isChar('\''); 698 final Parser<_> qs = isPattern( 699 Patterns.regex("(('')|[^'])*"), "quoted string"); 700 return Parsers.between(name, q, q, qs); 701 } 702 708 public static Parser<_> isQuotedString(final String name){ 709 714 final Parser<_> q = Scanners.isChar('"'); 715 final Parser<_> qc = isPattern( 716 quoted_str ,"quoted string"); 717 return Parsers.between(name, q, q, qc); 718 } 719 private static final Pattern quoted_str = 720 Patterns.regex("((\\\\.)|[^\"\\\\])*"); 721 726 public static Parser<_> isQuotedString(){ 727 return isQuotedString("isQuotedString"); 728 } 729 736 private static final Pattern quoted_char = 737 Patterns.regex("(\\\\.)|[^'\\\\]"); 738 743 public static Parser<_> isQuotedChar(final String name){ 744 753 final Parser<_> q = Scanners.isChar('\''); 754 final Parser<_> qc = isPattern( 755 quoted_char ,"quoted char"); 756 return Parsers.between(name, q, q, qc); 757 } 758 762 public static Parser<_> isQuotedChar(){ 763 return isQuotedChar("isQuotedChar"); 764 } 765 771 public static Parser<_> quoted(final char c1, final char c2){ 772 return quoted("quoted", c1, c2); 773 } 774 781 public static Parser<_> quoted(final String name, final char c1, final char c2){ 782 return isPattern(name, Patterns.isChar(c1) 783 .seq(Patterns.many(CharPredicates.notChar(c2))), 784 ""+c1).seq(isChar(c2)); 785 } 786 792 public static Parser<_> quoted(final Parser<_> open, final Parser<_> close, final Parser<?> s){ 793 return quoted("quoted", open, close, s); 794 } 795 802 public static Parser<_> quoted( 803 final String name, final Parser<_> open, final Parser<_> close, final Parser<?> s){ 804 return Parsers.seq(name, open, s.many(), close); 805 } 806 811 public static Parser<_> javaDelimiter(){ 812 return javaDelimiter("javaDelimiter"); 813 } 814 820 public static Parser<_> javaDelimiter(final String name){ 821 return Parsers.sum( 822 isWhitespaces(), javaLineComment(), javaBlockComment()).many(name); 823 } 824 829 public static Parser<_> haskellDelimiter(){ 830 return haskellDelimiter("haskellDelimiter"); 831 } 832 838 public static Parser<_> haskellDelimiter(final String name){ 839 return Parsers.sum( 840 isWhitespaces(), haskellBlockComment(), haskellLineComment()).many(name); 841 } 842 847 public static Parser<_> sqlDelimiter(){ 848 return sqlDelimiter("sqlDelimiter"); 849 } 850 856 public static Parser<_> sqlDelimiter(final String name){ 857 return Parsers.sum(isWhitespaces(), sqlLineComment(), javaBlockComment()) 858 .many(name); 859 } 860 868 public static Parser<_> stdDelimiter(final String name, 869 final String lcomment, final String openc, final String closec){ 870 return Parsers.sum(isWhitespaces(), isLineComment(lcomment), isBlockComment(openc, closec)) 871 .many(name); 872 } 873 880 public static Parser<_> stdDelimiter(final String lcomment, final String openc, final String closec){ 881 return stdDelimiter("stdDelimiter", lcomment, openc, closec); 882 } 883 884 891 public static <R> Parser<R> delimited(final Parser<R> s, final String err){ 892 return delimited("delimited", s, err); 893 } 894 902 public static <R> Parser<R> delimited(final String name, final Parser<R> s, final String err){ 903 return s.followedBy(name, isChar(CharPredicates.isAlphaNumeric(), err).not()); 904 } 905 911 public static <R> Parser<R> delimited(final Parser<R> s){ 912 return delimited("delimited", s); 913 } 914 921 public static <R> Parser<R> delimited(final String name, final Parser<R> s){ 922 return delimited(name, s, "delimiter"); 923 } 924 933 public static Parser<_> scanChars(final Parser<?> p, final Parser<_> scanner, 934 final String module){ 935 return scanChars("scanChars", p, scanner, module); 936 } 937 947 public static Parser<_> scanChars(final String name, 948 final Parser<?> p, final Parser<_> scanner, final String module){ 949 return new Parser<_>(name){ 950 boolean apply(final ParseContext ctxt){ 951 final int from = ctxt.getAt(); 952 if(!p.parse(ctxt)) return false; 953 final ScannerState s0 = 954 new ScannerState(ctxt.getSource(), from, 955 module, ctxt.getPositionMap(), ctxt.getAt()-from, 956 ctxt.getReturn(), ctxt.getUserState()); 957 return ParserInternals.cont(ctxt, s0, scanner); 958 } 959 }; 960 } 961 967 private static Pattern notChar2(final char c1, final char c2){ 968 return new Pattern(){ 969 public int match(final CharSequence src, final int len, final int from){ 970 if(from >= len-1) return Pattern.MISMATCH; 971 if(src.charAt(from) == c1 && src.charAt(from+1) == c2) return Pattern.MISMATCH; 972 return 1; 973 } 974 }; 975 } 976 977 private static Parser<_> p_javaBlockCommented(){ 978 return isPattern(notChar2('*', '/').many(), "commented block"); 979 } 980 986 private static Parser<_> p_haskellBlockCommented(){ 987 return isPattern(notChar2('-', '}').many(), "commented block"); 988 } 989 private static boolean setErrorExpecting(final String msg, final ParseContext ctxt){ 990 return ParserInternals.setErrorExpecting(msg, ctxt); 991 } 992 private static boolean nothing(ParseContext ctxt){ 993 ctxt.setReturn(null); 994 return true; 995 } 996 1005 @Deprecated 1006 public static Parser<Tok[]> lexeme(final Parser<?> delim, final Parser<Tok> s){ 1007 return Lexers.lexeme(delim, s); 1008 } 1009 1019 @Deprecated 1020 public static Parser<Tok[]> lexeme(final String name, 1021 final Parser<?> delim, final Parser<Tok> s){ 1022 return Lexers.lexeme(name, delim, s); 1023 } 1024 1032 @Deprecated 1033 public static Parser<Tok> lexer(final Parser<?> s, final Tokenizer tn){ 1034 return Lexers.lexer(s,tn); 1035 } 1036 1045 @Deprecated 1046 public static Parser<Tok> lexer(final Parser<?> s, final Tokenizer tn, final String err){ 1047 return Lexers.lexer(s, tn, err); 1048 } 1049 1058 @Deprecated 1059 public static Parser<Tok> lexer(final String name, final Parser<?> s, final Tokenizer tn){ 1060 return Lexers.lexer(name, s, tn); 1061 } 1062 1072 @Deprecated 1073 public static Parser<Tok> lexer(final String name, final Parser<?> s, final Tokenizer tn, 1074 final String err){ 1075 return Lexers.lexer(name, s, tn, err); 1076 } 1077 1078} 1079 | Popular Tags |