1 31 32 package org.antlr.works.syntax; 33 34 import org.antlr.works.ate.syntax.generic.ATESyntaxLexer; 35 import org.antlr.works.ate.syntax.generic.ATESyntaxParser; 36 import org.antlr.works.ate.syntax.misc.ATEScope; 37 import org.antlr.works.ate.syntax.misc.ATEToken; 38 import org.antlr.works.syntax.element.*; 39 40 import java.util.*; 41 42 47 public class GrammarSyntaxParser extends ATESyntaxParser { 48 49 private static final ElementRewriteBlock REWRITE_BLOCK = new ElementRewriteBlock(); 50 private static final ElementArgumentBlock ARGUMENT_BLOCK = new ElementArgumentBlock(); 51 private static final ElementRewriteFunction REWRITE_FUNCTION = new ElementRewriteFunction(); 52 53 public static final String BEGIN_GROUP = "// $<"; 54 public static final String END_GROUP = "// $>"; 55 56 public static final String TOKENS_BLOCK_NAME = "tokens"; 57 public static final String OPTIONS_BLOCK_NAME = "options"; 58 public static final String PARSER_HEADER_BLOCK_NAME = "@header"; 59 public static final String LEXER_HEADER_BLOCK_NAME = "@lexer::header"; 60 public static final String PARSER_MEMBERS_BLOCK_NAME = "@members"; 61 public static final String LEXER_MEMBERS_BLOCK_NAME = "@lexer::members"; 62 63 public static final List<String > blockIdentifiers; 64 public static final List<String > ruleModifiers; 65 public static final List<String > keywords; 66 public static final List<String > predefinedReferences; 67 68 public List<ElementRule> rules = new ArrayList<ElementRule>(); 69 public List<ElementGroup> groups = new ArrayList<ElementGroup>(); 70 public List<ElementBlock> blocks = new ArrayList<ElementBlock>(); public List<ElementAction> actions = new ArrayList<ElementAction>(); public List<ElementReference> references = new ArrayList<ElementReference>(); 73 public List<ATEToken> decls = new ArrayList<ATEToken>(); 74 75 public ElementGrammarName name; 76 77 private LabelTable labels = new LabelTable(); 78 private List<ATEToken> unresolvedReferences = new ArrayList<ATEToken>(); 79 private Set<String > declaredReferenceNames = new HashSet<String >(); 80 private Map<ATEToken,ElementRule> refsToRules = new HashMap<ATEToken,ElementRule>(); 81 82 private ElementRule currentRule; 83 84 static { 85 blockIdentifiers = new ArrayList<String >(); 86 blockIdentifiers.add(OPTIONS_BLOCK_NAME); 87 blockIdentifiers.add(TOKENS_BLOCK_NAME); 88 blockIdentifiers.add(PARSER_HEADER_BLOCK_NAME); 89 blockIdentifiers.add(LEXER_HEADER_BLOCK_NAME); 90 blockIdentifiers.add(PARSER_MEMBERS_BLOCK_NAME); 91 blockIdentifiers.add(LEXER_MEMBERS_BLOCK_NAME); 92 93 ruleModifiers = new ArrayList<String >(); 94 ruleModifiers.add("protected"); 95 ruleModifiers.add("public"); 96 ruleModifiers.add("private"); 97 ruleModifiers.add("fragment"); 98 99 keywords = new ArrayList<String >(); 100 keywords.addAll(blockIdentifiers); 101 keywords.addAll(ruleModifiers); 102 keywords.add("returns"); 103 keywords.add("init"); 104 105 predefinedReferences = new ArrayList<String >(); 106 predefinedReferences.add("EOF"); 107 } 108 109 public GrammarSyntaxParser() { 110 } 111 112 @Override 113 public void parseTokens() { 114 rules.clear(); 115 groups.clear(); 116 blocks.clear(); 117 actions.clear(); 118 references.clear(); 119 decls.clear(); 120 currentRule = null; 121 declaredReferenceNames.clear(); 122 unresolvedReferences.clear(); 123 refsToRules.clear(); 124 125 if(!nextToken()) return; 126 127 while(true) { 128 129 if(matchName()) continue; 130 if(matchScope()) continue; if(matchBlock()) continue; 132 if(matchRule()) continue; 133 134 if(matchRuleGroup()) continue; 136 if(matchSingleComment(0)) continue; 137 if(matchComplexComment(0)) continue; 138 139 if(!nextToken()) break; 141 } 142 143 resolveReferences(); 144 } 145 146 153 public void resolveReferencesWithExternalNames(Set<String > externalNames) { 154 for(int i=unresolvedReferences.size()-1; i >= 0; i--) { 155 ATEToken ref = unresolvedReferences.get(i); 156 if(externalNames.contains(ref.getAttribute())) { 157 ref.type = GrammarSyntaxLexer.TOKEN_REFERENCE; 158 references.add(new ElementReference(refsToRules.get(ref), ref)); 159 unresolvedReferences.remove(i); 160 } 161 } 162 } 163 164 167 private void resolveReferences() { 168 for(int i=unresolvedReferences.size()-1; i >= 0; i--) { 169 ATEToken ref = unresolvedReferences.get(i); 170 if(declaredReferenceNames.contains(ref.getAttribute())) { 171 ref.type = GrammarSyntaxLexer.TOKEN_REFERENCE; 172 references.add(new ElementReference(refsToRules.get(ref), ref)); 173 unresolvedReferences.remove(i); 174 } 175 } 176 } 177 178 184 private boolean matchName() { 185 if(!isID(0)) return false; 186 187 mark(); 188 if(tryMatchName()) { 189 return true; 190 } else { 191 rewind(); 192 return false; 193 } 194 } 195 196 private boolean tryMatchName() { 197 ATEToken start = T(0); 198 199 if(ElementGrammarName.isKnownType(start.getAttribute())) { 201 if(!nextToken()) return false; 202 } 203 204 if(!matchID(0, "grammar")) return false; 205 206 ATEToken name = T(0); 208 if(!nextToken()) return false; 209 210 if(!matchSEMI(0)) return false; 212 213 this.name = new ElementGrammarName(name, start, T(-1), start); 214 return true; 215 } 216 217 226 private boolean matchScope() { 227 if(!isID(0, "scope")) return false; 228 229 mark(); 230 231 ATEToken start = T(0); 233 if(!matchID(0, "scope")) return false; 234 235 matchID(0); 237 238 if(isOpenBLOCK(0)) { 240 ATEToken beginBlock = T(0); 241 if(matchBalancedToken(ATESyntaxLexer.TOKEN_LCURLY, ATESyntaxLexer.TOKEN_RCURLY, null, true)) { 242 beginBlock.type = GrammarSyntaxLexer.TOKEN_BLOCK_LIMIT; 243 T(-1).type = GrammarSyntaxLexer.TOKEN_BLOCK_LIMIT; 244 start.type = GrammarSyntaxLexer.TOKEN_BLOCK_LABEL; 245 return true; 246 } 247 } else { 248 if(matchSEMI(0)) return true; 249 } 250 251 rewind(); 252 return false; 253 } 254 255 265 private boolean matchBlock() { 266 return matchBlock(null); 267 } 268 269 private boolean matchBlock(String label) { 270 if(label == null && !isID(0)) return false; 271 if(label != null && !isID(0, label)) return false; 272 273 mark(); 274 275 ATEToken start = T(0); 276 int startIndex = getPosition(); 277 if(label == null) { 278 if(!matchID(0)) return false; 279 } else { 280 if(!matchID(0, label)) return false; 281 } 282 283 ElementBlock block = new ElementBlock(start.getAttribute().toLowerCase(), start); 284 ATEToken beginBlock = T(0); 285 if(matchBalancedToken(ATESyntaxLexer.TOKEN_LCURLY, ATESyntaxLexer.TOKEN_RCURLY, block, true)) { 286 beginBlock.type = GrammarSyntaxLexer.TOKEN_BLOCK_LIMIT; 287 T(-1).type = GrammarSyntaxLexer.TOKEN_BLOCK_LIMIT; 288 start.type = GrammarSyntaxLexer.TOKEN_BLOCK_LABEL; 289 blocks.add(block); 290 291 block.end = T(-1); 292 block.internalTokens = new ArrayList<ATEToken>(getTokens().subList(startIndex, getPosition())); 293 block.parse(); 294 if(block.isTokenBlock) { 295 List<ATEToken> tokens = block.getDeclaredTokens(); 296 for(int i=0; i<tokens.size(); i++) { 297 ATEToken lexerToken = tokens.get(i); 298 lexerToken.type = GrammarSyntaxLexer.TOKEN_DECL; 299 addDeclaration(lexerToken); 300 } 301 } 302 return true; 303 } 304 305 rewind(); 306 return false; 307 } 308 309 320 private boolean matchRule() { 321 mark(); 322 try { 323 if(tryMatchRule()) { 324 return true; 325 } else { 326 rewind(); 327 return false; 328 } 329 } finally { 330 currentRule = null; 331 } 332 } 333 334 private boolean tryMatchRule() { 335 ATEToken start = T(0); 336 if(start == null) return false; 337 338 if(ruleModifiers.contains(start.getAttribute())) { 340 if(!nextToken()) return false; 342 } 343 344 ElementToken tokenName = (ElementToken) T(0); 346 String name = tokenName.getAttribute(); 347 if(!matchID(0)) return false; 348 349 matchArguments(); 351 352 if(matchID(0, "returns")) { 354 matchArguments(); 355 } 356 357 matchChar(0, "!"); 359 360 while(true) { 362 if(matchScope()) continue; 363 if(matchBlock()) continue; 364 if(matchSingleComment(0)) continue; 365 if(matchComplexComment(0)) continue; 366 367 if(isCOLON(0)) { 368 nextToken(); 371 break; 372 } else { 373 return false; 375 } 376 } 377 378 final ATEToken colonToken = T(-1); 380 final int oldRefsSize = references.size(); 381 final int oldBlocksSize = blocks.size(); 382 final int oldActionsSize = actions.size(); 383 currentRule = new ElementRule(this, name, start, colonToken, null); 384 labels.clear(); 385 while(true) { 386 if(matchEndOfRule(tokenName, oldRefsSize, oldBlocksSize, oldActionsSize)) return true; 388 389 if(matchBlock(OPTIONS_BLOCK_NAME)) continue; 391 392 if(matchRewriteTemplate()) continue; 394 395 if(matchAssignment(labels)) continue; 397 398 if(matchInternalRefInRule()) continue; 400 401 if(matchAction()) continue; 403 404 if(!nextToken()) return false; 406 } 407 } 408 409 private boolean matchEndOfRule(ElementToken tokenName, int oldRefsSize, int oldBlocksSize, int oldActionsSize) { 410 if(!matchSEMI(0)) return false; 411 412 matchRuleExceptionGroup(); 414 415 currentRule.end = T(-1); 417 418 tokenName.type = GrammarSyntaxLexer.TOKEN_DECL; 420 addDeclaration(tokenName); 421 422 if(references.size() > oldRefsSize) { 423 currentRule.setReferencesIndexes(oldRefsSize, references.size()-1); 424 } 425 426 if(blocks.size() > oldBlocksSize) { 427 currentRule.setBlocksIndexes(oldBlocksSize, blocks.size()-1); 428 } 429 430 if(actions.size() > oldActionsSize) { 431 currentRule.setActionsIndexes(oldActionsSize, actions.size()-1); 432 } 433 434 currentRule.completed(); 436 437 rules.add(currentRule); 439 return true; 440 } 441 442 private boolean matchInternalRefInRule() { 443 if(!matchID(0)) return false; 444 445 ATEToken refToken = T(-1); 447 while(isChar(0, ".") && isID(1)) { 450 if(!skip(2)) return false; 451 } 452 453 matchArguments(); 455 456 addReference(refToken, false); 458 return true; 459 } 460 461 private boolean matchAction() { 462 if(!isOpenBLOCK(0)) return false; 463 464 ATEToken t0 = T(0); 466 ElementAction action = new ElementAction(this, currentRule, t0); 467 if(matchBalancedToken(ATESyntaxLexer.TOKEN_LCURLY, ATESyntaxLexer.TOKEN_RCURLY, action, true)) { 468 t0.type = GrammarSyntaxLexer.TOKEN_BLOCK_LIMIT; 469 T(-1).type = GrammarSyntaxLexer.TOKEN_BLOCK_LIMIT; 470 471 action.end = T(-1); 472 action.actionNum = actions.size(); 473 action.setScope(currentRule); 474 actions.add(action); 475 return true; 476 } else { 477 return false; 478 } 479 } 480 481 private boolean matchAssignment(LabelTable labels) { 482 mark(); 483 484 ATEToken label = T(0); 485 if(matchID(0)) { 486 if(matchChar(0, "=")) { 487 label.type = GrammarSyntaxLexer.TOKEN_LABEL; 488 labels.add(label.getAttribute()); 489 return true; 490 } else if(isChar(0, "+") && isChar(1, "=")) { 491 label.type = GrammarSyntaxLexer.TOKEN_LABEL; 492 labels.add(label.getAttribute()); 493 skip(2); 494 return true; 495 } 496 } 497 498 rewind(); 499 return false; 500 } 501 502 559 560 private boolean matchRewriteTemplate() { 561 if(!isTokenType(0, GrammarSyntaxLexer.TOKEN_REWRITE)) return false; 562 563 if(!nextToken()) return false; 564 565 568 if(matchAction()) { 569 if(!matchChar(0, "?")) return true; 572 573 } 576 577 if(isID(0, "template")) { 579 if(!matchRewriteTemplateHead()) return false; 581 582 if(matchDoubleQuotedString()) return true; 583 if(matchDoubleAngleString()) return true; 584 } else if(matchRewriteTemplateHead()) { 585 } else if(matchRewriteIndirectTemplateHead()) { 587 } else if(matchAction()) { 589 } else { 591 return true; 592 } 593 594 return true; 595 } 596 597 private boolean matchRewriteIndirectTemplateHead() { 598 if(!isLPAREN(0)) return false; 599 600 mark(); 601 if(tryMatchRewriteIndirectTemplateHead()) { 602 return true; 603 } else { 604 rewind(); 605 return false; 606 } 607 } 608 609 private boolean tryMatchRewriteIndirectTemplateHead() { 610 if(!matchLPAREN(0)) return false; 611 if(!matchAction()) return false; 612 if(!matchRPAREN(0)) return false; 613 614 if(!matchLPAREN(0)) return false; 615 if(!matchRewriteTemplateArgs()) return false; 616 return matchRPAREN(0); 617 } 618 619 private boolean matchRewriteTemplateHead() { 620 if(!isID(0)) return false; 621 622 mark(); 623 if(tryMatchRewriteTemplateHead()) { 624 return true; 625 } else { 626 rewind(); 627 return false; 628 } 629 } 630 631 private boolean tryMatchRewriteTemplateHead() { 632 if(!matchID(0)) return false; 633 if(!matchLPAREN(0)) return false; 634 if(!matchRewriteTemplateArgs()) return false; 635 return matchRPAREN(0); 636 637 } 638 639 private boolean matchRewriteTemplateArgs() { 640 if(matchRewriteTemplateArg()) { 641 while(matchChar(0, ",")) { 642 matchSingleComment(0); 643 matchComplexComment(0); 644 if(!matchRewriteTemplateArg()) return false; 645 } 646 } 647 return true; 648 } 649 650 private boolean matchRewriteTemplateArg() { 651 if(!isID(0) && !isChar(1, "=")) return false; 652 653 mark(); 654 if(tryMatchRewriteTemplateArg()) { 655 return true; 656 } else { 657 rewind(); 658 return false; 659 } 660 } 661 662 private boolean tryMatchRewriteTemplateArg() { 663 if(!matchID(0)) return false; 664 if(!matchChar(0, "=")) return false; 665 666 return matchBalancedToken(ATESyntaxLexer.TOKEN_LCURLY, ATESyntaxLexer.TOKEN_RCURLY, REWRITE_FUNCTION, true); 667 668 } 669 670 private boolean matchDoubleQuotedString() { 671 if(isTokenType(0, ATESyntaxLexer.TOKEN_DOUBLE_QUOTE_STRING)) { 672 T(0).scope = REWRITE_BLOCK; 673 nextToken(); 674 return true; 675 } else { 676 return false; 677 } 678 } 679 680 private boolean matchDoubleAngleString() { 681 return matchBalancedToken(GrammarSyntaxLexer.TOKEN_OPEN_DOUBLE_ANGLE, GrammarSyntaxLexer.TOKEN_CLOSE_DOUBLE_ANGLE, REWRITE_BLOCK, false); 682 } 683 684 private boolean matchArguments() { 685 return matchBalancedToken(ATESyntaxLexer.TOKEN_LBRACK, ATESyntaxLexer.TOKEN_RBRACK, ARGUMENT_BLOCK, true); 686 } 687 688 private void matchRuleExceptionGroup() { 690 if(!matchOptional("exception")) 691 return; 692 693 if(isOpenBLOCK(1)) 695 nextToken(); 696 697 while(matchOptional("catch")) { 698 nextToken(); nextToken(); } 701 } 702 703 707 private boolean matchRuleGroup() { 708 if(!isSingleComment(0)) return false; 709 710 ATEToken token = T(0); 711 String comment = token.getAttribute(); 712 713 if(comment.startsWith(BEGIN_GROUP)) { 714 groups.add(new ElementGroup(comment.substring(BEGIN_GROUP.length(), comment.length()-1), rules.size()-1, token)); 715 nextToken(); 716 return true; 717 } else if(comment.startsWith(END_GROUP)) { 718 groups.add(new ElementGroup(rules.size()-1, token)); 719 nextToken(); 720 return true; 721 } 722 return false; 723 } 724 725 731 private boolean addReference(ATEToken ref, boolean addOnlyIfKnownLabel) { 732 refsToRules.put(ref, currentRule); 733 if(labels.lookup(ref.getAttribute())) { 734 ref.type = GrammarSyntaxLexer.TOKEN_LABEL; 736 return true; 737 } else { 738 if(!addOnlyIfKnownLabel) { 739 ref.type = GrammarSyntaxLexer.TOKEN_REFERENCE; 740 references.add(new ElementReference(refsToRules.get(ref), ref)); 741 } 742 return false; 743 } 744 } 745 746 private void addDeclaration(ATEToken token) { 747 decls.add(token); 748 declaredReferenceNames.add(token.getAttribute()); 749 } 750 751 761 private boolean matchBalancedToken(int open, int close, ATEScope scope, boolean matchInternalRef) { 762 if(T(0) == null || T(0).type != open) return false; 763 764 mark(); 765 int balance = 0; 766 while(true) { 767 T(0).scope = scope; 768 if(T(0).type == open) 769 balance++; 770 else if(T(0).type == close) { 771 balance--; 772 if(balance == 0) { 773 nextToken(); 774 return true; 775 } 776 } 777 if(!nextToken()) break; 778 779 matchInternalRefInBalancedToken(matchInternalRef); 780 } 781 rewind(); 782 return false; 783 } 784 785 809 810 private void matchInternalRefInBalancedToken(boolean matchInternalRef) { 811 if(matchInternalRef && isChar(0, "$") && isID(1)) { 812 T(0).type = GrammarSyntaxLexer.TOKEN_INTERNAL_REF; 813 814 ATEToken ref = T(1); 816 if(!addReference(ref, true)) { 817 824 unresolvedReferences.add(ref); 825 } 826 } 827 } 828 829 private boolean matchOptional(String t) { 830 if(isID(1, t)) { 831 nextToken(); 832 return true; 833 } else 834 return false; 835 } 836 837 private boolean matchChar(int index, String c) { 838 if(isChar(index, c)) { 839 nextToken(); 840 return true; 841 } else { 842 return false; 843 } 844 845 } 846 847 private boolean matchID(int index) { 848 if(isID(index)) { 849 nextToken(); 850 return true; 851 } else { 852 return false; 853 } 854 } 855 856 private boolean matchID(int index, String text) { 857 if(isID(index, text)) { 858 nextToken(); 859 return true; 860 } else { 861 return false; 862 } 863 } 864 865 private boolean matchSEMI(int index) { 866 if(isSEMI(index)) { 867 nextToken(); 868 return true; 869 } else { 870 return false; 871 } 872 } 873 874 private boolean matchLPAREN(int index) { 875 if(isLPAREN(index)) { 876 nextToken(); 877 return true; 878 } else { 879 return false; 880 } 881 } 882 883 private boolean matchRPAREN(int index) { 884 if(isRPAREN(index)) { 885 nextToken(); 886 return true; 887 } else { 888 return false; 889 } 890 } 891 892 private boolean isLPAREN(int index) { 893 return isTokenType(index, ATESyntaxLexer.TOKEN_LPAREN); 894 } 895 896 private boolean isRPAREN(int index) { 897 return isTokenType(index, ATESyntaxLexer.TOKEN_RPAREN); 898 } 899 900 private boolean isSEMI(int index) { 901 return isTokenType(index, ATESyntaxLexer.TOKEN_SEMI); 902 } 903 904 private boolean isCOLON(int index) { 905 return isTokenType(index, ATESyntaxLexer.TOKEN_COLON); 906 } 907 908 private boolean isOpenBLOCK(int index) { 909 return isTokenType(index, ATESyntaxLexer.TOKEN_LCURLY); 910 } 911 912 private class LabelTable { 913 914 Set<String > labels = new HashSet<String >(); 915 916 public void clear() { 917 labels.clear(); 918 } 919 920 public void add(String label) { 921 labels.add(label); 922 } 923 924 public boolean lookup(String label) { 925 return labels.contains(label); 926 } 927 } 928 929 } 930 | Popular Tags |