1 8 9 package beaver.spec; 10 11 import java.util.ArrayList ; 12 import java.util.Arrays ; 13 import java.util.HashMap ; 14 import java.util.HashSet ; 15 import java.util.Iterator ; 16 17 import beaver.Symbol; 18 import beaver.comp.util.Log; 19 import beaver.spec.Production.RHS; 20 import beaver.spec.ast.Declaration; 21 import beaver.spec.ast.GrammarTreeRoot; 22 import beaver.spec.ast.Rule; 23 import beaver.spec.ast.TreeWalker; 24 25 28 public class GrammarBuilder extends TreeWalker 29 { 30 33 static class RuleWalker extends TreeWalker 34 { 35 public void visit(GrammarTreeRoot node) 36 { 37 for (int i = 0; i < node.rules.length; i++) 38 { 39 node.rules[i].accept(this); 40 } 41 } 42 } 43 44 47 static class DeclarationWalker extends TreeWalker 48 { 49 public void visit(GrammarTreeRoot node) 50 { 51 for (int i = 0; i < node.declarations.length; i++) 52 { 53 node.declarations[i].accept(this); 54 } 55 } 56 } 57 58 65 static private boolean checkBraces(String code) 66 { 67 boolean ovr = false; 68 int n = 0, len = code.length(); 69 for (int i = 0; i < len; i++) 70 { 71 char c = code.charAt(i); 72 if (c == '{') 73 n++; 74 else if (c == '}') 75 n--; 76 if (n < 0) 77 { 78 ovr = true; 79 } 80 } 81 return !ovr && n == 0; 82 } 83 84 static private String trimCode(String code) 85 { 86 if (code != null) 87 { 88 int i = code.length(); 89 while (Character.isWhitespace(code.charAt(--i))); 90 if (code.charAt(i) == '\n') 91 { 92 i--; 93 if (code.charAt(i) == '\r') 94 i--; 95 } 96 code = code.substring(0, i + 1); 97 } 98 return code; 99 } 100 101 102 private Log log; 103 104 105 private Grammar grammar; 106 107 108 private HashMap symbols; 109 110 111 private int n_nonterms; 112 113 114 private int n_terms; 115 116 117 private int n_rules; 118 119 public GrammarBuilder(Log log) 120 { 121 this.log = log; 122 } 123 124 public Grammar getGrammar() 125 { 126 return grammar; 127 } 128 129 public void visit(GrammarTreeRoot root) 130 { 131 grammar = new Grammar(); 132 133 symbols = new HashMap (89); 134 symbols.put(grammar.error.name, grammar.error); 135 136 n_nonterms = 1; n_terms = 1; n_rules = 0; 139 140 final HashMap tokens = new HashMap (89); 141 final ArrayList goals = new ArrayList (); 142 143 root.accept(new TreeWalker() 144 { 145 public void visit(Declaration.Terminals decl) 146 { 147 for (int i = 0; i < decl.symbols.length; i++) 148 { 149 String sym_name = (String ) decl.symbols[i].value; 150 if (!symbols.containsKey(sym_name)) 151 { 152 symbols.put(sym_name, new Terminal(sym_name)); 153 n_terms++; 154 tokens.put(sym_name, decl.symbols[i]); 155 } 156 } 157 } 158 public void visit(Rule rule) 159 { 160 String lhs_sym_name = rule.getLHSSymbolName(); 161 if (!symbols.containsKey(lhs_sym_name)) 162 { 163 symbols.put(lhs_sym_name, new NonTerminal(lhs_sym_name)); 164 n_nonterms++; 165 } 166 else if (tokens.containsKey(lhs_sym_name)) 167 { 168 log.error(rule.lhs_sym, "nonterminal was declared as a terminal"); 169 symbols.put(lhs_sym_name, new NonTerminal(lhs_sym_name)); 170 n_nonterms++; 171 tokens.remove(lhs_sym_name); 172 n_terms--; 173 } 174 super.visit(rule); 175 } 176 public void visit(Rule.Definition rhs) 177 { 178 String prec_sym_name = rhs.getPrecedenceSymbolName(); 179 if (prec_sym_name != null && !symbols.containsKey(prec_sym_name)) 180 { 181 GrammarSymbol sym = new Terminal(prec_sym_name); 182 sym.id = -1; symbols.put(prec_sym_name, sym); 184 } 185 n_rules++; 186 } 187 }); 188 root.accept(new RuleWalker() 189 { 190 public void visit(Rule.Definition.Element rhs_item) 191 { 192 String rhs_sym_name = rhs_item.getName(); 193 GrammarSymbol rhs_sym = (GrammarSymbol) symbols.get(rhs_sym_name); 194 if (rhs_sym == null) 195 { 196 log.error(rhs_item.sym_name, "symbol is neither a terminal nor a nonterminal of the grammar"); 197 symbols.put(rhs_sym_name, rhs_sym = new Terminal(rhs_sym_name)); 198 rhs_sym.id = -1; } 200 else if (rhs_sym instanceof Terminal) 201 { 202 if (rhs_sym.id < 0) 203 log.error(rhs_item.sym_name, "symbol is not declared as a grammar terminal"); 204 else 205 tokens.remove(rhs_sym_name); 206 } 207 } 208 }); 209 for (Iterator i = tokens.values().iterator(); i.hasNext();) 210 { 211 Symbol token = (Symbol) i.next(); 212 log.warning(token, "declared terminal is not used by the grammar"); 213 symbols.remove(token.value); 214 n_terms--; 215 } 216 root.accept(new DeclarationWalker() 217 { 218 219 private int precedence = Integer.MAX_VALUE; 220 private HashSet imports = new HashSet (23); 221 222 public void visit(GrammarTreeRoot root) 223 { 224 imports.add(Grammar.EBNF_LIST_TYPE); 225 imports.add("beaver.*"); 226 super.visit(root); 227 grammar.imports = (String []) imports.toArray(new String [imports.size()]); 228 } 229 public void visit(Declaration.Header decl) 230 { 231 if (grammar.prolog == null) 232 { 233 String text = (String ) decl.code.value; 234 int i = 0; 235 char c; 236 while (Character.isWhitespace(c = text.charAt(i)) || c == '\n') i++; 237 238 grammar.prolog = i > 0 ? text.substring(i) : text; 239 } 240 else 241 { 242 grammar.prolog += (String ) decl.code.value; 243 } 244 } 245 public void visit(Declaration.PackageName decl) 246 { 247 if (grammar.package_name != null) 248 { 249 log.warning(decl.name, "Parser package has been already defined as \"" + grammar.package_name + "\", new name ignored."); 250 } 251 else 252 { 253 grammar.package_name = decl.getName(); 254 } 255 } 256 public void visit(Declaration.Imports decl) 257 { 258 for (int i = 0; i < decl.symbols.length; i++) 259 { 260 imports.add(decl.symbols[i].value); 261 } 262 } 263 public void visit(Declaration.ClassName decl) 264 { 265 if (grammar.class_name != null) 266 { 267 log.warning(decl.name, "Parser class name has been already defined as \"" + grammar.class_name + "\", new name ignored."); 268 } 269 else 270 { 271 grammar.class_name = decl.getName(); 272 } 273 } 274 public void visit(Declaration.ClassCode decl) 275 { 276 if (grammar.class_code != null) 277 { 278 log.warning(decl.code, "Embedded parser class code has been already defined, new code ignored."); 279 } 280 else 281 { 282 grammar.class_code = trimCode(getCode(decl)); 283 } 284 } 285 public void visit(Declaration.ConstructorCode decl) 286 { 287 if (grammar.init_code != null) 288 { 289 log.warning(decl.code, "Parser initialization code has been already defined, new code ignored."); 290 } 291 else 292 { 293 grammar.init_code = trimCode(getCode(decl)); 294 } 295 } 296 public void visit(Declaration.Goal decl) 297 { 298 String sym_name = decl.getName(); 299 GrammarSymbol sym = (GrammarSymbol) symbols.get(sym_name); 300 if (sym == null) 301 { 302 log.error(decl.name, "Symbol is undefined"); 303 } 304 else if (sym instanceof Terminal) 305 { 306 log.error(decl.name, "Symbol is a terminal"); 307 } 308 else 309 { 310 goals.add(sym); 311 } 312 } 313 public void visit(Declaration.TypeOf decl) 314 { 315 String type = decl.getTypeName(); 316 for (int i = 0; i < decl.symbols.length; i++) 317 { 318 GrammarSymbol sym = (GrammarSymbol) symbols.get(decl.symbols[i].value); 319 if (sym == null) 320 { 321 log.error(decl.symbols[i], "Symbol is undefined"); 322 } 323 else if (sym.type != null) 324 { 325 log.error(decl.symbols[i], "Symbol's Java type is already set to \"" + sym.type + "\""); 326 } 327 else 328 { 329 sym.type = type; 330 } 331 } 332 } 333 public void visit(Declaration.LeftAssoc decl) 334 { 335 setPrecedence(decl, Terminal.Associativity.LEFT); 336 } 337 public void visit(Declaration.RightAssoc decl) 338 { 339 setPrecedence(decl, Terminal.Associativity.RIGHT); 340 } 341 public void visit(Declaration.NonAssoc decl) 342 { 343 setPrecedence(decl, Terminal.Associativity.NONE); 344 } 345 346 private void setPrecedence(Declaration.SymbolsContainer decl, Terminal.Associativity type) 347 { 348 for (int i = 0; i < decl.symbols.length; i++) 349 { 350 String sym_name = (String ) decl.symbols[i].value; 351 GrammarSymbol sym = (GrammarSymbol) symbols.get(sym_name); 352 if (sym == null) 353 { 354 log.warning(decl.symbols[i], "Symbol is not used by the grammar"); 355 } 356 else if (sym instanceof NonTerminal) 357 { 358 log.error(decl.symbols[i], "Symbol is a non-terminal."); 359 } 360 else 361 { 362 ((Terminal) sym).setPrecedence(precedence, type); 363 } 364 } 365 precedence--; 366 } 367 368 private String getCode(Declaration.CodeContainer decl) 369 { 370 String code = decl.getCode(); 371 if (!checkBraces(code)) 372 { 373 log.warning(decl, "Java code has unbalanced braces"); 374 } 375 return code; 376 } 377 }); 378 379 final ArrayList rules = new ArrayList (n_rules * 2); 381 if (goals.isEmpty()) 382 { 383 log.warning(root.rules[0].lhs_sym, "Grammar has not declared any goals, will use first declared nonterminal"); 384 grammar.goal_symbol = (NonTerminal) symbols.get(root.rules[0].getLHSSymbolName()); 385 } 386 else if (goals.size() == 1) { 388 grammar.goal_symbol = (NonTerminal) goals.get(0); 389 } 390 else { 392 NonTerminal[] alts = (NonTerminal[]) goals.toArray(new NonTerminal[goals.size()]); 393 394 grammar.goal_symbol = new NonTerminal("$goal"); 395 symbols.put(grammar.goal_symbol.name, grammar.goal_symbol); 396 n_nonterms++; 397 398 rules.add(new Production(rules.size(), grammar.goal_symbol, new Production.RHS(alts[0]))); 399 400 for (int i = 1; i < alts.length; i++) 401 { 402 Terminal term = new Terminal("$" + alts[i].name); 403 symbols.put(term.name, term); 404 n_terms++; 405 rules.add(new Production(rules.size(), grammar.goal_symbol, new Production.RHS(term, alts[i]))); 406 } 407 } 408 409 root.accept(new RuleWalker() 410 { 411 boolean found = false; 412 413 public void visit(GrammarTreeRoot root) 414 { 415 super.visit(root); 416 if (found) 417 { 418 NonTerminal new_goal_sym = new NonTerminal("$goal"); 419 new_goal_sym.type = grammar.goal_symbol.type; 420 symbols.put(new_goal_sym.name, new_goal_sym); 421 n_nonterms++; 422 423 rules.add(new Production(rules.size(), new_goal_sym, new Production.RHS(grammar.goal_symbol))); 424 425 grammar.goal_symbol = new_goal_sym; 426 } 427 } 428 429 public void visit(Rule.Definition.Element rhs_item) 430 { 431 if (!found) 432 { 433 found = grammar.goal_symbol.name.equals(rhs_item.getName()); 434 } 435 } 436 }); 437 root.accept(new RuleWalker() 438 { 439 private NonTerminal lhs_sym; 440 private ArrayList rhs_elements = new ArrayList (); 441 442 public void visit(Rule rule) 443 { 444 lhs_sym = (NonTerminal) symbols.get(rule.getLHSSymbolName()); 445 super.visit(rule); 446 } 447 448 public void visit(Rule.Definition rhs) 449 { 450 rhs_elements.clear(); 451 452 super.visit(rhs); 453 454 Production rule = new Production(rules.size(), 455 lhs_sym, 456 new Production.RHS((Production.RHS.Item[]) rhs_elements.toArray(new Production.RHS.Item[rhs_elements.size()])), 457 rhs.getPrecedenceSymbolName() == null ? null : (Terminal) symbols.get(rhs.getPrecedenceSymbolName())); 458 String code = rhs.getReduceActionCode(); 459 if (code != null) 460 { 461 if (!checkBraces(code)) 462 { 463 log.warning(rhs.code, "Java code has unbalanced braces"); 464 } 465 rule.code = trimCode(code); 466 } 467 if (rhs.elements.length > 0) 468 { 469 rule.start_pos = rhs.elements[0].getStart(); 470 rule.end_pos = rhs.elements[rhs.elements.length - 1].getEnd(); 471 } 472 rules.add(rule); 473 } 474 475 public void visit(Rule.Definition.Element rhs_item) 476 { 477 GrammarSymbol rhs_sym = rhs_item.ebnf_sym.value == null ? (GrammarSymbol) symbols.get(rhs_item.getName()) : getExtendedSymbol(rhs_item); 478 479 rhs_elements.add(new Production.RHS.Item(rhs_sym, rhs_item.getAlias())); 480 } 481 482 private GrammarSymbol getExtendedSymbol(Rule.Definition.Element rhs_item) 483 { 484 switch (rhs_item.getExtUseMark()) 485 { 486 case '?': return getOpt(rhs_item.getName()); 487 case '+': return getLst(rhs_item.getName()); 488 case '*': return getOpt(getLst(rhs_item.getName()).name); 489 } 490 throw new IllegalArgumentException ("unrecognized extended symbol notation"); 491 } 492 493 private NonTerminal getOpt(String sym_name) 494 { 495 String opt_sym_name = "opt$" + sym_name; 496 NonTerminal opt_sym = (NonTerminal) symbols.get(opt_sym_name); 497 if (opt_sym == null) 498 { 499 GrammarSymbol item_sym = (GrammarSymbol) symbols.get(sym_name); 500 symbols.put(opt_sym_name, opt_sym = new NonTerminal(opt_sym_name, item_sym.type)); 501 n_nonterms++; 502 rules.add(new Production(rules.size(), opt_sym, new Production.RHS())); 503 rules.add(new Production(rules.size(), opt_sym, new Production.RHS(item_sym))); 504 } 505 return opt_sym; 506 } 507 508 private NonTerminal getLst(String sym_name) 509 { 510 String lst_sym_name = "lst$" + sym_name; 511 NonTerminal lst_sym = (NonTerminal) symbols.get(lst_sym_name); 512 if (lst_sym == null) 513 { 514 GrammarSymbol item_sym = (GrammarSymbol) symbols.get(sym_name); 515 symbols.put(lst_sym_name, lst_sym = new NonTerminal(lst_sym_name, item_sym.type != null ? "+" + item_sym.type : null)); 516 n_nonterms++; 517 518 rules.add(new Production(rules.size(), lst_sym, new Production.RHS(item_sym))); 519 rules.add(new Production(rules.size(), lst_sym, new Production.RHS(lst_sym, item_sym))); 520 } 521 return lst_sym; 522 } 523 }); 524 525 grammar.rules = (Production[]) rules.toArray(new Production[rules.size()]); 526 grammar.nonterminals = getNonTerminals(); 527 grammar.terminals = getTerminals(); 528 529 propagateTypes(grammar.nonterminals); 530 writeListsCode(grammar.nonterminals); 531 } 532 533 private Terminal[] getTerminals() 534 { 535 Production[] rules = new Production[grammar.rules.length]; 536 System.arraycopy(grammar.rules, 0, rules, 0, rules.length); 537 Arrays.sort(rules, Production.NUM_TERM_CMP); 538 539 Terminal[] terms = new Terminal[n_terms]; 540 terms[0] = grammar.eof; 541 int n = 1; 542 for (int i = 0; i < rules.length; i++) 543 { 544 RHS rhs = rules[i].rhs; 545 if (rhs.n_term > 0) 546 { 547 for (int j = 0; j < rhs.items.length; j++) 548 { 549 GrammarSymbol sym = rhs.items[j].symbol; 550 if (sym instanceof Terminal && sym.id == 0) 551 { 552 Terminal term = (Terminal) sym; 553 term.id = (short) n; 554 terms[n++] = term; 555 } 556 } 557 } 558 } 559 if (n < n_terms) 560 throw new IllegalStateException ("found less terminals than previously counted"); 561 562 return terms; 563 } 564 565 private NonTerminal[] getNonTerminals() 566 { 567 Production[] rules = new Production[grammar.rules.length]; 568 System.arraycopy(grammar.rules, 0, rules, 0, rules.length); 569 Arrays.sort(rules, Production.NUM_NONTERM_CMP); 570 571 NonTerminal[] nts = new NonTerminal[n_nonterms]; 572 int n = 0; 573 for (int i = 0; i < rules.length; i++) 574 { 575 RHS rhs = rules[i].rhs; 576 if (rhs.n_nonterm > 0) 577 { 578 for (int j = 0; j < rhs.items.length; j++) 579 { 580 GrammarSymbol sym = rhs.items[j].symbol; 581 if (sym instanceof NonTerminal && sym.id == 0) 582 { 583 NonTerminal nt = (NonTerminal) sym; 584 nt.id = (short) (n + n_terms); 585 nts[n++] = nt; 586 } 587 } 588 } 589 } 590 grammar.goal_symbol.id = (short) (n + n_terms); 591 nts[n++] = grammar.goal_symbol; 592 if (grammar.error.id == 0) 593 { 594 grammar.error.id = (short) (n + n_terms); 595 nts[n++] = grammar.error; 596 } 597 if (n < n_nonterms) 598 throw new IllegalStateException ("found less nonterminals than previously counted"); 599 600 return nts; 601 } 602 603 private void propagateTypes(NonTerminal[] nts) 604 { 605 boolean more_found; 606 do { 607 more_found = false; 608 609 for (int i = 0; i < nts.length; i++) 610 { 611 if (nts[i].type != null) 612 continue; 613 614 if (nts[i].definitions.size() != 2) 615 { 616 if (nts[i].definitions.size() == 1) 617 { 618 Production rule = nts[i].definitions.start(); 619 if (rule.code == null && rule.rhs.size() == 1) 620 { 621 GrammarSymbol item = rule.rhs.start().symbol; 622 if (item.type != null) 623 { 624 nts[i].type = item.type; 625 more_found = true; 626 } 627 } 628 } 629 continue; 630 } 631 632 Production elem_rule = nts[i].definitions.start(); 633 if (elem_rule.rhs.size() != 1) 634 { 635 if ((elem_rule = elem_rule.next_definition).rhs.size() != 1) 636 continue; 637 } 638 if (elem_rule.code != null) 639 continue; 640 641 GrammarSymbol elem = elem_rule.rhs.start().symbol; 642 if (elem.type == null) 643 continue; 644 645 Production next_rule = elem_rule.next_definition != null ? elem_rule.next_definition : nts[i].definitions.start(); 646 if (next_rule.code != null) 647 continue; 648 649 if (next_rule.rhs.size() == 0) 650 { 651 nts[i].type = elem.type; 652 more_found = true; 653 } 654 else if (next_rule.rhs.size() >= 2 && next_rule.rhs.start().symbol == nts[i] && next_rule.rhs.end().symbol == elem) 655 { 656 nts[i].type = "+" + elem.type; 657 more_found = true; 658 } 659 } 660 } 661 while (more_found); 662 } 663 664 private void writeListsCode(NonTerminal[] nts) 665 { 666 for (int i = 0; i < nts.length; i++) 667 { 668 if (nts[i].definitions.size() != 2) 669 continue; 670 671 Production new_list_rule = nts[i].definitions.start(); 672 if (new_list_rule.rhs.size() != 1) 673 { 674 if ((new_list_rule = new_list_rule.next_definition).rhs.size() != 1) 675 continue; 676 } 677 if (new_list_rule.code != null) 678 continue; 679 GrammarSymbol elem = new_list_rule.rhs.start().symbol; 680 681 Production add_elem_rule = nts[i].definitions.start(); 682 if (add_elem_rule.rhs.size() < 2) 683 { 684 if ((add_elem_rule = add_elem_rule.next_definition).rhs.size() < 2) 685 continue; 686 } 687 if (add_elem_rule.code != null) 688 continue; 689 if (add_elem_rule.rhs.start().symbol != nts[i] || add_elem_rule.rhs.end().symbol != elem) 690 continue; 691 692 if (nts[i].type == null) 693 { 694 nts[i].type = "+" + (elem.type != null ? elem.type : "beaver.Symbol"); 695 } 696 new_list_rule.code = new StringBuffer (Grammar.EBNF_LIST_TYPE_NAME.length() * 2 + 77) 697 .append(Grammar.EBNF_LIST_TYPE_NAME).append(" lst = new ").append(Grammar.EBNF_LIST_TYPE_NAME).append("(); ") 698 .append("lst.add(_symbols[offset + 1]").append(elem.type != null ? ".value" : "").append("); ") 699 .append("return new Symbol(lst);") 700 .toString(); 701 add_elem_rule.code = new StringBuffer (Grammar.EBNF_LIST_TYPE_NAME.length() + 88) 702 .append("((").append(Grammar.EBNF_LIST_TYPE_NAME).append(") _symbols[offset + 1].value).add(_symbols[offset + ").append(add_elem_rule.rhs.size()).append("]").append(elem.type != null ? ".value" : "").append("); ") 703 .append("return _symbols[offset + 1];") 704 .toString(); 705 } 706 } 707 } | Popular Tags |