1 8 9 package net.sourceforge.chaperon.process; 10 11 import net.sourceforge.chaperon.build.Automaton; 12 import net.sourceforge.chaperon.build.ReduceAction; 13 import net.sourceforge.chaperon.build.ShiftAction; 14 import net.sourceforge.chaperon.build.State; 15 import net.sourceforge.chaperon.model.grammar.Grammar; 16 import net.sourceforge.chaperon.model.grammar.Production; 17 import net.sourceforge.chaperon.model.symbol.Symbol; 18 import net.sourceforge.chaperon.model.symbol.Terminal; 19 20 import org.apache.commons.logging.Log; 21 22 import org.xml.sax.Attributes ; 23 import org.xml.sax.ContentHandler ; 24 import org.xml.sax.Locator ; 25 import org.xml.sax.SAXException ; 26 import org.xml.sax.ext.LexicalHandler ; 27 import org.xml.sax.helpers.AttributesImpl ; 28 import org.xml.sax.helpers.LocatorImpl ; 29 30 import java.util.Stack ; 31 32 34 40 public class GeneralParserProcessor implements ContentHandler , LexicalHandler 41 { 42 private static final String NS = "http://chaperon.sourceforge.net/schema/lexemes/1.0"; 43 private static final String LEXEMES = "lexemes"; 44 private static final String LEXEME = "lexeme"; 45 46 47 private static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/1.0"; 48 private static final String OUTPUT = "output"; 49 private ContentHandler contentHandler = null; 50 private LexicalHandler lexicalHandler = null; 51 private Locator locator = null; 52 private LocatorImpl locatorImpl = null; 53 private static final int STATE_OUTER = 0; 54 private static final int STATE_LEXEMES = 1; 55 private static final int STATE_LEXEME = 2; 56 private int state = STATE_OUTER; 57 private Automaton automaton; 58 private Grammar grammar; 59 private boolean flatten = false; 60 61 private Stack current = new Stack (); 63 private Stack next = new Stack (); 64 private Log log; 65 66 private int maxActiveStates = 50; 68 69 72 public GeneralParserProcessor() {} 73 74 81 public GeneralParserProcessor(Automaton automaton, Log log) 82 { 83 this.automaton = automaton; 84 this.log = log; 85 } 86 87 92 public void setParserAutomaton(Automaton automaton) 93 { 94 this.automaton = automaton; 95 this.grammar = automaton.getGrammar(); 96 } 97 98 101 public void setContentHandler(ContentHandler handler) 102 { 103 this.contentHandler = handler; 104 } 105 106 109 public void setLexicalHandler(LexicalHandler handler) 110 { 111 this.lexicalHandler = handler; 112 } 113 114 119 public void setLog(Log log) 120 { 121 this.log = log; 122 } 123 124 130 public void setFlatten(boolean flatten) 131 { 132 this.flatten = flatten; 133 } 134 135 140 public void setDocumentLocator(Locator locator) 141 { 142 this.locator = locator; 143 144 if (locator!=null) 145 { 146 this.locatorImpl = new LocatorImpl (locator); 147 contentHandler.setDocumentLocator(locatorImpl); 148 } 149 } 150 151 156 public void startDocument() throws SAXException 157 { 158 locatorImpl.setLineNumber(locator.getLineNumber()); 159 locatorImpl.setColumnNumber(locator.getColumnNumber()); 160 contentHandler.startDocument(); 161 state = STATE_OUTER; 162 } 163 164 174 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 175 throws SAXException 176 { 177 locatorImpl.setLineNumber(locator.getLineNumber()); 178 locatorImpl.setColumnNumber(locator.getColumnNumber()); 179 180 if (state==STATE_OUTER) 181 { 182 if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(LEXEMES))) 183 { 184 processStartDocument(); 185 state = STATE_LEXEMES; 186 } 187 else 188 contentHandler.startElement(namespaceURI, localName, qName, atts); 189 } 190 else if (state==STATE_LEXEMES) 191 { 192 if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(LEXEME))) 193 { 194 processLexeme(atts.getValue("symbol"), atts.getValue("text")); 195 state = STATE_LEXEME; 196 } 197 else 198 throw new SAXException ("Unexpected start element."); 199 } 200 else if (state==STATE_LEXEME) 201 throw new SAXException ("Unexpected start element."); 202 } 203 204 213 public void endElement(String namespaceURI, String localName, String qName) 214 throws SAXException 215 { 216 locatorImpl.setLineNumber(locator.getLineNumber()); 217 locatorImpl.setColumnNumber(locator.getColumnNumber()); 218 219 if (state==STATE_OUTER) 220 contentHandler.endElement(namespaceURI, localName, qName); 221 else if (state==STATE_LEXEMES) 222 { 223 if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(LEXEMES))) 224 { 225 contentHandler.startPrefixMapping("", NS_OUTPUT); 226 contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl ()); 227 228 processEndDocument(); 229 230 contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT); 231 contentHandler.endPrefixMapping(""); 232 233 state = STATE_OUTER; 234 } 235 else 236 throw new SAXException ("Unexpected end element."); 237 } 238 else if (state==STATE_LEXEME) 239 state = STATE_LEXEMES; 240 } 241 242 251 public void characters(char[] ch, int start, int length) 252 throws SAXException 253 { 254 locatorImpl.setLineNumber(locator.getLineNumber()); 255 locatorImpl.setColumnNumber(locator.getColumnNumber()); 256 257 if (state==STATE_OUTER) 258 contentHandler.characters(ch, start, length); 259 } 260 261 270 public void ignorableWhitespace(char[] ch, int start, int length) 271 throws SAXException 272 { 273 locatorImpl.setLineNumber(locator.getLineNumber()); 274 locatorImpl.setColumnNumber(locator.getColumnNumber()); 275 276 if (state==STATE_OUTER) 277 contentHandler.ignorableWhitespace(ch, start, length); 278 } 279 280 288 public void startPrefixMapping(String prefix, String uri) 289 throws SAXException 290 { 291 locatorImpl.setLineNumber(locator.getLineNumber()); 292 locatorImpl.setColumnNumber(locator.getColumnNumber()); 293 294 contentHandler.startPrefixMapping(prefix, uri); 295 } 296 297 304 public void endPrefixMapping(String prefix) throws SAXException 305 { 306 locatorImpl.setLineNumber(locator.getLineNumber()); 307 locatorImpl.setColumnNumber(locator.getColumnNumber()); 308 309 contentHandler.endPrefixMapping(prefix); 310 } 311 312 320 public void processingInstruction(String target, String data) 321 throws SAXException 322 { 323 locatorImpl.setLineNumber(locator.getLineNumber()); 324 locatorImpl.setColumnNumber(locator.getColumnNumber()); 325 326 if (state==STATE_OUTER) 327 contentHandler.processingInstruction(target, data); 328 } 329 330 337 public void skippedEntity(String name) throws SAXException 338 { 339 locatorImpl.setLineNumber(locator.getLineNumber()); 340 locatorImpl.setColumnNumber(locator.getColumnNumber()); 341 342 if (state==STATE_OUTER) 343 contentHandler.skippedEntity(name); 344 } 345 346 351 public void endDocument() throws SAXException 352 { 353 locatorImpl.setLineNumber(locator.getLineNumber()); 354 locatorImpl.setColumnNumber(locator.getColumnNumber()); 355 356 if (state==STATE_OUTER) 357 contentHandler.endDocument(); 358 } 359 360 363 public void startDTD(String name, String publicId, String systemId) 364 throws SAXException 365 { 366 if (lexicalHandler!=null) 367 lexicalHandler.startDTD(name, publicId, systemId); 368 } 369 370 373 public void endDTD() throws SAXException 374 { 375 if (lexicalHandler!=null) 376 lexicalHandler.endDTD(); 377 } 378 379 382 public void startEntity(String name) throws SAXException 383 { 384 if (lexicalHandler!=null) 385 lexicalHandler.startEntity(name); 386 } 387 388 391 public void endEntity(String name) throws SAXException 392 { 393 if (lexicalHandler!=null) 394 lexicalHandler.endEntity(name); 395 } 396 397 400 public void startCDATA() throws SAXException 401 { 402 if (lexicalHandler!=null) 403 lexicalHandler.startCDATA(); 404 } 405 406 409 public void endCDATA() throws SAXException 410 { 411 if (lexicalHandler!=null) 412 lexicalHandler.endCDATA(); 413 } 414 415 418 public void comment(char[] ch, int start, int len) throws SAXException 419 { 420 if (lexicalHandler!=null) 421 lexicalHandler.comment(ch, start, len); 422 } 423 424 429 private void processStartDocument() 430 { 431 current.clear(); 432 current.push(new StateNode(automaton.getState(0), null, null)); 433 next.clear(); 434 435 count = 0; 436 437 System.out.println("Automaton:\n"+automaton); 438 439 } 441 442 private static int count = 0; 443 444 452 private void processLexeme(String symbolname, String text) 453 { 454 Terminal symbol = new Terminal(symbolname); 455 456 System.out.println("\n===================================\nProcess "+symbolname); 457 458 if (current.isEmpty()) 459 throw new IllegalStateException ("Parsing process is aborted"); 460 461 System.out.println("Current states"); 462 463 for (int i = 0; i<current.size(); i++) 464 System.out.println(current.get(i)); 465 466 System.out.println(); 467 468 if (current.size()>maxActiveStates) 469 throw new IllegalStateException ("Processor occupied too many states"); 470 471 472 int watchdog = 0; 473 474 while (!current.isEmpty()) 475 { 476 if (watchdog++>20) 477 throw new IllegalStateException ("overflow"); 478 479 StateNode statenode = (StateNode)current.pop(); 480 481 next.push(statenode); 482 483 ReduceAction[] reduceactions = statenode.state.getReduceActions(); 484 485 if (reduceactions.length>0) 486 { 487 for (int i = 0; i<reduceactions.length; i++) 488 { 489 Production production = reduceactions[i].production; 490 491 if ((log!=null) && (log.isDebugEnabled())) 492 log.debug( 493 494 " reduce "+production.getSymbol()); 495 496 498 ProductionNode productionnode = new ProductionNode(production); 499 TreeNode[] descendants = new TreeNode[production.getDefinition().getSymbolCount()]; 500 501 StateNode ancestor = statenode; 502 503 for (int j = production.getDefinition().getSymbolCount()-1; j>=0; j--) 504 { 505 descendants[j] = ancestor.treenode; 506 ancestor = ancestor.ancestor; 507 } 508 509 productionnode.descendants = descendants; 510 511 if (descendants.length>0) 512 { 513 productionnode.linenumber = descendants[0].linenumber; 514 productionnode.columnnumber = descendants[0].columnnumber; 515 } 516 else 517 { 518 productionnode.linenumber = locator.getLineNumber(); 519 productionnode.columnnumber = locator.getColumnNumber(); 520 } 521 522 ShiftAction shiftaction = ancestor.state.getShiftAction(productionnode.symbol); 523 524 if (shiftaction!=null) 525 { 526 StateNode newstatenode = getStateNode(current, shiftaction.state, ancestor); 527 528 if (newstatenode==null) 529 { 530 System.out.println("new state node: new state="+automaton.indexOf(shiftaction.state)+ 531 " ancestor state="+automaton.indexOf(ancestor.state)); 532 newstatenode = new StateNode(shiftaction.state, ancestor, productionnode); 533 current.push(newstatenode); 534 } 535 else 536 { 537 System.out.println("merging state node"); 538 539 ProductionNode oldproductionnode = (ProductionNode)newstatenode.treenode; 540 541 if (grammar.getPriority(oldproductionnode.production)>grammar.getPriority(production)) 542 { 543 System.out.println("priority("+production+") < priority("+ 544 oldproductionnode.production+")"); 545 newstatenode.treenode = productionnode; 546 } 547 else 548 System.out.println("priority("+production+") >= priority("+ 549 oldproductionnode.production+")"); 550 } 551 } 552 } 553 } 554 } 555 556 Stack dummy = next; 557 next = current; 558 current = dummy; 559 560 System.out.println("Current states"); 561 562 for (int i = 0; i<current.size(); i++) 563 System.out.println(current.get(i)); 564 565 System.out.println(); 566 567 568 TokenNode tokennode = new TokenNode(symbol, text); 569 570 if (locator!=null) 571 { 572 tokennode.linenumber = locator.getLineNumber(); 573 tokennode.columnnumber = locator.getColumnNumber(); 574 } 575 576 while (!current.isEmpty()) 577 { 578 StateNode statenode = (StateNode)current.pop(); 579 580 ShiftAction shiftaction = statenode.state.getShiftAction(symbol); 581 582 if (shiftaction!=null) 583 { 584 if ((log!=null) && (log.isDebugEnabled())) 585 log.debug( 586 587 " shift token "+symbolname+" ("+symbol+")"); 588 589 next.push(new StateNode(shiftaction.state, statenode, tokennode)); 590 } 591 } 592 593 if (next.isEmpty()) 594 throw new IllegalArgumentException ("Token "+symbolname+" is not expected in this state"); 595 596 dummy = next; 597 next = current; 598 current = dummy; 599 600 System.out.println("Current states"); 601 602 for (int i = 0; i<current.size(); i++) 603 System.out.println(current.get(i)); 604 605 System.out.println(); 606 } 607 608 614 private void processEndDocument() throws SAXException 615 { 616 System.out.println("\n===================================\nProcess EOF"); 617 618 while (!current.isEmpty()) 619 { 620 StateNode statenode = (StateNode)current.pop(); 621 622 ReduceAction[] reduceactions = statenode.state.getReduceActions(); 623 624 if (reduceactions.length>0) 625 { 626 for (int i = 0; i<reduceactions.length; i++) 627 { 628 Production production = reduceactions[i].production; 629 630 ProductionNode productionnode = new ProductionNode(production); 631 TreeNode[] descendants = new TreeNode[production.getDefinition().getSymbolCount()]; 632 633 StateNode ancestor = statenode; 634 635 for (int j = production.getDefinition().getSymbolCount()-1; j>=0; j--) 636 { 637 descendants[j] = ancestor.treenode; 638 ancestor = ancestor.ancestor; 639 } 640 641 productionnode.descendants = descendants; 642 643 ShiftAction shiftaction = ancestor.state.getShiftAction(productionnode.symbol); 644 645 if ((automaton.getState(0)==ancestor.state) && 647 (productionnode.symbol.equals(grammar.getStartSymbol()))) 648 { 649 if ((log!=null) && (log.isDebugEnabled())) 650 log.debug("State "+state+" accept"); 651 652 StateNode newstatenode = getStateNode(next, null, ancestor); 653 654 if (newstatenode==null) 655 { 656 newstatenode = new StateNode(null, ancestor, productionnode); 657 next.push(newstatenode); 658 } 659 else 660 { 661 System.out.println("merging state node"); 662 663 ProductionNode oldproductionnode = (ProductionNode)newstatenode.treenode; 664 665 if (grammar.getPriority(oldproductionnode.production)>grammar.getPriority(production)) 666 { 667 System.out.println("priority("+production+") < priority("+ 668 oldproductionnode.production+")"); 669 newstatenode.treenode = productionnode; 670 } 671 else 672 System.out.println("priority("+production+") >= priority("+ 673 oldproductionnode.production+")"); 674 } 675 } 676 else 677 { 678 if ((log!=null) && (log.isDebugEnabled())) 679 log.debug( 680 681 " reduce "+production.getSymbol()+" ("+production+")"); 682 683 687 StateNode newstatenode = getStateNode(current, shiftaction.state, ancestor); 688 689 if (newstatenode==null) 690 { 691 newstatenode = new StateNode(shiftaction.state, ancestor, productionnode); 692 current.push(newstatenode); 693 } 694 else 695 { 696 System.out.println("merging state node"); 697 698 ProductionNode oldproductionnode = (ProductionNode)newstatenode.treenode; 699 700 if (grammar.getPriority(oldproductionnode.production)>grammar.getPriority(production)) 701 { 702 System.out.println("priority("+production+") < priority("+ 703 oldproductionnode.production+")"); 704 newstatenode.treenode = productionnode; 705 } 706 else 707 System.out.println("priority("+production+") >= priority("+ 708 oldproductionnode.production+")"); 709 } 710 } 711 712 System.out.println("Current states"); 713 714 for (int k = 0; k<current.size(); k++) 715 System.out.println(current.get(k)); 716 717 System.out.println(); 718 } 719 } 720 } 721 722 if (log.isDebugEnabled()) 723 log.debug("Parser found "+next.size()+" alternatives"); 724 725 System.out.println(); 726 727 int index = 1; 728 729 while (!next.isEmpty()) 730 { 731 StateNode state = (StateNode)next.pop(); 732 733 fireEvents(null, state.treenode); 735 index++; 736 } 737 738 if (next.size()>1) 739 log.warn("Grammar is ambig, found "+next.size()+" alternative trees"); 740 } 741 742 private StateNode getStateNode(Stack stack, State state, StateNode ancestor) 743 { 744 StateNode statenode = null; 745 746 for (int j = 0; j<stack.size(); j++) 747 { 748 statenode = (StateNode)stack.get(j); 749 750 if ((statenode.ancestor==ancestor) && (statenode.state==state)) 751 return statenode; 752 } 753 754 return null; 755 } 756 757 765 private void fireEvents(ProductionNode parent, TreeNode node) 766 throws SAXException 767 { 768 if (node instanceof ProductionNode) 769 { 770 ProductionNode production = (ProductionNode)node; 771 772 if (locatorImpl!=null) 773 { 774 locatorImpl.setLineNumber(production.linenumber); 775 locatorImpl.setColumnNumber(production.columnnumber); 776 } 777 778 if ((!flatten) || (parent==null) || (!parent.symbol.equals(production.symbol))) 779 contentHandler.startElement(NS_OUTPUT, production.symbol.getName(), 780 production.symbol.getName(), new AttributesImpl ()); 781 782 for (int i = 0; i<production.descendants.length; i++) 783 fireEvents(production, production.descendants[i]); 784 785 if ((!flatten) || (parent==null) || (!parent.symbol.equals(production.symbol))) 786 contentHandler.endElement(NS_OUTPUT, production.symbol.getName(), 787 production.symbol.getName()); 788 } 789 else 790 { 791 TokenNode token = (TokenNode)node; 792 793 if (locatorImpl!=null) 794 { 795 locatorImpl.setLineNumber(token.linenumber); 796 locatorImpl.setColumnNumber(token.columnnumber); 797 } 798 799 contentHandler.startElement(NS_OUTPUT, token.symbol.getName(), token.symbol.getName(), 800 new AttributesImpl ()); 801 contentHandler.characters(token.text.toCharArray(), 0, token.text.length()); 802 contentHandler.endElement(NS_OUTPUT, token.symbol.getName(), token.symbol.getName()); 803 } 804 } 805 806 private class StateNode 807 { 808 public StateNode(State state, StateNode ancestor, TreeNode treenode) 809 { 810 this.state = state; 811 this.treenode = treenode; 812 this.ancestor = ancestor; 813 } 814 815 public State state = null; 816 public StateNode ancestor = null; 817 public TreeNode treenode = null; 818 819 public String toString() 820 { 821 StringBuffer buffer = new StringBuffer (); 822 823 if (ancestor!=null) 824 { 825 buffer.append(ancestor.toString()); 826 buffer.append(" <- "); 827 } 828 829 buffer.append("<"); 830 buffer.append(automaton.indexOf(state)); 831 832 835 buffer.append(">"); 836 837 return buffer.toString(); 838 } 839 } 840 841 private abstract class TreeNode 842 { 843 public Symbol symbol = null; 844 public int linenumber = 1; 845 public int columnnumber = 1; 846 } 847 848 private class TokenNode extends TreeNode 849 { 850 public TokenNode(Terminal symbol, String text) 851 { 852 this.symbol = symbol; 853 this.text = text; 854 } 855 856 public String text = null; 857 858 public String toString() 859 { 860 StringBuffer buffer = new StringBuffer (); 861 862 buffer.append("{"); 863 buffer.append(symbol); 864 buffer.append(":"); 865 buffer.append(text); 866 buffer.append("}"); 867 868 return buffer.toString(); 869 } 870 } 871 872 private class ProductionNode extends TreeNode 873 { 874 878 public ProductionNode(Production production) 879 { 880 this.production = production; 881 this.symbol = production.getSymbol(); 882 } 883 884 public Production production = null; 885 public TreeNode[] descendants = null; 886 887 public String toString() 888 { 889 StringBuffer buffer = new StringBuffer (); 890 891 buffer.append("{"); 892 buffer.append(symbol); 893 buffer.append(":"); 894 895 for (int i = 0; i<descendants.length; i++) 896 buffer.append(descendants[i].toString()); 897 898 buffer.append("}"); 899 900 return buffer.toString(); 901 } 902 } 903 } 904 | Popular Tags |