package net.sourceforge.chaperon.process;

import net.sourceforge.chaperon.common.Decoder;
import net.sourceforge.chaperon.common.IntegerList;

import org.apache.commons.logging.Log;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;

import java.util.Stack;

/**
 * SAX filter that consumes a stream of lexeme elements (namespace
 * {@link LexicalProcessor#NS_OUTPUT}) and drives a {@link ParserAutomaton}
 * with them. Shift and reduce actions build a tree of token and production
 * nodes; when the automaton accepts, the tree is replayed as SAX events in
 * the {@link #NS_OUTPUT} namespace on the configured content handler.
 *
 * Events that arrive outside of the lexeme document are forwarded to the
 * content handler unchanged.
 */
public class ParserProcessor implements ContentHandler, LexicalHandler
{
  /** Namespace of the generated syntax tree. */
  public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/1.0";

  /** Local name of the root element wrapping the generated syntax tree. */
  public static final String OUTPUT = "output";

  /** Local name of the incoming error element and symbol name of error tokens. */
  public static final String ERROR = "error";

  // SAX filter states: where in the incoming lexeme document we currently are.
  private static final int STATE_OUTSIDE = 0;
  private static final int STATE_INSIDE = 1;
  private static final int STATE_LEXEME = 2;
  private static final int STATE_GROUP = 3;
  private static final int STATE_ERROR = 4;

  private boolean flatten = false;
  private boolean localizable = false;
  private String source;
  private int lineNumber;
  private int columnNumber;
  private ContentHandler contentHandler = null;
  private LexicalHandler lexicalHandler = null;
  private Locator locator = null;
  private LocatorImpl locatorImpl = null;
  private int state = STATE_OUTSIDE;
  private ParserAutomaton automaton;
  private IntegerList statestack = new IntegerList();
  private Stack treestack = new Stack();
  private Log log;
  private StringBuffer lineSnippet = new StringBuffer();
  private boolean unrecoverable = false;
  private ParseException exception = null;

  /**
   * Creates a processor without automaton and log; both have to be supplied
   * through the setters before any lexeme is processed.
   */
  public ParserProcessor() {}

  /**
   * Creates a processor for the given automaton.
   *
   * @param automaton Automaton which supplies the parser actions.
   * @param log Log used for debug output, may be null.
   */
  public ParserProcessor(ParserAutomaton automaton, Log log)
  {
    this.automaton = automaton;
    this.log = log;
  }

  /**
   * Sets the automaton which supplies the parser actions.
   *
   * @param automaton Parser automaton.
   */
  public void setParserAutomaton(ParserAutomaton automaton)
  {
    this.automaton = automaton;
  }

  /**
   * Sets the handler which receives the generated and the forwarded events.
   *
   * @param handler Content handler.
   */
  public void setContentHandler(ContentHandler handler)
  {
    this.contentHandler = handler;
  }

  /**
   * Sets the handler which receives the forwarded lexical events.
   *
   * @param handler Lexical handler, may be null.
   */
  public void setLexicalHandler(LexicalHandler handler)
  {
    this.lexicalHandler = handler;
  }

  /**
   * Sets the log used for debug output.
   *
   * @param log Log, may be null.
   */
  public void setLog(Log log)
  {
    this.log = log;
  }

  /**
   * Controls whether "line" and "column" attributes are added to every
   * generated element.
   *
   * @param localizable True to emit the position attributes.
   */
  public void setLocalizable(boolean localizable)
  {
    this.localizable = localizable;
  }

  /**
   * Controls whether an element is suppressed when its parent production
   * carries the same symbol.
   *
   * @param flatten True to suppress such nested elements.
   */
  public void setFlatten(boolean flatten)
  {
    this.flatten = flatten;
  }

  /**
   * Stores the locator and hands a private copy of it to the content
   * handler; the copy is later updated with the positions of the nodes
   * which are fired.
   */
  public void setDocumentLocator(Locator locator)
  {
    this.locator = locator;
    this.locatorImpl = null;
    if (locator!=null)
    {
      this.locatorImpl = new LocatorImpl(locator);
      contentHandler.setDocumentLocator(locatorImpl);
    }
  }

  /**
   * Forwards the start of the document and resets the filter state.
   */
  public void startDocument() throws SAXException
  {
    contentHandler.startDocument();
    state = STATE_OUTSIDE;
  }

  /**
   * Forwards the end of the document, but only if the lexeme document was
   * closed properly.
   */
  public void endDocument() throws SAXException
  {
    if (state==STATE_OUTSIDE)
      contentHandler.endDocument();
  }

  /**
   * Dispatches a start element according to the filter state: foreign
   * elements outside of the lexeme document are forwarded, lexeme and error
   * elements are fed into the parser.
   *
   * @throws SAXException If the element is not allowed at this position or
   *         the parser cannot accept the lexeme.
   */
  public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
    throws SAXException
  {
    if (state==STATE_OUTSIDE)
    {
      if (isLexicalElement(namespaceURI, localName, LexicalProcessor.OUTPUT))
      {
        handleStartDocument();

        state = STATE_INSIDE;

        // Prefer the explicit attribute, then the locator, then a fixed fallback.
        String explicitSource = atts.getValue("source");
        if (explicitSource!=null)
          source = explicitSource;
        else if (locator!=null)
          source = locator.getSystemId();
        else
          source = "unknown";
      }
      else
        contentHandler.startElement(namespaceURI, localName, qName, atts);
    }
    else if (state==STATE_INSIDE)
    {
      if (isLexicalElement(namespaceURI, localName, LexicalProcessor.LEXEME))
      {
        updatePosition(atts);
        handleLexeme(atts.getValue("symbol"), atts.getValue("text"));
        state = STATE_LEXEME;
      }
      else if (isLexicalElement(namespaceURI, localName, ERROR))
      {
        updatePosition(atts);
        handleLexeme("error", atts.getValue("text"));
        state = STATE_ERROR;
      }
      else
        throw new SAXException("Unexpected start element.");
    }
    else if (state==STATE_LEXEME)
    {
      if (isLexicalElement(namespaceURI, localName, LexicalProcessor.GROUP))
        state = STATE_GROUP;
      else
        throw new SAXException("Unexpected start element.");
    }
    else if ((state==STATE_ERROR) || (state==STATE_GROUP))
      throw new SAXException("Unexpected start element.");
  }

  /**
   * Dispatches an end element according to the filter state. The end of the
   * lexeme document finishes the parse and emits the result wrapped into an
   * output element of the syntax tree namespace.
   *
   * @throws SAXException If the element is not allowed at this position or
   *         the parser cannot accept the end of the input.
   */
  public void endElement(String namespaceURI, String localName, String qName)
    throws SAXException
  {
    if (state==STATE_OUTSIDE)
      contentHandler.endElement(namespaceURI, localName, qName);
    else if (state==STATE_INSIDE)
    {
      if (!isLexicalElement(namespaceURI, localName, LexicalProcessor.OUTPUT))
        throw new SAXException("Unexpected end element.");

      contentHandler.startPrefixMapping("", NS_OUTPUT);
      contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl());

      handleEndDocument();

      contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
      contentHandler.endPrefixMapping("");

      state = STATE_OUTSIDE;
    }
    else if ((state==STATE_LEXEME) || (state==STATE_ERROR))
      state = STATE_INSIDE;
    else if (state==STATE_GROUP)
      state = STATE_LEXEME;
  }

  /**
   * Forwards character data which occurs outside of the lexeme document.
   */
  public void characters(char[] ch, int start, int length)
    throws SAXException
  {
    if (state==STATE_OUTSIDE)
      contentHandler.characters(ch, start, length);
  }

  /**
   * Forwards ignorable whitespace which occurs outside of the lexeme document.
   */
  public void ignorableWhitespace(char[] ch, int start, int length)
    throws SAXException
  {
    if (state==STATE_OUTSIDE)
      contentHandler.ignorableWhitespace(ch, start, length);
  }

  /**
   * Forwards the start of a prefix mapping.
   */
  public void startPrefixMapping(String prefix, String uri)
    throws SAXException
  {
    contentHandler.startPrefixMapping(prefix, uri);
  }

  /**
   * Forwards the end of a prefix mapping.
   */
  public void endPrefixMapping(String prefix) throws SAXException
  {
    contentHandler.endPrefixMapping(prefix);
  }

  /**
   * Synchronises the private locator with the incoming one and forwards the
   * processing instruction if it occurs outside of the lexeme document.
   */
  public void processingInstruction(String target, String data)
    throws SAXException
  {
    if (locatorImpl!=null)
    {
      locatorImpl.setLineNumber(locator.getLineNumber());
      locatorImpl.setColumnNumber(locator.getColumnNumber());
    }

    if (state==STATE_OUTSIDE)
      contentHandler.processingInstruction(target, data);
  }

  /**
   * Synchronises the private locator with the incoming one and forwards the
   * skipped entity if it occurs outside of the lexeme document.
   */
  public void skippedEntity(String name) throws SAXException
  {
    if (locatorImpl!=null)
    {
      locatorImpl.setLineNumber(locator.getLineNumber());
      locatorImpl.setColumnNumber(locator.getColumnNumber());
    }

    if (state==STATE_OUTSIDE)
      contentHandler.skippedEntity(name);
  }

  /**
   * Forwards the start of a DTD to the lexical handler, if one is set.
   */
  public void startDTD(String name, String publicId, String systemId)
    throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startDTD(name, publicId, systemId);
  }

  /**
   * Forwards the end of a DTD to the lexical handler, if one is set.
   */
  public void endDTD() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endDTD();
  }

  /**
   * Forwards the start of an entity to the lexical handler, if one is set.
   */
  public void startEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startEntity(name);
  }

  /**
   * Forwards the end of an entity to the lexical handler, if one is set.
   */
  public void endEntity(String name) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endEntity(name);
  }

  /**
   * Forwards the start of a CDATA section to the lexical handler, if one is set.
   */
  public void startCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.startCDATA();
  }

  /**
   * Forwards the end of a CDATA section to the lexical handler, if one is set.
   */
  public void endCDATA() throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.endCDATA();
  }

  /**
   * Forwards a comment to the lexical handler, if one is set.
   */
  public void comment(char[] ch, int start, int len) throws SAXException
  {
    if (lexicalHandler!=null)
      lexicalHandler.comment(ch, start, len);
  }

  /**
   * Tests whether an element belongs to the lexeme namespace and carries the
   * expected local name.
   */
  private boolean isLexicalElement(String namespaceURI, String localName, String expected)
  {
    return (namespaceURI!=null) && (namespaceURI.equals(LexicalProcessor.NS_OUTPUT)) &&
           (expected.equals(localName));
  }

  /**
   * Reads the position of the current lexeme. For each coordinate the
   * explicit attribute wins over the locator, and 1 is used if neither is
   * available. A malformed attribute value raises the NumberFormatException
   * of Integer.parseInt.
   */
  private void updatePosition(Attributes atts)
  {
    String column = atts.getValue("column");
    if (column!=null)
      columnNumber = Integer.parseInt(column);
    else if (locator!=null)
      columnNumber = locator.getColumnNumber();
    else
      columnNumber = 1;

    String line = atts.getValue("line");
    if (line!=null)
      lineNumber = Integer.parseInt(line);
    else if (locator!=null)
      lineNumber = locator.getLineNumber();
    else
      lineNumber = 1;
  }

  /**
   * Tells whether debug messages should be produced at all.
   */
  private boolean isDebug()
  {
    return (log!=null) && (log.isDebugEnabled());
  }

  /**
   * Describes the current position of the incoming document for error messages.
   */
  private String getLocation()
  {
    if (locator==null)
      return "unknown";

    return locator.getSystemId()+":"+locator.getLineNumber()+":"+locator.getColumnNumber();
  }

  /**
   * Looks up the index of a terminal symbol by name.
   *
   * @return Index of the first terminal with this name, or -1 if there is none.
   */
  private int findTerminal(String name)
  {
    int count = automaton.getTerminalCount();
    for (int i = 0; i<count; i++)
      if (automaton.getTerminal(i).equals(name))
        return i;

    return -1;
  }

  /**
   * Creates a token node for the current lexeme position.
   */
  private TokenNode newTokenNode(String symbol, String text)
  {
    TokenNode tokennode = new TokenNode(symbol, text);

    // NOTE(review): positions read from the line/column attributes are dropped
    // when no locator was set - kept as in the original, confirm whether intended.
    if (locator!=null)
    {
      tokennode.linenumber = lineNumber;
      tokennode.columnnumber = columnNumber;
    }

    return tokennode;
  }

  /**
   * Performs the stack part of a reduction: pops as many states and tree
   * nodes as the production is long, collects the nodes as children of a new
   * production node and pushes that node onto the tree stack. The goto
   * transition is left to the caller.
   *
   * @param current State in which the reduction takes place, used for logging.
   * @param production Index of the production to reduce.
   *
   * @return The new production node.
   */
  private ProductionNode reduce(int current, int production)
  {
    String nonterminal = automaton.getNonterminal(automaton.getProductionSymbol(production));

    if (isDebug())
      log.debug("State "+current+" reduce "+nonterminal+" ("+production+")");

    ProductionNode productionnode = new ProductionNode(nonterminal);

    // The node popped last is the leftmost child; it supplies the position.
    TreeNode node = null;
    int length = automaton.getProductionLength(production);
    for (int i = 0; i<length; i++)
    {
      statestack.pop();
      productionnode.insert((node = (TreeNode)treestack.pop()));
    }

    if (node!=null)
    {
      productionnode.linenumber = node.linenumber;
      productionnode.columnnumber = node.columnnumber;
    }

    treestack.push(productionnode);

    return productionnode;
  }

  /**
   * Applies reductions as long as the automaton demands one for the given
   * lookahead symbol.
   *
   * @return The state on top of the state stack afterwards.
   */
  private int reduceWhilePossible(int current, int symbol)
  {
    while (automaton.isReduceAction(current, symbol))
    {
      int production = automaton.getReduceProduction(current, symbol);

      reduce(current, production);

      statestack.push(automaton.getTransition(statestack.peek(),
                                              automaton.getProductionSymbol(production)));
      current = statestack.peek();
    }

    return current;
  }

  /**
   * Resets the parser for a new lexeme document.
   */
  private void handleStartDocument()
  {
    statestack.clear();
    statestack.push(0); // start state of the automaton

    treestack.clear();

    lineSnippet = new StringBuffer();
    unrecoverable = false;
    exception = null;
  }

  /**
   * Feeds one lexeme into the parser.
   *
   * A symbol which is unknown to the automaton is treated as the "error"
   * terminal; if the automaton has no such terminal the parse is marked as
   * unrecoverable and a ParseException is recorded, which is reported at the
   * end of the document.
   *
   * @param symbolname Name of the terminal symbol.
   * @param text Recognised text, a missing text is treated as empty.
   *
   * @throws SAXException If the lexeme can neither be accepted nor recovered.
   */
  private void handleLexeme(String symbolname, String text)
    throws SAXException
  {
    if (unrecoverable)
      return;

    // Attributes.getValue returns null for a missing attribute; without this
    // the shift below and the later firing of the token would fail with a
    // NullPointerException.
    if (text==null)
      text = "";

    int symbol = findTerminal(symbolname);
    int current = statestack.peek();

    if (symbol==-1)
    {
      symbol = findTerminal("error");

      if (symbol==-1)
      {
        if (isDebug())
          log.debug("State "+current+" unexpected token "+Decoder.toString(text)+"("+symbolname+")");

        StringBuffer message = new StringBuffer();

        message.append("Unexpected token ");
        message.append(symbolname);
        message.append("[\"");
        message.append(text);
        message.append("\"], expected tokens: ");

        // Separate by "first entry" and not by index, otherwise the list
        // starts with a comma whenever terminal 0 is not expected.
        boolean first = true;
        int count = automaton.getTerminalCount();
        for (int i = 0; i<count; i++)
          if (!automaton.isErrorAction(current, i))
          {
            if (!first)
              message.append(", ");

            message.append(automaton.getTerminal(i));
            first = false;
          }

        unrecoverable = true;
        exception =
          new ParseException(message.toString(), symbolname, text, lineSnippet.toString(), source,
                             lineNumber, columnNumber);

        return;
      }

      symbolname = "error";
    }

    current = reduceWhilePossible(current, symbol);

    if (automaton.isErrorAction(current, symbol))
    {
      if (isDebug())
        log.debug("State "+current+" error token "+Decoder.toString(text)+"("+symbolname+")");

      StringBuffer errortext = new StringBuffer();

      // Unwind the stacks until a state offers an error transition or a
      // shift for this symbol; the text of the dropped nodes is collected.
      while ((statestack.getCount()>1) &&
             (automaton.isErrorAction(current, symbol)) &&
             (automaton.getErrorTransition(current, symbol)==0) &&
             (!automaton.isShiftAction(current, symbol)))
      {
        statestack.pop();

        TreeNode node = (TreeNode)treestack.pop();
        errortext.insert(0, node.getText());
        current = statestack.peek();
      }

      boolean isErrorSymbol = "error".equals(symbolname);
      if (((!isErrorSymbol) && (!automaton.isErrorAction(current, symbol))) ||
          ((isErrorSymbol) && (!automaton.isShiftAction(current, symbol))))
        throw new SAXException("Couldn't accept input "+symbolname+"["+Decoder.toString(text)+
                               "] at "+getLocation());

      if (automaton.isErrorAction(current, symbol))
        statestack.push(automaton.getErrorTransition(current, symbol));
      else
        statestack.push(automaton.getShiftTransition(current, symbol));

      current = statestack.peek();

      // Still an error for this symbol in the new state: the lexeme itself
      // becomes part of the error token and is consumed.
      boolean consumed = automaton.isErrorAction(current, symbol);
      if (consumed)
        errortext.append(text);

      treestack.push(newTokenNode("error", errortext.toString()));

      if (consumed)
        return;
    }

    current = reduceWhilePossible(current, symbol);

    if (automaton.isShiftAction(current, symbol))
    {
      if (isDebug())
        log.debug("State "+current+" shift token "+symbolname+" ("+symbol+")");

      statestack.push(automaton.getShiftTransition(current, symbol));

      treestack.push(newTokenNode(symbolname, text));

      // Keep the text of the current line for error messages.
      // NOTE(review): the snippet starts with the line break character itself -
      // kept as in the original, confirm whether intended.
      int lastBreak = Math.max(text.lastIndexOf("\n"), text.lastIndexOf("\r"));
      if (lastBreak>=0)
      {
        lineSnippet = new StringBuffer();
        lineSnippet.append(text.substring(lastBreak));
      }
      else
        lineSnippet.append(text);
    }
  }

  /**
   * Finishes the parse at the end of the lexeme document. Pending reductions
   * are performed; if the automaton accepts, the syntax tree is fired as SAX
   * events, otherwise an exception element is fired.
   *
   * @throws SAXException If the end of the document can neither be accepted
   *         nor recovered.
   */
  private void handleEndDocument() throws SAXException
  {
    if (unrecoverable)
    {
      fireException();
      return;
    }

    int current = statestack.peek();

    if (automaton.isErrorAction(current))
    {
      if (isDebug())
        log.debug("State "+current+" error unexpected end of file");

      StringBuffer errortext = new StringBuffer();

      // Unwind the stacks until a state offers an error transition.
      while ((statestack.getCount()>1) && (automaton.getErrorTransition(current)==0))
      {
        statestack.pop();

        TreeNode node = (TreeNode)treestack.pop();
        errortext.insert(0, node.getText());
        current = statestack.peek();
      }

      statestack.push(automaton.getErrorTransition(current));

      current = statestack.peek();

      if (automaton.isErrorAction(current))
        throw new SAXException("Couldn't accept end of document at "+getLocation());

      treestack.push(newTokenNode("error", errortext.toString()));
    }

    while (automaton.isReduceAction(current) || automaton.isAcceptAction(current))
    {
      int production = automaton.getReduceProduction(current);

      ProductionNode productionnode = reduce(current, production);

      if ((automaton.isAcceptAction(current)) && (statestack.getCount()==1))
      {
        if (isDebug())
          log.debug("State "+current+" accept");

        if (locatorImpl!=null)
          locatorImpl.setSystemId(source);

        fireEvents(productionnode);
        return;
      }

      statestack.push(automaton.getTransition(statestack.peek(),
                                              automaton.getProductionSymbol(production)));
      current = statestack.peek();
    }

    if ((automaton.isErrorAction(current)) && (statestack.getCount()>1))
    {
      if (isDebug())
        log.debug("State "+current+" error unexpected end of file");

      StringBuffer message = new StringBuffer();

      message.append("Unexpected end of file, expected tokens: ");
      int count = automaton.getTerminalCount();
      for (int i = 0; i<count; i++)
        if (!automaton.isErrorAction(current, i))
        {
          message.append(automaton.getTerminal(i));
          message.append(" ");
        }

      exception =
        new ParseException(message.toString(), "", "", lineSnippet.toString(), source, lineNumber,
                           columnNumber);

      fireException();
    }
  }

  /**
   * Replays a syntax tree as SAX events. The tree is walked iteratively with
   * an explicit stack of the open nodes. With flatten enabled an element is
   * not emitted if its parent production has the same symbol.
   *
   * @param node Root of the tree.
   *
   * @throws SAXException If the content handler fails.
   */
  private void fireEvents(TreeNode node) throws SAXException
  {
    Stack stack = new Stack();

    ProductionNode previous = null;
    TreeNode next = node;
    do
    {
      // Descend along the first children and open the elements on the way.
      while (next!=null)
      {
        stack.push(next);

        if (locatorImpl!=null)
        {
          locatorImpl.setLineNumber(next.linenumber);
          locatorImpl.setColumnNumber(next.columnnumber);
        }

        if ((!flatten) || (previous==null) || (!previous.symbol.equals(next.symbol)))
        {
          AttributesImpl atts = new AttributesImpl();
          if (localizable)
          {
            atts.addAttribute("", "line", "line", "CDATA", String.valueOf(next.linenumber));
            atts.addAttribute("", "column", "column", "CDATA", String.valueOf(next.columnnumber));
          }

          contentHandler.startElement(NS_OUTPUT, next.symbol, next.symbol, atts);
        }

        if (next instanceof ProductionNode)
        {
          ProductionNode production = (ProductionNode)next;
          previous = production;
          next = production.firstchild;
        }
        else
        {
          TokenNode token = (TokenNode)next;
          contentHandler.characters(token.text.toCharArray(), 0, token.text.length());
          next = null;
        }
      }

      // Close the innermost open node and continue with its next sibling.
      next = (TreeNode)stack.pop();
      previous = stack.isEmpty() ? null : (ProductionNode)stack.peek();

      if (locatorImpl!=null)
      {
        locatorImpl.setLineNumber(next.linenumber);
        locatorImpl.setColumnNumber(next.columnnumber);
      }

      if ((!flatten) || (previous==null) || (!previous.symbol.equals(next.symbol)))
        contentHandler.endElement(NS_OUTPUT, next.symbol, next.symbol);

      next = next.nextsibling;
    }
    while (!stack.isEmpty());
  }

  /**
   * Reports the recorded parse exception as an empty "exception" element
   * whose attributes carry the details.
   */
  private void fireException() throws SAXException
  {
    AttributesImpl atts = new AttributesImpl();
    atts.addAttribute("", "symbol", "symbol", "CDATA", exception.getSymbol());
    atts.addAttribute("", "text", "text", "CDATA", exception.getText());
    atts.addAttribute("", "line-snippet", "line-snippet", "CDATA", exception.getLineSnippet());
    atts.addAttribute("", "localized", "localized", "CDATA", String.valueOf(exception.isLocalized()));
    atts.addAttribute("", "line-number", "line-number", "CDATA",
                      String.valueOf(exception.getLineNumber()));
    atts.addAttribute("", "column-number", "column-number", "CDATA",
                      String.valueOf(exception.getColumnNumber()));
    contentHandler.startElement(NS_OUTPUT, "exception", "exception", atts);
    contentHandler.endElement(NS_OUTPUT, "exception", "exception");
  }

  /**
   * Node of the syntax tree; siblings form a doubly linked list.
   */
  private abstract static class TreeNode
  {
    public String symbol = null;
    public int linenumber = 1;
    public int columnnumber = 1;
    public TreeNode previoussibling = null;
    public TreeNode nextsibling = null;

    /**
     * Returns the text covered by this node and by all of its following siblings.
     */
    public abstract String getText();

    public String toString()
    {
      return symbol;
    }
  }

  /**
   * Leaf of the syntax tree which holds the text of a lexeme.
   */
  private static class TokenNode extends TreeNode
  {
    public String text = null;

    public TokenNode(String symbol, String text)
    {
      this.symbol = symbol;
      this.text = text;
    }

    public String getText()
    {
      return text+((nextsibling!=null) ? nextsibling.getText() : "");
    }
  }

  /**
   * Inner node of the syntax tree which stands for a reduced production.
   */
  private static class ProductionNode extends TreeNode
  {
    public TreeNode firstchild = null;
    public TreeNode lastchild = null;

    public ProductionNode(String symbol)
    {
      this.symbol = symbol;
    }

    /**
     * Adds a node in front of the present children.
     */
    public void insert(TreeNode node)
    {
      if (firstchild==null)
      {
        firstchild = node;
        lastchild = node;
      }
      else
      {
        firstchild.previoussibling = node;
        node.nextsibling = firstchild;
        firstchild = node;
      }
    }

    /**
     * Adds the children of another production in front of the present children.
     */
    public void insertChilds(ProductionNode production)
    {
      if (firstchild==null)
      {
        firstchild = production.firstchild;
        lastchild = production.lastchild;
      }
      else
      {
        firstchild.previoussibling = production.lastchild;
        production.lastchild.nextsibling = firstchild;
        firstchild = production.firstchild;
      }
    }

    public String getText()
    {
      return ((firstchild!=null) ? firstchild.getText() : "")+
             ((nextsibling!=null) ? nextsibling.getText() : "");
    }
  }
}