1 8 9 package net.sourceforge.chaperon.process.extended; 10 11 import net.sourceforge.chaperon.common.Decoder; 12 import net.sourceforge.chaperon.model.Violations; 13 import net.sourceforge.chaperon.model.extended.ExtendedGrammar; 14 import net.sourceforge.chaperon.model.extended.Pattern; 15 import net.sourceforge.chaperon.model.extended.PatternIterator; 16 17 import org.apache.commons.logging.Log; 18 19 import org.xml.sax.Attributes ; 20 import org.xml.sax.ContentHandler ; 21 import org.xml.sax.Locator ; 22 import org.xml.sax.SAXException ; 23 import org.xml.sax.ext.LexicalHandler ; 24 import org.xml.sax.helpers.AttributesImpl ; 25 import org.xml.sax.helpers.LocatorImpl ; 26 27 import java.util.Stack ; 28 29 35 public class ExtendedDirectParserProcessor implements ContentHandler , LexicalHandler 36 { 37 private static final String NS = "http://chaperon.sourceforge.net/schema/text/1.0"; 38 private static final String TEXT = "text"; 39 40 41 public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/2.0"; 42 private static final String OUTPUT = "output"; 43 44 private ContentHandler contentHandler = null; 46 private LexicalHandler lexicalHandler = null; 47 private Locator locator = null; 48 private LocatorImpl locatorImpl = null; 49 50 private static final int STATE_OUTER = 0; 52 private static final int STATE_INNER = 1; 53 private int state = STATE_OUTER; 54 55 private ExtendedGrammar grammar; 57 private boolean flatten = false; 58 private StackNodeSet current = new StackNodeSet(); 59 private StackNodeSet next = new StackNodeSet(); 60 private Log log; 61 private StackNodeList root; 62 private int line = 1; 63 private int column = 1; 64 private static final int MAXWATCHDOG = 1000; 65 66 69 public ExtendedDirectParserProcessor() {} 70 71 78 public ExtendedDirectParserProcessor(ExtendedGrammar grammar, Log log) 79 { 80 setExtendedGrammar(grammar); 81 this.log = log; 82 } 83 84 89 public void setExtendedGrammar(ExtendedGrammar grammar) 90 { 91 this.grammar = grammar; 92 93 Violations violations = grammar.validate(); 94 95 if ((violations!=null) && (violations.getViolationCount()>0)) 96 throw new IllegalArgumentException ("Grammar is not valid: "+violations.getViolation(0)); 97 98 if ((log!=null) && (log.isDebugEnabled())) 99 log.debug("grammar:\n"+grammar); 100 101 grammar.update(); 102 103 if ((log!=null) && (log.isDebugEnabled())) 104 { 105 StringBuffer buffer = new StringBuffer (); 106 buffer.append("Successors:\n"); 107 for(PatternIterator i=grammar.getAllPattern().getPattern(); i.hasNext();) 108 { 109 Pattern pattern = i.next(); 110 if (pattern.getSuccessors().hasNext()) 111 { 112 buffer.append(pattern+"->{"); 113 for(PatternIterator j=pattern.getSuccessors(); j.hasNext();) 114 { 115 buffer.append(j.next()); 116 if (j.hasNext()) 117 buffer.append(","); 118 } 119 buffer.append("}\n"); 120 } 121 } 122 123 buffer.append("\nAscending successors:\n"); 124 for(PatternIterator i=grammar.getAllPattern().getPattern(); i.hasNext();) 125 { 126 Pattern pattern = i.next(); 127 if (pattern.getAscendingSuccessors().hasNext()) 128 { 129 buffer.append(pattern+"->{"); 130 for(PatternIterator j=pattern.getAscendingSuccessors(); j.hasNext();) 131 { 132 buffer.append(j.next()); 133 if (j.hasNext()) 134 buffer.append(","); 135 } 136 buffer.append("}\n"); 137 } 138 } 139 140 buffer.append("\nDescending successors:\n"); 141 for(PatternIterator i=grammar.getAllPattern().getPattern(); i.hasNext();) 142 { 143 Pattern pattern = i.next(); 144 if (pattern.getDescendingSuccessors().hasNext()) 145 { 146 buffer.append(pattern+"->{"); 147 for(PatternIterator j=pattern.getDescendingSuccessors(); j.hasNext();) 148 { 149 buffer.append(j.next()); 150 if (j.hasNext()) 151 buffer.append(","); 152 } 153 buffer.append("}\n"); 154 } 155 } 156 log.debug(buffer.toString()); 157 } 158 } 159 160 163 public void setContentHandler(ContentHandler handler) 164 { 165 this.contentHandler = handler; 166 } 167 168 171 public void setLexicalHandler(LexicalHandler handler) 172 { 173 this.lexicalHandler = handler; 174 } 175 176 181 public void setLog(Log log) 182 { 183 this.log = log; 184 } 185 186 192 public void setFlatten(boolean flatten) 193 { 194 this.flatten = flatten; 195 } 196 197 200 public void setDocumentLocator(Locator locator) 201 { 202 this.locator = locator; 203 204 if (locator!=null) 205 { 206 this.locatorImpl = new LocatorImpl (locator); 207 contentHandler.setDocumentLocator(locatorImpl); 208 } 209 } 210 211 214 public void startDocument() throws SAXException 215 { 216 locatorImpl.setLineNumber(locator.getLineNumber()); 217 locatorImpl.setColumnNumber(locator.getColumnNumber()); 218 contentHandler.startDocument(); 219 state = STATE_OUTER; 220 } 221 222 225 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 226 throws SAXException 227 { 228 locatorImpl.setLineNumber(locator.getLineNumber()); 229 locatorImpl.setColumnNumber(locator.getColumnNumber()); 230 231 if (state==STATE_INNER) 232 throw new SAXException ("Unexpected element "+qName); 233 234 if (state==STATE_OUTER) 235 { 236 if ((namespaceURI!=null) && (namespaceURI.equals(NS))) 237 { 238 if (!localName.equals(TEXT)) 239 throw new SAXException ("Unknown element "+qName); 240 } 241 else 242 { 243 contentHandler.startElement(namespaceURI, localName, qName, atts); 244 return; 245 } 246 } 247 248 state = STATE_INNER; 249 250 current.clear(); 252 current.push(new TerminalStackNode(null, 0, grammar.getStartPattern(), null)); 253 next.clear(); 254 line = 1; 255 column = 1; 256 } 257 258 261 public void characters(char[] text, int textstart, int textlength) 262 throws SAXException 263 { 264 locatorImpl.setLineNumber(locator.getLineNumber()); 265 locatorImpl.setColumnNumber(locator.getColumnNumber()); 266 267 if (state==STATE_OUTER) 268 { 269 contentHandler.characters(text, textstart, textlength); 270 271 return; 272 } 273 274 for (int position = textstart; position<(textstart+textlength); position++) 275 { 276 if ((log!=null) && (log.isDebugEnabled())) 277 log.debug("===================================\nProcess "+Decoder.toChar(text[position])); 278 279 if (current.isEmpty()) 280 throw new IllegalStateException ("Parsing process is aborted"); 281 282 if ((log!=null) && (log.isDebugEnabled())) 283 log.debug(getStatesAsString()); 284 285 while (!current.isEmpty()) 286 { 287 StackNode node = current.pop(); 288 289 for (PatternIterator nextPattern = node.pattern.getDescendingSuccessors(); 290 nextPattern.hasNext();) 291 if (nextPattern.next().contains(text[position])) 292 { 293 reduce(node.pattern.getDefinition().getSymbol(), node, null); 294 break; 295 } 296 297 reduceEmpty(node); 298 299 shift(node, text, position); 300 301 if ((current.watchdog>MAXWATCHDOG) || (next.watchdog>MAXWATCHDOG)) 302 { 303 if ((log!=null) && (log.isInfoEnabled())) 304 log.info(getStatesAsString()); 305 throw new IllegalStateException ("Aborted parsing because of a high ambiguous grammar"+ 306 " ["+line+":"+column+"]"); 307 } 308 } 309 310 if ((log!=null) && (log.isDebugEnabled())) 311 log.debug(getStatesAsString()); 312 313 if (next.isEmpty()) 314 { 315 if ((log!=null) && (log.isInfoEnabled())) 316 log.info(getStatesAsString()); 317 throw new IllegalArgumentException ("Character '"+text[position]+"' is not expected"+" ["+ 318 line+":"+column+"]"); 319 } 320 321 swapStacks(); 322 323 increasePosition(text, position, position+1); 324 } 325 } 326 327 330 public void endElement(String namespaceURI, String localName, String qName) 331 throws SAXException 332 { 333 locatorImpl.setLineNumber(locator.getLineNumber()); 334 locatorImpl.setColumnNumber(locator.getColumnNumber()); 335 336 if (state==STATE_OUTER) 337 { 338 contentHandler.endElement(namespaceURI, localName, qName); 339 return; 340 } 341 342 if (state==STATE_INNER) 343 { 344 if ((namespaceURI!=null) && (namespaceURI.equals(NS))) 345 { 346 if (!localName.equals(TEXT)) 347 throw new SAXException ("Unknown element "+qName); 348 } 349 else 350 throw new SAXException ("Unexpected element "+qName); 351 } 352 353 state = STATE_OUTER; 354 355 if ((log!=null) && (log.isDebugEnabled())) 357 log.debug("===================================\nProcess end of text"); 358 359 root = null; 360 361 Pattern eot = grammar.getEndPattern(); 362 363 while (!current.isEmpty()) 364 { 365 StackNode node = current.pop(); 366 367 for (PatternIterator nextPattern = node.pattern.getDescendingSuccessors(); 368 nextPattern.hasNext();) 369 if (nextPattern.next()==eot) 370 { 371 reduce(node.pattern.getDefinition().getSymbol(), node, null); 372 break; 373 } 374 375 reduceEmpty(node); 376 377 if ((current.watchdog>MAXWATCHDOG) || (next.watchdog>MAXWATCHDOG)) 378 { 379 if ((log!=null) && (log.isInfoEnabled())) 380 log.info(getStatesAsString()); 381 throw new IllegalStateException ("Aborted parsing because of a high ambiguous grammar"+" ["+ 382 line+":"+column+"]"); 383 } 384 } 385 386 if ((log!=null) && (log.isDebugEnabled())) 387 log.debug(getStatesAsString()); 388 389 if (root==null) 390 { 391 if ((log!=null) && (log.isInfoEnabled())) 392 log.info(getStatesAsString()); 393 throw new IllegalStateException ("Unexpected end of text"+" ["+line+":"+column+"]"); 394 } 395 396 fireEvents(); 397 } 398 399 402 public void ignorableWhitespace(char[] ch, int start, int length) 403 throws SAXException 404 { 405 locatorImpl.setLineNumber(locator.getLineNumber()); 406 locatorImpl.setColumnNumber(locator.getColumnNumber()); 407 408 if (state==STATE_OUTER) 409 contentHandler.ignorableWhitespace(ch, start, length); 410 } 411 412 415 public void startPrefixMapping(String prefix, String uri) 416 throws SAXException 417 { 418 locatorImpl.setLineNumber(locator.getLineNumber()); 419 locatorImpl.setColumnNumber(locator.getColumnNumber()); 420 421 contentHandler.startPrefixMapping(prefix, uri); 422 } 423 424 427 public void endPrefixMapping(String prefix) throws SAXException 428 { 429 locatorImpl.setLineNumber(locator.getLineNumber()); 430 locatorImpl.setColumnNumber(locator.getColumnNumber()); 431 432 contentHandler.endPrefixMapping(prefix); 433 } 434 435 438 public void processingInstruction(String target, String data) 439 throws SAXException 440 { 441 locatorImpl.setLineNumber(locator.getLineNumber()); 442 locatorImpl.setColumnNumber(locator.getColumnNumber()); 443 444 if (state==STATE_OUTER) 445 contentHandler.processingInstruction(target, data); 446 } 447 448 451 public void skippedEntity(String name) throws SAXException 452 { 453 locatorImpl.setLineNumber(locator.getLineNumber()); 454 locatorImpl.setColumnNumber(locator.getColumnNumber()); 455 456 if (state==STATE_OUTER) 457 contentHandler.skippedEntity(name); 458 } 459 460 463 public void endDocument() throws SAXException 464 { 465 locatorImpl.setLineNumber(locator.getLineNumber()); 466 locatorImpl.setColumnNumber(locator.getColumnNumber()); 467 468 contentHandler.endDocument(); 469 } 470 471 474 public void startDTD(String name, String publicId, String systemId) 475 throws SAXException 476 { 477 lexicalHandler.startDTD(name, publicId, systemId); 478 } 479 480 483 public void endDTD() throws SAXException 484 { 485 lexicalHandler.endDTD(); 486 } 487 488 491 public void startEntity(String name) throws SAXException 492 { 493 if (lexicalHandler!=null) 494 lexicalHandler.startEntity(name); 495 } 496 497 500 public void endEntity(String name) throws SAXException 501 { 502 if (lexicalHandler!=null) 503 lexicalHandler.endEntity(name); 504 } 505 506 509 public void startCDATA() throws SAXException 510 { 511 if (lexicalHandler!=null) 512 lexicalHandler.startCDATA(); 513 } 514 515 518 public void endCDATA() throws SAXException 519 { 520 if (lexicalHandler!=null) 521 lexicalHandler.endCDATA(); 522 } 523 524 527 public void comment(char[] ch, int start, int len) throws SAXException 528 { 529 if (lexicalHandler!=null) 530 lexicalHandler.comment(ch, start, len); 531 } 532 533 private String getStatesAsString() 534 { 535 StringBuffer buffer = new StringBuffer (); 536 buffer.append("current:\n"); 537 buffer.append(current); 538 buffer.append(current.dump()); 539 buffer.append("Count of states:"); 540 buffer.append(current.size()); 541 buffer.append("\nnext:\n"); 542 buffer.append(next); 543 buffer.append(next.dump()); 544 buffer.append("Count of states:"); 545 buffer.append(next.size()); 546 return buffer.toString(); 547 } 548 549 private void swapStacks() 550 { 551 StackNodeSet dummy = next; 552 next = current; 553 current = dummy; 554 next.clear(); 555 } 556 557 private void shift(StackNode node, char[] text, int position) 558 { 559 for (PatternIterator i = node.pattern.getSuccessors(); i.hasNext();) 560 { 561 Pattern nextPattern = i.next(); 562 563 if (nextPattern.contains(text[position])) 564 { 565 if (node instanceof NonterminalStackNode) 566 { 567 for (PatternIterator j = node.last.pattern.getSuccessors(); j.hasNext();) 568 if (j.next().contains(text[position])) 569 return; 570 571 for (PatternIterator j = node.last.pattern.getAscendingSuccessors(); j.hasNext();) 572 if (j.next().contains(text[position])) 573 return; 574 } 575 576 StackNode newNode = new TerminalStackNode(text, position, nextPattern, node); 577 578 if ((log!=null) && (log.isDebugEnabled())) 579 log.debug("shift "+newNode); 580 581 next.push(newNode); 582 } 583 } 584 585 for (PatternIterator i = node.pattern.getAscendingSuccessors(); i.hasNext();) 586 { 587 Pattern firstPattern = i.next(); 588 589 if (firstPattern.contains(text[position])) 590 { 591 if (node instanceof NonterminalStackNode) 592 { 593 for (PatternIterator j = node.last.pattern.getSuccessors(); j.hasNext();) 594 if (j.next().contains(text[position])) 595 return; 596 597 for (PatternIterator j = node.last.pattern.getAscendingSuccessors(); j.hasNext();) 598 if (j.next().contains(text[position])) 599 return; 600 } 601 602 StackNode newNode = new TerminalStackNode(text, position, firstPattern, node); 603 604 if ((log!=null) && (log.isDebugEnabled())) 605 log.debug("shift "+newNode); 606 607 next.push(newNode); 608 } 609 } 610 } 611 612 private void reduce(String symbol, StackNode node, StackNodeList list) 613 { 614 if (node.sibling!=null) 615 reduce(symbol, node.sibling, list); 616 list = new StackNodeList(node, list); 617 while(node.ancestor.pattern.hasSuccessor(node.pattern)) 618 { 619 node = node.ancestor; 620 if (node.sibling!=null) 621 reduce(symbol, node.sibling, list); 622 list = new StackNodeList(node, list); 623 } 624 625 for (PatternIterator i = node.ancestor.pattern.getSuccessors(); i.hasNext();) 626 { 627 Pattern nextPattern = i.next(); 628 if (symbol.equals(nextPattern.getSymbol())) 629 { 630 StackNode newNode = new NonterminalStackNode(list, nextPattern, node.ancestor); 631 632 if ((log!=null) && (log.isDebugEnabled())) 633 log.debug("reduce "+newNode+" with "+list); 634 635 current.push(newNode); 636 } 637 } 638 639 for (PatternIterator i = node.ancestor.pattern.getAscendingSuccessors(); i.hasNext();) 640 { 641 Pattern firstPattern = i.next(); 642 if (symbol.equals(firstPattern.getSymbol())) 643 { 644 StackNode newNode = new NonterminalStackNode(list, firstPattern, node.ancestor); 645 646 if ((log!=null) && (log.isDebugEnabled())) 647 log.debug("reduce "+newNode+" with "+list); 648 649 current.push(newNode); 650 } 651 } 652 653 if ((root==null) && (node.ancestor.pattern==grammar.getStartPattern()) && 654 (symbol.equals(grammar.getStartSymbol()))) 655 { 656 root = list; 657 658 if ((log!=null) && (log.isDebugEnabled())) 659 log.debug("accept "+symbol+" with "+list); 660 } 661 } 662 663 private void reduceEmpty(StackNode node) 664 { 665 for (PatternIterator i = node.pattern.getSuccessors(); i.hasNext();) 666 { 667 Pattern nextPattern = i.next(); 668 if ((nextPattern.getSymbol()!=null) && (grammar.isNullable(nextPattern.getSymbol()))) 669 { 670 StackNode newNode = new NonterminalStackNode(null, nextPattern, node); 671 672 if ((log!=null) && (log.isDebugEnabled())) 673 log.debug("reduce "+newNode); 674 675 current.push(newNode); 676 } 677 } 678 679 for (PatternIterator i = node.pattern.getAscendingSuccessors(); i.hasNext();) 680 { 681 Pattern firstPattern = i.next(); 682 if ((firstPattern.getSymbol()!=null) && (grammar.isNullable(firstPattern.getSymbol()))) 683 { 684 if (firstPattern==node.pattern) 686 { 687 continue; 689 } 690 691 StackNode newNode = new NonterminalStackNode(null, firstPattern, node); 692 693 if ((log!=null) && (log.isDebugEnabled())) 694 log.debug("reduce "+newNode); 695 696 current.push(newNode); 697 } 698 } 699 } 700 701 private void increasePosition(char[] text, int position, int lastposition) 702 { 703 for (int i = position; i<lastposition; i++) 704 { 705 if (text[i]=='\n') 706 { 707 column = 1; 708 line++; 709 } 710 else if ((text[i]=='\r') && ((i==(text.length-1)) || (text[i+1]!='\n'))) 711 { 712 column = 1; 713 line++; 714 } 715 else 716 column++; 717 } 718 } 719 720 private void fireEvents() throws SAXException 721 { 722 contentHandler.startPrefixMapping("", NS_OUTPUT); 723 contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl ()); 724 725 String symbol = grammar.getStartSymbol(); 726 contentHandler.startElement(NS_OUTPUT, symbol, symbol, new AttributesImpl ()); 727 728 Stack stack = new Stack (); 729 StackNodeList next = root; 730 char[] text = null; 731 int position = 0; 732 int lastposition = 0; 733 line = 1; 734 column = 1; 735 736 if (locatorImpl!=null) 737 { 738 locatorImpl.setLineNumber(line); 739 locatorImpl.setColumnNumber(column); 740 } 741 742 while (next!=null) 743 { 744 if (next.node instanceof NonterminalStackNode) 745 { 746 if (text!=null) 747 { 748 contentHandler.characters(text, position, (lastposition+1)-position); 749 increasePosition(text, position, (lastposition+1)-position); 750 751 if (locatorImpl!=null) 752 { 753 locatorImpl.setLineNumber(line); 754 locatorImpl.setColumnNumber(column); 755 } 756 757 text = null; 758 } 759 760 NonterminalStackNode nonterminal = (NonterminalStackNode)next.node; 761 762 AttributesImpl atts = new AttributesImpl (); 763 764 769 contentHandler.startElement(NS_OUTPUT, next.node.pattern.getSymbol(), 770 next.node.pattern.getSymbol(), atts); 771 stack.push(next); 772 next = nonterminal.definition; 773 } 774 else 775 { 776 TerminalStackNode terminal = (TerminalStackNode)next.node; 777 if (text==null) 778 { 779 text = terminal.text; 780 position = terminal.position; 781 } 782 else if (text!=terminal.text) 783 { 784 contentHandler.characters(text, position, (lastposition+1)-position); 785 increasePosition(text, position, (lastposition+1)-position); 786 787 if (locatorImpl!=null) 788 { 789 locatorImpl.setLineNumber(line); 790 locatorImpl.setColumnNumber(column); 791 } 792 793 text = terminal.text; 794 position = terminal.position; 795 } 796 797 lastposition = terminal.position; 798 799 next = next.next; 800 } 801 802 while ((next==null) && (!stack.isEmpty())) 803 { 804 next = (StackNodeList)stack.pop(); 805 806 if (text!=null) 807 { 808 contentHandler.characters(text, position, (lastposition+1)-position); 809 increasePosition(text, position, (lastposition+1)-position); 810 811 if (locatorImpl!=null) 812 { 813 locatorImpl.setLineNumber(line); 814 locatorImpl.setColumnNumber(column); 815 } 816 817 text = null; 818 } 819 820 contentHandler.endElement(NS_OUTPUT, next.node.pattern.getSymbol(), 821 next.node.pattern.getSymbol()); 822 next = next.next; 823 } 824 } 825 826 if (text!=null) 827 { 828 contentHandler.characters(text, position, (lastposition+1)-position); 829 increasePosition(text, position, (lastposition+1)-position); 830 831 if (locatorImpl!=null) 832 { 833 locatorImpl.setLineNumber(line); 834 locatorImpl.setColumnNumber(column); 835 } 836 837 text = null; 838 } 839 840 contentHandler.endElement(NS_OUTPUT, symbol, symbol); 841 842 contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT); 843 contentHandler.endPrefixMapping(""); 844 } 845 } 846 | Popular Tags |