1 27 package org.htmlparser.sax; 28 29 import java.io.IOException ; 30 31 import org.xml.sax.ContentHandler ; 32 import org.xml.sax.DTDHandler ; 33 import org.xml.sax.EntityResolver ; 34 import org.xml.sax.ErrorHandler ; 35 import org.xml.sax.InputSource ; 36 import org.xml.sax.SAXException ; 37 import org.xml.sax.SAXNotRecognizedException ; 38 import org.xml.sax.SAXNotSupportedException ; 39 import org.xml.sax.SAXParseException ; 40 import org.xml.sax.helpers.NamespaceSupport ; 41 42 import org.htmlparser.Node; 43 import org.htmlparser.Parser; 44 import org.htmlparser.Remark; 45 import org.htmlparser.Tag; 46 import org.htmlparser.Text; 47 import org.htmlparser.util.DefaultParserFeedback; 48 import org.htmlparser.util.NodeIterator; 49 import org.htmlparser.util.NodeList; 50 import org.htmlparser.util.ParserException; 51 import org.htmlparser.util.ParserFeedback; 52 53 66 public class XMLReader 67 implements 68 org.xml.sax.XMLReader  69 { 70 83 protected boolean mNameSpaces; 85 89 protected boolean mNameSpacePrefixes; 91 94 protected EntityResolver mEntityResolver; 95 96 99 protected DTDHandler mDTDHandler; 100 101 104 protected ContentHandler mContentHandler; 105 106 109 protected ErrorHandler mErrorHandler; 110 111 114 protected Parser mParser; 115 116 119 protected NamespaceSupport mSupport; 120 121 124 protected String mParts[]; 125 126 129 public XMLReader () 130 { 131 mNameSpaces = true; 132 mNameSpacePrefixes = false; 133 134 mEntityResolver = null; 135 mDTDHandler = null; 136 mContentHandler = null; 137 mErrorHandler = null; 138 139 mSupport = new NamespaceSupport (); 140 mSupport.pushContext (); 141 mSupport.declarePrefix ("", "http://www.w3.org/TR/REC-html40"); 142 mParts = new String [3]; 146 } 147 148 152 153 209 public boolean getFeature (String name) 210 throws SAXNotRecognizedException , SAXNotSupportedException  211 { 212 boolean ret; 213 214 if (name.equals ("http://xml.org/sax/features/namespaces")) 215 ret = mNameSpaces; 216 else if (name.equals ("http://xml.org/sax/features/namespace-prefixes")) 217 ret = mNameSpacePrefixes; 218 else 219 throw new SAXNotSupportedException (name + " not yet understood"); 220 221 return (ret); 222 } 223 224 225 248 public void setFeature (String name, boolean value) 249 throws SAXNotRecognizedException , SAXNotSupportedException  250 { 251 if (name.equals ("http://xml.org/sax/features/namespaces")) 252 mNameSpaces = value; 253 else if (name.equals ("http://xml.org/sax/features/namespace-prefixes")) 254 mNameSpacePrefixes = value; 255 else 256 throw new SAXNotSupportedException (name + " not yet understood"); 257 } 258 259 260 285 public Object getProperty (String name) 286 throws SAXNotRecognizedException , SAXNotSupportedException  287 { 288 throw new SAXNotSupportedException (name + " not yet understood"); 289 } 290 291 292 317 public void setProperty (String name, Object value) 318 throws SAXNotRecognizedException , SAXNotSupportedException  319 { 320 throw new SAXNotSupportedException (name + " not yet understood"); 321 } 322 323 327 328 341 public void setEntityResolver (EntityResolver resolver) 342 { 343 mEntityResolver = resolver; 344 } 345 346 347 354 public EntityResolver getEntityResolver () 355 { 356 return (mEntityResolver); 357 } 358 359 360 373 public void setDTDHandler (DTDHandler handler) 374 { 375 mDTDHandler = handler; 376 } 377 378 379 386 public DTDHandler getDTDHandler () 387 { 388 return (mDTDHandler); 389 } 390 391 392 406 public void setContentHandler (ContentHandler handler) 407 { 408 mContentHandler = handler; 409 } 410 411 412 419 public ContentHandler getContentHandler () 420 { 421 return (mContentHandler); 422 } 423 424 425 441 public void setErrorHandler (ErrorHandler handler) 442 { 443 mErrorHandler = handler; 444 } 445 446 447 454 public ErrorHandler getErrorHandler () 455 { 456 return (mErrorHandler); 457 } 458 459 460 464 506 public void parse (InputSource input) 507 throws IOException , SAXException  508 { 509 throw new SAXException ("parse (InputSource input) is not yet supported"); 510 } 511 512 513 535 public void parse (String systemId) 536 throws IOException , SAXException  537 { 538 Locator locator; 539 ParserFeedback feedback; 540 541 if (null != mContentHandler) 542 try 543 { 544 mParser = new Parser (systemId); 545 locator = new Locator (mParser); 546 if (null != mErrorHandler) 547 feedback = new Feedback (mErrorHandler, locator); 548 else 549 feedback = new DefaultParserFeedback (DefaultParserFeedback.QUIET); 550 mParser.setFeedback (feedback); 551 552 mContentHandler.setDocumentLocator (locator); 554 try 555 { 556 mContentHandler.startDocument (); 557 for (NodeIterator iterator = mParser.elements (); iterator.hasMoreNodes (); ) 558 doSAX (iterator.nextNode ()); 559 mContentHandler.endDocument (); 560 } 561 catch (SAXException se) 562 { 563 if (null != mErrorHandler) 564 mErrorHandler.fatalError ( 565 new SAXParseException ("contentHandler threw me", locator, se)); 566 } 567 } 568 catch (ParserException pe) 569 { 570 if (null != mErrorHandler) 571 mErrorHandler.fatalError ( 572 new SAXParseException (pe.getMessage (), "", systemId, 0, 0)); 573 574 } 575 } 576 577 585 protected void doSAX (Node node) 586 throws 587 ParserException, 588 SAXException  589 { 590 Tag tag; 591 Tag end; 592 593 if (node instanceof Remark) 594 { 595 String text = mParser.getLexer ().getPage ().getText (node.getStartPosition (), node.getEndPosition ()); 596 mContentHandler.ignorableWhitespace (text.toCharArray (), 0, text.length ()); 597 } 598 else if (node instanceof Text) 599 { 600 String text = mParser.getLexer ().getPage ().getText (node.getStartPosition (), node.getEndPosition ()); 601 mContentHandler.characters (text.toCharArray (), 0, text.length ()); 602 } 603 else if (node instanceof Tag) 604 { 605 tag = (Tag)node; 606 if (mNameSpaces) 607 mSupport.processName (tag.getTagName (), mParts, false); 608 else 609 { 610 mParts[0] = ""; 611 mParts[1] = ""; 612 } 613 if (mNameSpacePrefixes) 614 mParts[2] = tag.getTagName (); 615 else if (mNameSpaces) 616 mParts[2] = ""; 617 else 618 mParts[2] = tag.getTagName (); 619 620 mContentHandler.startElement ( 621 mParts[0], mParts[1], mParts[2], new Attributes (tag, mSupport, mParts)); 625 NodeList children = tag.getChildren (); 626 if (null != children) 627 for (int i = 0; i < children.size (); i++) 628 doSAX (children.elementAt (i)); 629 end = tag.getEndTag (); 630 if (null != end) 631 { 632 if (mNameSpaces) 633 mSupport.processName (end.getTagName (), mParts, false); 634 else 635 { 636 mParts[0] = ""; 637 mParts[1] = ""; 638 } 639 if (mNameSpacePrefixes) 640 mParts[2] = end.getTagName (); 641 else if (mNameSpaces) 642 mParts[2] = ""; 643 else 644 mParts[2] = end.getTagName (); 645 mContentHandler.endElement ( 646 mParts[0], mParts[1], mParts[2]); } 650 } 651 } 652 } 653 654
| Popular Tags
|