KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > cyberneko > html > parsers > DOMFragmentParser


/*
 * (C) Copyright 2002-2005, Andy Clark. All rights reserved.
 *
 * This file is distributed under an Apache style license. Please
 * refer to the LICENSE file for specific details.
 * ==============================================================
 * This file contains some code from Apache Xerces-J which is
 * used in accordance with the Apache license. Please refer to
 * the LICENSE_apache file for specific details.
 */

package org.cyberneko.html.parsers;

import org.cyberneko.html.HTMLConfiguration;

import org.apache.xerces.impl.Constants;
import org.apache.xerces.util.ErrorHandlerWrapper;

import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDocumentHandler;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XMLResourceIdentifier;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;

import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentSource;
import org.apache.xerces.xni.parser.XMLErrorHandler;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLParseException;
import org.apache.xerces.xni.parser.XMLParserConfiguration;

import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;

import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.EntityReference;
import org.w3c.dom.Node;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;

import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;

58 /**
59  * A DOM parser for HTML fragments.
60  *
61  * @author Andy Clark
62  *
63  * @version $Id: DOMFragmentParser.java,v 1.8 2005/02/14 03:56:54 andyc Exp $
64  */

65 public class DOMFragmentParser
66     implements XMLDocumentHandler {
67
68     //
69
// Constants
70
//
71

72     // features
73

74     /** Document fragment balancing only. */
75     protected static final String JavaDoc DOCUMENT_FRAGMENT =
76         "http://cyberneko.org/html/features/document-fragment";
77
78     /** Recognized features. */
79     protected static final String JavaDoc[] RECOGNIZED_FEATURES = {
80         DOCUMENT_FRAGMENT,
81     };
82
83     // properties
84

85     /** Property identifier: error handler. */
86     protected static final String JavaDoc ERROR_HANDLER =
87         Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_HANDLER_PROPERTY;
88
89     /** Current element node. */
90     protected static final String JavaDoc CURRENT_ELEMENT_NODE =
91         Constants.XERCES_PROPERTY_PREFIX + Constants.CURRENT_ELEMENT_NODE_PROPERTY;
92
93     /** Recognized properties. */
94     protected static final String JavaDoc[] RECOGNIZED_PROPERTIES = {
95         ERROR_HANDLER,
96         CURRENT_ELEMENT_NODE,
97     };
98
99     //
100
// Data
101
//
102

103     /** Parser configuration. */
104     protected XMLParserConfiguration fParserConfiguration;
105
106     /** Document source. */
107     protected XMLDocumentSource fDocumentSource;
108
109     /** DOM document fragment. */
110     protected DocumentFragment JavaDoc fDocumentFragment;
111
112     /** Document. */
113     protected Document JavaDoc fDocument;
114
115     /** Current node. */
116     protected Node JavaDoc fCurrentNode;
117
118     /** True if within a CDATA section. */
119     protected boolean fInCDATASection;
120
121     //
122
// Constructors
123
//
124

125     /** Default constructor. */
126     public DOMFragmentParser() {
127         fParserConfiguration = new HTMLConfiguration();
128         fParserConfiguration.addRecognizedFeatures(RECOGNIZED_FEATURES);
129         fParserConfiguration.addRecognizedProperties(RECOGNIZED_PROPERTIES);
130         fParserConfiguration.setFeature(DOCUMENT_FRAGMENT, true);
131         fParserConfiguration.setDocumentHandler(this);
132     } // <init>()
133

134     //
135
// Public methods
136
//
137

138     /** Parses a document fragment. */
139     public void parse(String JavaDoc systemId, DocumentFragment JavaDoc fragment)
140         throws SAXException JavaDoc, IOException JavaDoc {
141         parse(new InputSource(systemId), fragment);
142     } // parse(String,DocumentFragment)
143

144     /** Parses a document fragment. */
145     public void parse(InputSource source, DocumentFragment JavaDoc fragment)
146         throws SAXException JavaDoc, IOException JavaDoc {
147
148         fCurrentNode = fDocumentFragment = fragment;
149         fDocument = fDocumentFragment.getOwnerDocument();
150
151         try {
152             String JavaDoc pubid = source.getPublicId();
153             String JavaDoc sysid = source.getSystemId();
154             String JavaDoc encoding = source.getEncoding();
155             InputStream JavaDoc stream = source.getByteStream();
156             Reader JavaDoc reader = source.getCharacterStream();
157             
158             XMLInputSource inputSource =
159                 new XMLInputSource(pubid, sysid, sysid);
160             inputSource.setEncoding(encoding);
161             inputSource.setByteStream(stream);
162             inputSource.setCharacterStream(reader);
163             
164             fParserConfiguration.parse(inputSource);
165         }
166         catch (XMLParseException e) {
167             Exception JavaDoc ex = e.getException();
168             if (ex != null) {
169                 throw new SAXParseException JavaDoc(e.getMessage(), null, ex);
170             }
171             throw new SAXParseException JavaDoc(e.getMessage(), null);
172         }
173
174     } // parse(InputSource,DocumentFragment)
175

176     /**
177      * Allow an application to register an error event handler.
178      *
179      * <p>If the application does not register an error handler, all
180      * error events reported by the SAX parser will be silently
181      * ignored; however, normal processing may not continue. It is
182      * highly recommended that all SAX applications implement an
183      * error handler to avoid unexpected bugs.</p>
184      *
185      * <p>Applications may register a new or different handler in the
186      * middle of a parse, and the SAX parser must begin using the new
187      * handler immediately.</p>
188      *
189      * @param errorHandler The error handler.
190      * @exception java.lang.NullPointerException If the handler
191      * argument is null.
192      * @see #getErrorHandler
193      */

194     public void setErrorHandler(ErrorHandler errorHandler) {
195         fParserConfiguration.setErrorHandler(new ErrorHandlerWrapper(errorHandler));
196     } // setErrorHandler(ErrorHandler)
197

198     /**
199      * Return the current error handler.
200      *
201      * @return The current error handler, or null if none
202      * has been registered.
203      * @see #setErrorHandler
204      */

205     public ErrorHandler getErrorHandler() {
206
207         ErrorHandler errorHandler = null;
208         try {
209             XMLErrorHandler xmlErrorHandler =
210                 (XMLErrorHandler)fParserConfiguration.getProperty(ERROR_HANDLER);
211             if (xmlErrorHandler != null &&
212                 xmlErrorHandler instanceof ErrorHandlerWrapper) {
213                 errorHandler = ((ErrorHandlerWrapper)xmlErrorHandler).getErrorHandler();
214             }
215         }
216         catch (XMLConfigurationException e) {
217             // do nothing
218
}
219         return errorHandler;
220
221     } // getErrorHandler():ErrorHandler
222

223     /**
224      * Set the state of any feature in a SAX2 parser. The parser
225      * might not recognize the feature, and if it does recognize
226      * it, it might not be able to fulfill the request.
227      *
228      * @param featureId The unique identifier (URI) of the feature.
229      * @param state The requested state of the feature (true or false).
230      *
231      * @exception SAXNotRecognizedException If the
232      * requested feature is not known.
233      * @exception SAXNotSupportedException If the
234      * requested feature is known, but the requested
235      * state is not supported.
236      */

237     public void setFeature(String JavaDoc featureId, boolean state)
238         throws SAXNotRecognizedException JavaDoc, SAXNotSupportedException JavaDoc {
239
240         try {
241             fParserConfiguration.setFeature(featureId, state);
242         }
243         catch (XMLConfigurationException e) {
244             String JavaDoc message = e.getMessage();
245             if (e.getType() == XMLConfigurationException.NOT_RECOGNIZED) {
246                 throw new SAXNotRecognizedException JavaDoc(message);
247             }
248             else {
249                 throw new SAXNotSupportedException JavaDoc(message);
250             }
251         }
252
253     } // setFeature(String,boolean)
254

255     /**
256      * Query the state of a feature.
257      *
258      * Query the current state of any feature in a SAX2 parser. The
259      * parser might not recognize the feature.
260      *
261      * @param featureId The unique identifier (URI) of the feature
262      * being set.
263      * @return The current state of the feature.
264      * @exception org.xml.sax.SAXNotRecognizedException If the
265      * requested feature is not known.
266      * @exception SAXNotSupportedException If the
267      * requested feature is known but not supported.
268      */

269     public boolean getFeature(String JavaDoc featureId)
270         throws SAXNotRecognizedException JavaDoc, SAXNotSupportedException JavaDoc {
271
272         try {
273             return fParserConfiguration.getFeature(featureId);
274         }
275         catch (XMLConfigurationException e) {
276             String JavaDoc message = e.getMessage();
277             if (e.getType() == XMLConfigurationException.NOT_RECOGNIZED) {
278                 throw new SAXNotRecognizedException JavaDoc(message);
279             }
280             else {
281                 throw new SAXNotSupportedException JavaDoc(message);
282             }
283         }
284
285     } // getFeature(String):boolean
286

287     /**
288      * Set the value of any property in a SAX2 parser. The parser
289      * might not recognize the property, and if it does recognize
290      * it, it might not support the requested value.
291      *
292      * @param propertyId The unique identifier (URI) of the property
293      * being set.
294      * @param value The value to which the property is being set.
295      *
296      * @exception SAXNotRecognizedException If the
297      * requested property is not known.
298      * @exception SAXNotSupportedException If the
299      * requested property is known, but the requested
300      * value is not supported.
301      */

302     public void setProperty(String JavaDoc propertyId, Object JavaDoc value)
303         throws SAXNotRecognizedException JavaDoc, SAXNotSupportedException JavaDoc {
304
305         try {
306             fParserConfiguration.setProperty(propertyId, value);
307         }
308         catch (XMLConfigurationException e) {
309             String JavaDoc message = e.getMessage();
310             if (e.getType() == XMLConfigurationException.NOT_RECOGNIZED) {
311                 throw new SAXNotRecognizedException JavaDoc(message);
312             }
313             else {
314                 throw new SAXNotSupportedException JavaDoc(message);
315             }
316         }
317
318     } // setProperty(String,Object)
319

320     /**
321      * Query the value of a property.
322      *
323      * Return the current value of a property in a SAX2 parser.
324      * The parser might not recognize the property.
325      *
326      * @param propertyId The unique identifier (URI) of the property
327      * being set.
328      * @return The current value of the property.
329      * @exception org.xml.sax.SAXNotRecognizedException If the
330      * requested property is not known.
331      * @exception SAXNotSupportedException If the
332      * requested property is known but not supported.
333      */

334     public Object JavaDoc getProperty(String JavaDoc propertyId)
335         throws SAXNotRecognizedException JavaDoc, SAXNotSupportedException JavaDoc {
336
337         if (propertyId.equals(CURRENT_ELEMENT_NODE)) {
338             return (fCurrentNode!=null &&
339                     fCurrentNode.getNodeType() == Node.ELEMENT_NODE)? fCurrentNode:null;
340         }
341
342         try {
343             return fParserConfiguration.getProperty(propertyId);
344         }
345         catch (XMLConfigurationException e) {
346             String JavaDoc message = e.getMessage();
347             if (e.getType() == XMLConfigurationException.NOT_RECOGNIZED) {
348                 throw new SAXNotRecognizedException JavaDoc(message);
349             }
350             else {
351                 throw new SAXNotSupportedException JavaDoc(message);
352             }
353         }
354
355     } // getProperty(String):Object
356

357     //
358
// XMLDocumentHandler methods
359
//
360

361     /** Sets the document source. */
362     public void setDocumentSource(XMLDocumentSource source) {
363         fDocumentSource = source;
364     } // setDocumentSource(XMLDocumentSource)
365

366     /** Returns the document source. */
367     public XMLDocumentSource getDocumentSource() {
368         return fDocumentSource;
369     } // getDocumentSource():XMLDocumentSource
370

371     /** Start document. */
372     public void startDocument(XMLLocator locator, String JavaDoc encoding,
373                               Augmentations augs) throws XNIException {
374         startDocument(locator, encoding, null, augs);
375     } // startDocument(XMLLocator,String,Augmentations)
376

377     // since Xerces 2.2.0
378

379     /** Start document. */
380     public void startDocument(XMLLocator locator, String JavaDoc encoding,
381                               NamespaceContext nscontext,
382                               Augmentations augs) throws XNIException {
383         fInCDATASection = false;
384     } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
385

386     /** XML declaration. */
387     public void xmlDecl(String JavaDoc version, String JavaDoc encoding,
388                         String JavaDoc standalone, Augmentations augs)
389         throws XNIException {
390     } // xmlDecl(String,String,String,Augmentations)
391

392     /** Document type declaration. */
393     public void doctypeDecl(String JavaDoc root, String JavaDoc pubid, String JavaDoc sysid,
394                             Augmentations augs) throws XNIException {
395     } // doctypeDecl(String,String,String,Augmentations)
396

397     /** Processing instruction. */
398     public void processingInstruction(String JavaDoc target, XMLString data,
399                                       Augmentations augs)
400         throws XNIException {
401         ProcessingInstruction JavaDoc pi =
402             fDocument.createProcessingInstruction(target, data.toString());
403         fCurrentNode.appendChild(pi);
404     } // processingInstruction(String,XMLString,Augmentations)
405

406     /** Comment. */
407     public void comment(XMLString text, Augmentations augs)
408         throws XNIException {
409         Comment JavaDoc comment = fDocument.createComment(text.toString());
410         fCurrentNode.appendChild(comment);
411     } // comment(XMLString,Augmentations)
412

413     /** Start prefix mapping. @deprecated Since Xerces 2.2.0. */
414     public void startPrefixMapping(String JavaDoc prefix, String JavaDoc uri,
415                                    Augmentations augs) throws XNIException {
416     } // startPrefixMapping(String,String,Augmentations)
417

418     /** End prefix mapping. @deprecated Since Xerces 2.2.0. */
419     public void endPrefixMapping(String JavaDoc prefix, Augmentations augs)
420         throws XNIException {
421     } // endPrefixMapping(String,Augmentations)
422

423     /** Start element. */
424     public void startElement(QName element, XMLAttributes attrs,
425                              Augmentations augs) throws XNIException {
426         Element JavaDoc elementNode = fDocument.createElement(element.rawname);
427         int count = attrs != null ? attrs.getLength() : 0;
428         for (int i = 0; i < count; i++) {
429             String JavaDoc aname = attrs.getQName(i);
430             String JavaDoc avalue = attrs.getValue(i);
431             elementNode.setAttribute(aname, avalue);
432         }
433         fCurrentNode.appendChild(elementNode);
434         fCurrentNode = elementNode;
435     } // startElement(QName,XMLAttributes,Augmentations)
436

437     /** Empty element. */
438     public void emptyElement(QName element, XMLAttributes attrs,
439                              Augmentations augs) throws XNIException {
440         startElement(element, attrs, augs);
441         endElement(element, augs);
442     } // emptyElement(QName,XMLAttributes,Augmentations)
443

444     /** Characters. */
445     public void characters(XMLString text, Augmentations augs)
446         throws XNIException {
447
448         if (fInCDATASection) {
449             Node JavaDoc node = fCurrentNode.getLastChild();
450             if (node != null && node.getNodeType() == Node.CDATA_SECTION_NODE) {
451                 CDATASection JavaDoc cdata = (CDATASection JavaDoc)node;
452                 cdata.appendData(text.toString());
453             }
454             else {
455                 CDATASection JavaDoc cdata = fDocument.createCDATASection(text.toString());
456                 fCurrentNode.appendChild(cdata);
457             }
458         }
459         else {
460             Node JavaDoc node = fCurrentNode.getLastChild();
461             if (node != null && node.getNodeType() == Node.TEXT_NODE) {
462                 Text JavaDoc textNode = (Text JavaDoc)node;
463                 textNode.appendData(text.toString());
464             }
465             else {
466                 Text JavaDoc textNode = fDocument.createTextNode(text.toString());
467                 fCurrentNode.appendChild(textNode);
468             }
469         }
470
471     } // characters(XMLString,Augmentations)
472

473     /** Ignorable whitespace. */
474     public void ignorableWhitespace(XMLString text, Augmentations augs)
475         throws XNIException {
476         characters(text, augs);
477     } // ignorableWhitespace(XMLString,Augmentations)
478

479     /** Start general entity. */
480     public void startGeneralEntity(String JavaDoc name, XMLResourceIdentifier id,
481                                    String JavaDoc encoding, Augmentations augs)
482         throws XNIException {
483         EntityReference JavaDoc entityRef = fDocument.createEntityReference(name);
484         fCurrentNode.appendChild(entityRef);
485         fCurrentNode = entityRef;
486     } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
487

488     /** Text declaration. */
489     public void textDecl(String JavaDoc version, String JavaDoc encoding,
490                          Augmentations augs) throws XNIException {
491     } // textDecl(String,String,Augmentations)
492

493     /** End general entity. */
494     public void endGeneralEntity(String JavaDoc name, Augmentations augs)
495         throws XNIException {
496         fCurrentNode = fCurrentNode.getParentNode();
497     } // endGeneralEntity(String,Augmentations)
498

499     /** Start CDATA section. */
500     public void startCDATA(Augmentations augs) throws XNIException {
501         fInCDATASection = true;
502     } // startCDATA(Augmentations)
503

504     /** End CDATA section. */
505     public void endCDATA(Augmentations augs) throws XNIException {
506         fInCDATASection = false;
507     } // endCDATA(Augmentations)
508

509     /** End element. */
510     public void endElement(QName element, Augmentations augs)
511         throws XNIException {
512         fCurrentNode = fCurrentNode.getParentNode();
513     } // endElement(QName,Augmentations)
514

515     /** End document. */
516     public void endDocument(Augmentations augs) throws XNIException {
517     } // endDocument(Augmentations)
518

519     //
520
// DEBUG
521
//
522

523     /***
524     public static void print(Node node) {
525         short type = node.getNodeType();
526         switch (type) {
527             case Node.ELEMENT_NODE: {
528                 System.out.print('<');
529                 System.out.print(node.getNodeName());
530                 org.w3c.dom.NamedNodeMap attrs = node.getAttributes();
531                 int attrCount = attrs != null ? attrs.getLength() : 0;
532                 for (int i = 0; i < attrCount; i++) {
533                     Node attr = attrs.item(i);
534                     System.out.print(' ');
535                     System.out.print(attr.getNodeName());
536                     System.out.print("='");
537                     System.out.print(attr.getNodeValue());
538                     System.out.print('\'');
539                 }
540                 System.out.print('>');
541                 break;
542             }
543             case Node.TEXT_NODE: {
544                 System.out.print(node.getNodeValue());
545                 break;
546             }
547         }
548         Node child = node.getFirstChild();
549         while (child != null) {
550             print(child);
551             child = child.getNextSibling();
552         }
553         if (type == Node.ELEMENT_NODE) {
554             System.out.print("</");
555             System.out.print(node.getNodeName());
556             System.out.print('>');
557         }
558         else if (type == Node.DOCUMENT_NODE || type == Node.DOCUMENT_FRAGMENT_NODE) {
559             System.out.println();
560         }
561         System.out.flush();
562     }
563
564     public static void main(String[] argv) throws Exception {
565         DOMFragmentParser parser = new DOMFragmentParser();
566         HTMLDocument document = new org.apache.html.dom.HTMLDocumentImpl();
567         for (int i = 0; i < argv.length; i++) {
568             String sysid = argv[i];
569             System.err.println("# "+sysid);
570             DocumentFragment fragment = document.createDocumentFragment();
571             parser.parse(sysid, fragment);
572             print(fragment);
573         }
574     }
575     /***/

576
577 } // class DOMFragmentParser
578
Popular Tags