KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > cyberneko > html > parsers > DOMParser


1 /*
2  * (C) Copyright 2002-2005, Andy Clark. All rights reserved.
3  *
4  * This file is distributed under an Apache style license. Please
5  * refer to the LICENSE file for specific details.
6  */

7
8 package org.cyberneko.html.parsers;
9
10 import org.cyberneko.html.HTMLConfiguration;
11
12 import org.apache.xerces.xni.Augmentations;
13 import org.apache.xerces.xni.XNIException;
14
15 import org.w3c.dom.DOMException;
16
17 /**
18  * A DOM parser for HTML documents.
19  *
20  * @author Andy Clark
21  *
22  * @version $Id: DOMParser.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
23  */

24 public class DOMParser
25     /***/
26     extends org.apache.xerces.parsers.DOMParser {
27     /***
28     // NOTE: It would be better to extend from AbstractDOMParser but
29     // most users will find it easier if the API is just like the
30     // Xerces DOM parser. By extending directly from DOMParser,
31     // users can register SAX error handlers, entity resolvers,
32     // and the like. -Ac
33     extends org.apache.xerces.parsers.AbstractDOMParser {
34     /***/

35
36     //
37
// Constructors
38
//
39

40     /** Default constructor. */
41     public DOMParser() {
42         super(new HTMLConfiguration());
43         /*** extending DOMParser ***/
44         try {
45             setProperty("http://apache.org/xml/properties/dom/document-class-name",
46                                        "org.apache.html.dom.HTMLDocumentImpl");
47         }
48         catch (org.xml.sax.SAXNotRecognizedException JavaDoc e) {
49             throw new RuntimeException JavaDoc("http://apache.org/xml/properties/dom/document-class-name property not recognized");
50         }
51         catch (org.xml.sax.SAXNotSupportedException JavaDoc e) {
52             throw new RuntimeException JavaDoc("http://apache.org/xml/properties/dom/document-class-name property not supported");
53         }
54         /*** extending AbstractDOMParser ***
55         fConfiguration.setProperty("http://apache.org/xml/properties/dom/document-class-name",
56                                    "org.apache.html.dom.HTMLDocumentImpl");
57         /***/

58     } // <init>()
59

60     //
61
// XMLDocumentHandler methods
62
//
63

64     /** Doctype declaration. */
65     public void doctypeDecl(String JavaDoc root, String JavaDoc pubid, String JavaDoc sysid,
66                             Augmentations augs) throws XNIException {
67         
68         // NOTE: Xerces HTML DOM implementation (up to and including
69
// 2.5.0) throws a heirarchy request error exception
70
// when a doctype node is appended to the tree. So,
71
// don't insert this node into the tree for those
72
// versions... -Ac
73

74         String JavaDoc VERSION = org.apache.xerces.impl.Version.fVersion;
75         boolean okay = true;
76         if (VERSION.startsWith("Xerces-J 2.")) {
77             okay = getParserSubVersion() > 5;
78         }
79         // REVISIT: As soon as XML4J is updated with the latest code
80
// from Xerces, then this needs to be updated to
81
// check XML4J's version. -Ac
82
else if (VERSION.startsWith("XML4J")) {
83             okay = false;
84         }
85
86         // if okay, insert doctype; otherwise, don't risk it
87
if (okay) {
88             super.doctypeDecl(root, pubid, sysid, augs);
89         }
90
91     } // doctypeDecl(String,String,String,Augmentations)
92

93     //
94
// Private static methods
95
//
96

97     /** Returns the parser's sub-version number. */
98     private static int getParserSubVersion() {
99         try {
100             String JavaDoc VERSION = org.apache.xerces.impl.Version.fVersion;
101             int index1 = VERSION.indexOf('.') + 1;
102             int index2 = VERSION.indexOf('.', index1);
103             if (index2 == -1) { index2 = VERSION.length(); }
104             return Integer.parseInt(VERSION.substring(index1, index2));
105         }
106         catch (Exception JavaDoc e) {
107             return -1;
108         }
109     } // getParserSubVersion():int
110

111 } // class DOMParser
112
Popular Tags