KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > xml > xmlc > html > parsers > HTMLDocBuilder


/*
 * Enhydra Java Application Server Project
 *
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific terms governing rights and limitations
 * under the License.
 *
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 *
 * Contributor(s):
 *
 * $Id: HTMLDocBuilder.java,v 1.2 2005/01/26 08:29:24 jkjome Exp $
 */

23
24 package org.enhydra.xml.xmlc.html.parsers;
25
import java.util.HashSet;
import java.util.Locale;

import org.enhydra.xml.xmlc.XMLCError;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLDocument;
import org.xml.sax.InputSource;
38
39 /**
40  * Class used by HTML parser to build a DOM.
41  * <P>
42  * The document builder functions assume they are being called in the order the
43  * document is parsed. They keep a current node where new child nodes are
44  * appended.
45  */

46 public class HTMLDocBuilder {
47     /**
48      * XMLC Document object.
49      */

50     private XMLCDocument fXmlcDoc;
51
52     /**
53      * Factory for creating the document.
54      */

55     private XMLCDomFactory fDomFactory;
56
57     /**
58      * The document.
59      */

60     private HTMLDocument fDocument;
61
62     /**
63      * Have we got the parser callback for the document element.
64      * This is used to determine where to insert comments, since the
65      * document element pre-exists.
66      */

67     private boolean fGotDocElement;
68
69     /**
70      * The current node that is being constructed. This functions as a stack
71      * during document construction.
72      */

73     private Node JavaDoc fCurrentNode;
74
75     /**
76      * Table used to determine what tags have been closed by
77      * fixUnrecognizedTagNesting.
78      */

79     private HashSet JavaDoc fClosedUnrecognizedElements = null;
80
81     /**
82      * Constructor. Creates XMLCDocument object.
83      */

84     public HTMLDocBuilder(XMLCDomFactory domFactory,
85                           InputSource JavaDoc input) throws XMLCException {
86         fXmlcDoc = new XMLCDocument(domFactory);
87         fDomFactory = domFactory;
88         Document doc = fXmlcDoc.createDocument(null, null);
89         if (!(doc instanceof HTMLDocument)) {
90             throw new XMLCException("DOM factory ("
91                                     + fDomFactory.getClass().getName()
92                                     + ") created a document that was not a HTMLDocument, got "
93                                     + doc.getClass().getName());
94         }
95         fDocument = (HTMLDocument)doc;
96         fCurrentNode = fDocument;
97
98         String JavaDoc encoding = input.getEncoding();
99         if (encoding != null) {
100             fXmlcDoc.setEncoding(encoding);
101         }
102     }
103
104     /**
105      * Generate error about a method being called that should
106      * be called before the document is created.
107      */

108     private void docNotCreatedError() {
109         throw new XMLCError("Bug: parser event on document contents occured before document is created");
110     }
111
112     /**
113      * Get the XMLC document associated with this object.
114      */

115     public XMLCDocument getXMLCDocument() {
116         return fXmlcDoc;
117     }
118
119     /**
120      * Determine if an element name is a frameset-only element.
121      */

122     private boolean isFrameSetElement(String JavaDoc tagName) {
123         return tagName.equalsIgnoreCase("frameset")
124             || tagName.equalsIgnoreCase("noframes");
125
126     }
127
128     /**
129      * Start a new Element.
130      */

131     public void startElement(String JavaDoc tagName) {
132         // Document element already exists
133
if (tagName.equals("html")) {
134             fCurrentNode = fDocument.getDocumentElement();
135             fGotDocElement = true;
136         } else {
137             Element JavaDoc element = fDocument.createElement(tagName);
138             fCurrentNode.appendChild(element);
139             fCurrentNode = element;
140         }
141
142         if (isFrameSetElement(tagName)) {
143             fXmlcDoc.setIsHtmlFrameSet();
144         }
145     }
146     
147     /**
148      * Add an attribute to the element on the top of the
149      * stack.
150      */

151     public void addAttribute(String JavaDoc name, String JavaDoc value) {
152         ((Element JavaDoc)fCurrentNode).setAttribute(name, value);
153     }
154
155     /**
156      * Finish the element being constructed.
157      */

158     public void finishElement() {
159         if (fCurrentNode == null) {
160             throw new XMLCError("node stack underflow; malformed document");
161         }
162         if (!(fCurrentNode instanceof Element JavaDoc)) {
163             throw new XMLCError("DOM node top of stack not a element for end tag");
164         }
165         fCurrentNode = fCurrentNode.getParentNode();
166     }
167
168     /**
169      * Add a <code>Text</code> node.
170      */

171     public void addTextNode(String JavaDoc data) {
172         if (fDocument == null) {
173             docNotCreatedError();
174         }
175         fCurrentNode.appendChild(fDocument.createTextNode(data));
176     }
177
178     /**
179      * Add a <code>Comment</code> node.
180      */

181     public void addComment(String JavaDoc data) {
182         Comment JavaDoc comment = fDocument.createComment(data);
183         // Handle insertion before document element (current should always
184
// be document, but we might be handling some invalid node).
185
if ((!fGotDocElement) && (fCurrentNode == fDocument)) {
186             fCurrentNode.insertBefore(comment, fDocument.getDocumentElement());
187         } else {
188             fCurrentNode.appendChild(comment);
189         }
190     }
191
192     /**
193      * Get the node on the top of the stack during parsing.
194      * FIXME: Added to work around bugs in the swing parser.
195      */

196     public Node JavaDoc getCurrentNode() {
197         return fCurrentNode;
198     }
199
200     /**
201      * Pop the current node off of the stack. This is *only* used
202      * during error recover from a broken parser.
203      * FIXME: Added to work around bugs in the swing parser.
204      */

205     public void popCurrentNode() {
206         fCurrentNode = fCurrentNode.getParentNode();
207     }
208
209     /**
210      * Recursive part of findUnrecognizedTag
211      */

212     private Node JavaDoc recursiveFindUnrecognizedTag(String JavaDoc tagNameUpper,
213                                               Node JavaDoc parent) {
214         // Search right to left.
215
for (Node JavaDoc child = parent.getLastChild(); child != null;
216              child = child.getPreviousSibling()) {
217             if (child.getNodeName().equals(tagNameUpper)
218                 && !fClosedUnrecognizedElements.contains(child)) {
219                 return child; // Found it!
220
}
221         }
222         
223         // Search up the tree.
224
Node JavaDoc grandParent = parent.getParentNode();
225         if (grandParent != null) {
226             return recursiveFindUnrecognizedTag(tagNameUpper, grandParent);
227         } else {
228             return null;
229         }
230     }
231
232     /**
233      * Find the element for an unrecognized tag. This searches up the parse
234      * stack, looking at the siblings of each node on the stack. This starts
235      * with the parent of the top of the stack, and searches its children from
236      * right to left. Thus the first node checked is node on the top of the
237      * stack.
238      */

239     private Node JavaDoc findUnrecognizedTag(String JavaDoc tagNameUpper) throws XMLCException {
240         Node JavaDoc openingElement = null;
241         if (fCurrentNode != null) {
242             openingElement = recursiveFindUnrecognizedTag(tagNameUpper,
243                                                           fCurrentNode);
244         }
245         if (openingElement == null) {
246             throw new XMLCException("could not find matching opening tag for </"
247                                     + tagNameUpper + ">");
248         }
249         if (openingElement.getFirstChild() != null) {
250             throw new XMLCError("attempt to fix nesting for </"
251                                 + tagNameUpper
252                                 + "> found a node that already has children");
253         }
254         return openingElement;
255     }
256
257     /**
258      * Make nodes to the right of an element the element's children.
259      */

260     private void makeRightSiblingsChildren(Node JavaDoc openingElement) {
261         Node JavaDoc parent = openingElement.getParentNode();
262
263         Node JavaDoc sibling;
264         while ((sibling = openingElement.getNextSibling()) != null) {
265             openingElement.appendChild(sibling);
266         }
267     }
268
269     /**
270      * Used to correct nesting when handling an unknown tag. This is called
271      * when the end tag is encountered. The tree is walked backwards from the
272      * top of the stack to find the element pushed for the open tag. All of
273      * the siblings to the right of that element are moved to be children of
274      * the element. The stack is popped back until the parent of the
275      * element being closed is on top. This was put in to support the
276      * swing parser.
277      */

278     public void fixUnrecognizedTagNesting(String JavaDoc tagName)
279         throws XMLCException {
280         String JavaDoc tagNameUpper = tagName.toUpperCase();
281         if (fClosedUnrecognizedElements == null) {
282             fClosedUnrecognizedElements = new HashSet JavaDoc();
283         }
284
285         // Find and correct
286
Node JavaDoc openingElement = findUnrecognizedTag(tagNameUpper);
287         makeRightSiblingsChildren(openingElement);
288         fClosedUnrecognizedElements.add(openingElement);
289
290         // Clean up the stack
291
Node JavaDoc openingParent = openingElement.getParentNode();
292         while (fCurrentNode != openingParent) {
293             popCurrentNode();
294         }
295     }
296 }
297
Popular Tags