HTMLDocBuilder


1   /*
2    * Enhydra Java Application Server Project
3    * 
4    * The contents of this file are subject to the Enhydra Public License
5    * Version 1.1 (the "License"); you may not use this file except in
6    * compliance with the License. You may obtain a copy of the License on
7    * the Enhydra web site ( http://www.enhydra.org/ ).
8    * 
9    * Software distributed under the License is distributed on an "AS IS"
10   * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
11   * the License for the specific terms governing rights and limitations
12   * under the License.
13   * 
14   * The Initial Developer of the Enhydra Application Server is Lutris
15   * Technologies, Inc. The Enhydra Application Server and portions created
16   * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
17   * All Rights Reserved.
18   * 
19   * Contributor(s):
20   * 
21   * $Id: HTMLDocBuilder.java,v 1.2 2005/01/26 08:29:24 jkjome Exp $
22   */
23  
24  package org.enhydra.xml.xmlc.html.parsers;
25  
26  import java.util.HashSet  ;
27  
28  import org.enhydra.xml.xmlc.XMLCError;
29  import org.enhydra.xml.xmlc.XMLCException;
30  import org.enhydra.xml.xmlc.dom.XMLCDocument;
31  import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
32  import org.w3c.dom.Comment  ;
33  import org.w3c.dom.Document  ;
34  import org.w3c.dom.Element  ;
35  import org.w3c.dom.Node  ;
36  import org.w3c.dom.html.HTMLDocument;
37  import org.xml.sax.InputSource  ;
38  
39  /**
40   * Class used by HTML parser to build a DOM.
41   * <P>
42   * The document builder functions assume they are being called in the order the
43   * document is parsed.  They keep a current node where new child nodes are
44   * appended.
45   */
46  public class HTMLDocBuilder {
47      /**
48       * XMLC Document object.
49       */
50      private XMLCDocument fXmlcDoc;
51  
52      /**
53       * Factory for creating the document.
54       */
55      private XMLCDomFactory fDomFactory;
56  
57      /**
58       * The document.
59       */
60      private HTMLDocument fDocument;
61  
62      /**
63       * Have we got the parser callback for the document element.
64       * This is used to determine where to insert comments, since the
65       * document element pre-exists.
66       */
67      private boolean fGotDocElement;
68  
69      /**
70       * The current node that is being constructed.  This functions as a stack
71       * during document construction.
72       */
73      private Node   fCurrentNode;
74  
75      /**
76       * Table used to determine what tags have been closed by
77       * fixUnrecognizedTagNesting.
78       */
79      private HashSet   fClosedUnrecognizedElements = null;
80  
81      /**
82       * Constructor.  Creates XMLCDocument object.
83       */
84      public HTMLDocBuilder(XMLCDomFactory domFactory,
85                            InputSource   input) throws XMLCException {
86          fXmlcDoc = new XMLCDocument(domFactory);
87          fDomFactory = domFactory;
88          Document doc = fXmlcDoc.createDocument(null, null);
89          if (!(doc instanceof HTMLDocument)) {
90              throw new XMLCException("DOM factory ("
91                                      + fDomFactory.getClass().getName()
92                                      + ") created a document that was not a HTMLDocument, got "
93                                      + doc.getClass().getName());
94          }
95          fDocument = (HTMLDocument)doc;
96          fCurrentNode = fDocument;
97  
98          String   encoding = input.getEncoding();
99          if (encoding != null) {
100             fXmlcDoc.setEncoding(encoding);
101         }
102     }
103 
104     /**
105      * Generate error about a method being called that should
106      * be called before the document is created.
107      */
108     private void docNotCreatedError() {
109         throw new XMLCError("Bug: parser event on document contents occured before document is created");
110     }
111 
112     /**
113      * Get the XMLC document associated with this object.
114      */
115     public XMLCDocument getXMLCDocument() {
116         return fXmlcDoc;
117     }
118 
119     /**
120      * Determine if an element name is a frameset-only element.
121      */
122     private boolean isFrameSetElement(String   tagName) {
123         return tagName.equalsIgnoreCase("frameset")
124             || tagName.equalsIgnoreCase("noframes");
125 
126     }
127 
128     /**
129      * Start a new Element.
130      */
131     public void startElement(String   tagName) {
132         // Document element already exists
133         if (tagName.equals("html")) {
134             fCurrentNode = fDocument.getDocumentElement();
135             fGotDocElement = true;
136         } else {
137             Element   element = fDocument.createElement(tagName);
138             fCurrentNode.appendChild(element);
139             fCurrentNode = element;
140         }
141 
142         if (isFrameSetElement(tagName)) {
143             fXmlcDoc.setIsHtmlFrameSet();
144         }
145     }
146     
147     /**
148      * Add an attribute to the element on the top of the
149      * stack.
150      */
151     public void addAttribute(String   name, String   value) {
152         ((Element  )fCurrentNode).setAttribute(name, value);
153     }
154 
155     /**
156      * Finish the element being constructed. 
157      */
158     public void finishElement() {
159         if (fCurrentNode == null) {
160             throw new XMLCError("node stack underflow; malformed document");
161         }
162         if (!(fCurrentNode instanceof Element  )) {
163             throw new XMLCError("DOM node top of stack not a element for end tag");
164         }
165         fCurrentNode = fCurrentNode.getParentNode();
166     }
167 
168     /**
169      * Add a <code>Text</code> node.
170      */
171     public void addTextNode(String   data) {
172         if (fDocument == null) {
173             docNotCreatedError();
174         }
175         fCurrentNode.appendChild(fDocument.createTextNode(data));
176     }
177 
178     /**
179      * Add a <code>Comment</code> node.
180      */
181     public void addComment(String   data) {
182         Comment   comment = fDocument.createComment(data);
183         // Handle insertion before document element (current should always
184         // be document, but we might be handling some invalid node).
185         if ((!fGotDocElement) && (fCurrentNode == fDocument)) {
186             fCurrentNode.insertBefore(comment, fDocument.getDocumentElement());
187         } else {
188             fCurrentNode.appendChild(comment);
189         }
190     }
191 
192     /**
193      * Get the node on the top of the stack during parsing.
194      * FIXME: Added to work around bugs in the swing parser.
195      */
196     public Node   getCurrentNode() {
197         return fCurrentNode;
198     }
199 
200     /**
201      * Pop the current node off of the stack.  This is *only* used
202      * during error recover from a broken parser.
203      * FIXME: Added to work around bugs in the swing parser.
204      */
205     public void popCurrentNode() {
206         fCurrentNode = fCurrentNode.getParentNode();
207     }
208 
209     /**
210      * Recursive part of findUnrecognizedTag
211      */
212     private Node   recursiveFindUnrecognizedTag(String   tagNameUpper,
213                                               Node   parent) {
214         // Search right to left.
215         for (Node   child = parent.getLastChild(); child != null;
216              child = child.getPreviousSibling()) {
217             if (child.getNodeName().equals(tagNameUpper)
218                 && !fClosedUnrecognizedElements.contains(child)) {
219                 return child;  // Found it!
220             }
221         }
222         
223         // Search up the tree.
224         Node   grandParent = parent.getParentNode();
225         if (grandParent != null) {
226             return recursiveFindUnrecognizedTag(tagNameUpper, grandParent);
227         } else {
228             return null;
229         }
230     }
231 
232     /**
233      * Find the element for an unrecognized tag.  This searches up the parse
234      * stack, looking at the siblings of each node on the stack.  This starts
235      * with the parent of the top of the stack, and searches its children from
236      * right to left.  Thus the first node checked is node on the top of the
237      * stack.
238      */
239     private Node   findUnrecognizedTag(String   tagNameUpper) throws XMLCException {
240         Node   openingElement = null;
241         if (fCurrentNode != null) {
242             openingElement = recursiveFindUnrecognizedTag(tagNameUpper,
243                                                           fCurrentNode);
244         }
245         if (openingElement == null) {
246             throw new XMLCException("could not find matching opening tag for </"
247                                     + tagNameUpper + ">");
248         }
249         if (openingElement.getFirstChild() != null) {
250             throw new XMLCError("attempt to fix nesting for </"
251                                 + tagNameUpper
252                                 + "> found a node that already has children");
253         }
254         return openingElement;
255     }
256 
257     /**
258      * Make nodes to the right of an element the element's children.
259      */
260     private void makeRightSiblingsChildren(Node   openingElement) {
261         Node   parent = openingElement.getParentNode();
262 
263         Node   sibling;
264         while ((sibling = openingElement.getNextSibling()) != null) {
265             openingElement.appendChild(sibling);
266         }
267     }
268 
269     /**
270      * Used to correct nesting when handling an unknown tag.  This is called
271      * when the end tag is encountered. The tree is walked backwards from the
272      * top of the stack to find the element pushed for the open tag.  All of
273      * the siblings to the right of that element are moved to be children of
274      * the element.  The stack is popped back until the parent of the
275      * element being closed is on top.  This was put in to support the
276      * swing parser.
277      */
278     public void fixUnrecognizedTagNesting(String   tagName)
279         throws XMLCException {
280         String   tagNameUpper = tagName.toUpperCase();
281         if (fClosedUnrecognizedElements == null) {
282             fClosedUnrecognizedElements = new HashSet  ();
283         }
284 
285         // Find and correct
286         Node   openingElement = findUnrecognizedTag(tagNameUpper);
287         makeRightSiblingsChildren(openingElement);
288         fClosedUnrecognizedElements.add(openingElement);
289 
290         // Clean up the stack
291         Node   openingParent = openingElement.getParentNode();
292         while (fCurrentNode != openingParent) {
293             popCurrentNode();
294         }
295     }
296 }
297
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags