package org.enhydra.xml.xmlc.html.parsers;

import java.util.HashSet;
import java.util.Locale;

import org.enhydra.xml.xmlc.XMLCError;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLDocument;
import org.xml.sax.InputSource;

/**
 * Incrementally builds an HTML DOM from parser events.
 * <p>
 * The builder creates an {@link XMLCDocument} and an {@link HTMLDocument}
 * through the supplied DOM factory and then tracks a "current node" that is
 * moved down by {@link #startElement} and back up by {@link #finishElement}
 * or {@link #popCurrentNode}.  Text and comments are attached relative to
 * that current node.
 */
public class HTMLDocBuilder {
    /**
     * XMLC document wrapper created in the constructor; returned by
     * {@link #getXMLCDocument()}.
     */
    private final XMLCDocument fXmlcDoc;

    /**
     * Factory that was used to create the document; kept for error reporting.
     */
    private final XMLCDomFactory fDomFactory;

    /**
     * The HTML document being populated.
     */
    private final HTMLDocument fDocument;

    /**
     * Set to <code>true</code> once a start tag for <code>html</code> has
     * been seen.  Until then, comments added at document level are inserted
     * before the document element instead of being appended after it.
     */
    private boolean fGotDocElement;

    /**
     * Node that new children are attached to.  Starts out as the document
     * and becomes <code>null</code> if it is popped past the document.
     */
    private Node fCurrentNode;

    /**
     * Elements whose nesting has already been repaired by
     * {@link #fixUnrecognizedTagNesting}; they are skipped when searching for
     * the opening tag of a later unrecognized close tag.  Created lazily.
     */
    private HashSet fClosedUnrecognizedElements = null;

    /**
     * Create the document that will be populated by subsequent events.
     *
     * @param domFactory factory used to create the XMLC and DOM documents.
     * @param input source being parsed; if it specifies an encoding, that
     *        encoding is recorded on the XMLC document.
     * @throws XMLCException if the factory produces a document that is not
     *         an {@link HTMLDocument}.
     */
    public HTMLDocBuilder(XMLCDomFactory domFactory,
                          InputSource input) throws XMLCException {
        fXmlcDoc = new XMLCDocument(domFactory);
        fDomFactory = domFactory;
        Document doc = fXmlcDoc.createDocument(null, null);
        if (!(doc instanceof HTMLDocument)) {
            throw new XMLCException("DOM factory ("
                                    + fDomFactory.getClass().getName()
                                    + ") created a document that was not a HTMLDocument, got "
                                    + doc.getClass().getName());
        }
        fDocument = (HTMLDocument)doc;
        fCurrentNode = fDocument;

        String encoding = input.getEncoding();
        if (encoding != null) {
            fXmlcDoc.setEncoding(encoding);
        }
    }

    /**
     * Report that a content event arrived while no document exists.
     *
     * @throws XMLCError always.
     */
    private void docNotCreatedError() {
        throw new XMLCError("Bug: parser event on document contents occured before document is created");
    }

    /**
     * Get the XMLC document that wraps the DOM being built.
     *
     * @return the XMLC document created by the constructor.
     */
    public XMLCDocument getXMLCDocument() {
        return fXmlcDoc;
    }

    /**
     * Determine whether a tag marks the document as a frame set.
     *
     * @param tagName tag name, compared without regard to case.
     * @return <code>true</code> for <code>frameset</code> or
     *         <code>noframes</code>.
     */
    private boolean isFrameSetElement(String tagName) {
        return tagName.equalsIgnoreCase("frameset")
            || tagName.equalsIgnoreCase("noframes");
    }

    /**
     * Handle a start tag: make the corresponding element the current node.
     * Frame set elements additionally flag the XMLC document as a frame set.
     *
     * @param tagName name of the element being opened.
     */
    public void startElement(String tagName) {
        // Compared case-insensitively, matching isFrameSetElement().
        if (tagName.equalsIgnoreCase("html")) {
            // Reuse the element the document reports as its root rather
            // than creating a second one.
            fCurrentNode = fDocument.getDocumentElement();
            fGotDocElement = true;
        } else {
            Element element = fDocument.createElement(tagName);
            fCurrentNode.appendChild(element);
            fCurrentNode = element;
        }

        if (isFrameSetElement(tagName)) {
            fXmlcDoc.setIsHtmlFrameSet();
        }
    }

    /**
     * Set an attribute on the current node, which must be an element
     * (normally the one opened by the preceding {@link #startElement}).
     *
     * @param name attribute name.
     * @param value attribute value.
     * @throws XMLCError if the current node is not an element.
     */
    public void addAttribute(String name, String value) {
        if (!(fCurrentNode instanceof Element)) {
            // Fail with a diagnosable error instead of a bare
            // ClassCastException/NullPointerException.
            throw new XMLCError("attribute \"" + name
                                + "\" added when current DOM node is not an element");
        }
        ((Element)fCurrentNode).setAttribute(name, value);
    }

    /**
     * Handle an end tag: make the parent of the current element the current
     * node.
     *
     * @throws XMLCError if there is no current node or it is not an element.
     */
    public void finishElement() {
        if (fCurrentNode == null) {
            throw new XMLCError("node stack underflow; malformed document");
        }
        if (!(fCurrentNode instanceof Element)) {
            throw new XMLCError("DOM node top of stack not a element for end tag");
        }
        fCurrentNode = fCurrentNode.getParentNode();
    }

    /**
     * Append a text node to the current node.
     *
     * @param data character data for the new text node.
     */
    public void addTextNode(String data) {
        // Purely defensive: the constructor either assigns the document or
        // throws.
        if (fDocument == null) {
            docNotCreatedError();
        }
        fCurrentNode.appendChild(fDocument.createTextNode(data));
    }

    /**
     * Add a comment node.  A comment seen at document level before the
     * <code>html</code> start tag is inserted ahead of the document element;
     * any other comment is appended to the current node.
     *
     * @param data text of the comment.
     */
    public void addComment(String data) {
        Comment comment = fDocument.createComment(data);
        if ((!fGotDocElement) && (fCurrentNode == fDocument)) {
            // Keep leading comments in front of the document element.
            fCurrentNode.insertBefore(comment, fDocument.getDocumentElement());
        } else {
            fCurrentNode.appendChild(comment);
        }
    }

    /**
     * Get the node that new children are currently attached to.
     *
     * @return the current node; <code>null</code> if it has been popped past
     *         the document.
     */
    public Node getCurrentNode() {
        return fCurrentNode;
    }

    /**
     * Make the parent of the current node the current node, without the
     * checks performed by {@link #finishElement}.  The current node must not
     * be <code>null</code>.
     */
    public void popCurrentNode() {
        fCurrentNode = fCurrentNode.getParentNode();
    }

    /**
     * Search the direct children of <code>parent</code>, last to first, for
     * an element with the given name that has not already been closed; if
     * none is found, repeat the search on each successive ancestor.
     *
     * @param tagNameUpper upper-case tag name to match against node names.
     * @param parent node whose children are searched first; must not be
     *        <code>null</code>.
     * @return the matching node, or <code>null</code> if there is none.
     */
    private Node recursiveFindUnrecognizedTag(String tagNameUpper,
                                              Node parent) {
        // Scan backwards so the most recently added candidate wins.
        for (Node child = parent.getLastChild(); child != null;
             child = child.getPreviousSibling()) {
            if (child.getNodeName().equals(tagNameUpper)
                && !fClosedUnrecognizedElements.contains(child)) {
                return child;
            }
        }

        // Not at this level; continue with the enclosing node, if any.
        Node grandParent = parent.getParentNode();
        if (grandParent != null) {
            return recursiveFindUnrecognizedTag(tagNameUpper, grandParent);
        } else {
            return null;
        }
    }

    /**
     * Locate the still-open element that an unrecognized close tag refers
     * to, starting the search at the current node.
     *
     * @param tagNameUpper upper-case tag name of the close tag.
     * @return the opening element; it has no children.
     * @throws XMLCException if no matching opening element exists.
     * @throws XMLCError if the matching element already has children.
     */
    private Node findUnrecognizedTag(String tagNameUpper) throws XMLCException {
        Node openingElement = null;
        if (fCurrentNode != null) {
            openingElement = recursiveFindUnrecognizedTag(tagNameUpper,
                                                          fCurrentNode);
        }
        if (openingElement == null) {
            throw new XMLCException("could not find matching opening tag for </"
                                    + tagNameUpper + ">");
        }
        if (openingElement.getFirstChild() != null) {
            throw new XMLCError("attempt to fix nesting for </"
                                + tagNameUpper
                                + "> found a node that already has children");
        }
        return openingElement;
    }

    /**
     * Move every following sibling of <code>openingElement</code> under it,
     * preserving their order.
     *
     * @param openingElement element that adopts its right-hand siblings.
     */
    private void makeRightSiblingsChildren(Node openingElement) {
        Node sibling;
        // appendChild() detaches the sibling from its old position, so
        // getNextSibling() advances on every pass.
        while ((sibling = openingElement.getNextSibling()) != null) {
            openingElement.appendChild(sibling);
        }
    }

    /**
     * Repair the tree when a close tag for an unrecognized element is seen.
     * The matching opening element, which was added without children, adopts
     * all of its following siblings, is remembered as closed, and the
     * current node is popped back to that element's parent.
     *
     * @param tagName name of the unrecognized close tag, in any case.
     * @throws XMLCException if no matching opening element can be found.
     */
    public void fixUnrecognizedTagNesting(String tagName)
        throws XMLCException {
        // Use a fixed locale: the default-locale variant maps 'i' to a
        // dotted capital under Turkish locales and would never match the
        // DOM node name.
        String tagNameUpper = tagName.toUpperCase(Locale.ENGLISH);
        if (fClosedUnrecognizedElements == null) {
            fClosedUnrecognizedElements = new HashSet();
        }

        Node openingElement = findUnrecognizedTag(tagNameUpper);
        makeRightSiblingsChildren(openingElement);
        fClosedUnrecognizedElements.add(openingElement);

        // The opening element is a child of the current node or of one of
        // its ancestors, so this walk terminates at its parent.
        Node openingParent = openingElement.getParentNode();
        while (fCurrentNode != openingParent) {
            popCurrentNode();
        }
    }
}