1 16 package org.outerj.daisy.htmlcleaner; 17 18 import org.cyberneko.dtd.parsers.DOMParser; 19 import org.xml.sax.InputSource ; 20 import org.w3c.dom.Document ; 21 import org.w3c.dom.Element ; 22 import org.w3c.dom.NodeList ; 23 import org.w3c.dom.Node ; 24 import org.w3c.dom.traversal.DocumentTraversal; 25 import org.w3c.dom.traversal.NodeFilter; 26 import org.w3c.dom.traversal.TreeWalker; 27 28 import java.net.URL ; 29 import java.util.ArrayList ; 30 import java.util.HashMap ; 31 import java.util.Map ; 32 33 class XhtmlDescriptorBuilder { 34 37 public Map build() throws Exception { 38 DOMParser parser = new DOMParser(); 39 40 URL dtdURL = getClass().getClassLoader().getResource("org/outerj/daisy/htmlcleaner/xhtml1-strict.dtd"); 41 42 InputSource inputSource = new InputSource (); 43 inputSource.setByteStream(dtdURL.openStream()); 44 inputSource.setSystemId(dtdURL.toExternalForm()); 45 46 parser.parse(inputSource); 47 Document document = parser.getDocument(); 48 49 Element dtdElement = document.getDocumentElement(); 50 Element externalSubset = findChildElement(dtdElement, "externalSubset"); 51 52 Element [] elementDecls = findChildElements(externalSubset, "elementDecl"); 54 HashMap elementDescriptors = new HashMap (); 55 for (int i = 0; i < elementDecls.length; i++) { 56 Element elementDecl = elementDecls[i]; 57 String name = elementDecl.getAttribute("ename"); 58 elementDescriptors.put(name, new ElementDescriptor(name)); 59 } 60 61 Element [] attlists = findChildElements(externalSubset, "attlist"); 63 for (int i = 0; i < attlists.length; i++) { 64 Element attlist = attlists[i]; 65 String ename = attlist.getAttribute("ename"); 66 Element [] attributeDecls = findChildElements(attlist, "attributeDecl"); 67 ElementDescriptor descriptor = (ElementDescriptor)elementDescriptors.get(ename); 68 for (int j = 0; j < attributeDecls.length; j++) { 69 String aname = attributeDecls[j].getAttribute("aname"); 70 if (!aname.equals("xmlns") && !aname.startsWith("xml:")) 71 descriptor.addAttribute(aname); 72 } 73 } 74 75 Element [] contentModels = findChildElements(externalSubset, "contentModel"); 77 for (int i = 0; i < contentModels.length; i++) { 78 Element contentModel = contentModels[i]; 79 80 String ename = contentModel.getAttribute("ename"); 81 Element [] elements = findDescendants(contentModel, "element"); 82 ElementDescriptor descriptor = (ElementDescriptor)elementDescriptors.get(ename); 83 for (int j = 0; j < elements.length; j++) { 84 String name = elements[j].getAttribute("name"); 85 descriptor.addChild(name); 86 } 87 } 88 89 return elementDescriptors; 90 } 91 92 private Element findChildElement(Element element, String name) { 93 NodeList children = element.getChildNodes(); 94 for (int i = 0; i < children.getLength(); i++) { 95 Node node = children.item(i); 96 if (node instanceof Element && node.getNodeName().equals(name)) 97 return (Element )node; 98 } 99 throw new RuntimeException ("Did not find expected element: " + name); 100 } 101 102 private Element [] findChildElements(Element element, String name) { 103 ArrayList foundElements = new ArrayList (); 104 NodeList children = element.getChildNodes(); 105 for (int i = 0; i < children.getLength(); i++) { 106 Node node = children.item(i); 107 if (node instanceof Element && node.getNodeName().equals(name)) 108 foundElements.add(node); 109 } 110 return (Element [])foundElements.toArray(new Element [foundElements.size()]); 111 } 112 113 private Element [] findDescendants(Element element, String name) { 114 ArrayList foundElements = new ArrayList (); 115 TreeWalker walker = ((DocumentTraversal)element.getOwnerDocument()).createTreeWalker(element, NodeFilter.SHOW_ELEMENT, null, false); 116 while (walker.nextNode() != null) { 117 Element currentEl = (Element )walker.getCurrentNode(); 118 if (currentEl.getNodeName().equals(name)) 119 foundElements.add(currentEl); 120 } 121 return (Element [])foundElements.toArray(new Element [foundElements.size()]); 122 } 123 } 124 | Popular Tags |