KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > htmlcleaner > XhtmlDescriptorBuilder


/*
 * Copyright 2004 Outerthought bvba and Schaubroeck nv
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

16 package org.outerj.daisy.htmlcleaner;
17
import org.cyberneko.dtd.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.TreeWalker;
import org.xml.sax.InputSource;

import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
32
33 class XhtmlDescriptorBuilder {
34     /**
35      * Returns a Map containing ElementDescriptors for all elements in the XHTML strict DTD.
36      */

37     public Map JavaDoc build() throws Exception JavaDoc {
38         DOMParser parser = new DOMParser();
39
40         URL JavaDoc dtdURL = getClass().getClassLoader().getResource("org/outerj/daisy/htmlcleaner/xhtml1-strict.dtd");
41
42         InputSource JavaDoc inputSource = new InputSource JavaDoc();
43         inputSource.setByteStream(dtdURL.openStream());
44         inputSource.setSystemId(dtdURL.toExternalForm());
45
46         parser.parse(inputSource);
47         Document JavaDoc document = parser.getDocument();
48
49         Element JavaDoc dtdElement = document.getDocumentElement();
50         Element JavaDoc externalSubset = findChildElement(dtdElement, "externalSubset");
51
52         // build ElementDescriptors for each element
53
Element JavaDoc[] elementDecls = findChildElements(externalSubset, "elementDecl");
54         HashMap JavaDoc elementDescriptors = new HashMap JavaDoc();
55         for (int i = 0; i < elementDecls.length; i++) {
56             Element JavaDoc elementDecl = elementDecls[i];
57             String JavaDoc name = elementDecl.getAttribute("ename");
58             elementDescriptors.put(name, new ElementDescriptor(name));
59         }
60
61         // add attribute information to them
62
Element JavaDoc[] attlists = findChildElements(externalSubset, "attlist");
63         for (int i = 0; i < attlists.length; i++) {
64             Element JavaDoc attlist = attlists[i];
65             String JavaDoc ename = attlist.getAttribute("ename");
66             Element JavaDoc[] attributeDecls = findChildElements(attlist, "attributeDecl");
67             ElementDescriptor descriptor = (ElementDescriptor)elementDescriptors.get(ename);
68             for (int j = 0; j < attributeDecls.length; j++) {
69                 String JavaDoc aname = attributeDecls[j].getAttribute("aname");
70                 if (!aname.equals("xmlns") && !aname.startsWith("xml:"))
71                     descriptor.addAttribute(aname);
72             }
73         }
74
75         // add child element information to them
76
Element JavaDoc[] contentModels = findChildElements(externalSubset, "contentModel");
77         for (int i = 0; i < contentModels.length; i++) {
78             Element JavaDoc contentModel = contentModels[i];
79
80             String JavaDoc ename = contentModel.getAttribute("ename");
81             Element JavaDoc[] elements = findDescendants(contentModel, "element");
82             ElementDescriptor descriptor = (ElementDescriptor)elementDescriptors.get(ename);
83             for (int j = 0; j < elements.length; j++) {
84                 String JavaDoc name = elements[j].getAttribute("name");
85                     descriptor.addChild(name);
86             }
87         }
88
89         return elementDescriptors;
90     }
91
92     private Element JavaDoc findChildElement(Element JavaDoc element, String JavaDoc name) {
93         NodeList JavaDoc children = element.getChildNodes();
94         for (int i = 0; i < children.getLength(); i++) {
95             Node JavaDoc node = children.item(i);
96             if (node instanceof Element JavaDoc && node.getNodeName().equals(name))
97                 return (Element JavaDoc)node;
98         }
99         throw new RuntimeException JavaDoc("Did not find expected element: " + name);
100     }
101
102     private Element JavaDoc[] findChildElements(Element JavaDoc element, String JavaDoc name) {
103         ArrayList JavaDoc foundElements = new ArrayList JavaDoc();
104         NodeList JavaDoc children = element.getChildNodes();
105         for (int i = 0; i < children.getLength(); i++) {
106             Node JavaDoc node = children.item(i);
107             if (node instanceof Element JavaDoc && node.getNodeName().equals(name))
108                 foundElements.add(node);
109         }
110         return (Element JavaDoc[])foundElements.toArray(new Element JavaDoc[foundElements.size()]);
111     }
112
113     private Element JavaDoc[] findDescendants(Element JavaDoc element, String JavaDoc name) {
114         ArrayList JavaDoc foundElements = new ArrayList JavaDoc();
115         TreeWalker walker = ((DocumentTraversal)element.getOwnerDocument()).createTreeWalker(element, NodeFilter.SHOW_ELEMENT, null, false);
116         while (walker.nextNode() != null) {
117             Element JavaDoc currentEl = (Element JavaDoc)walker.getCurrentNode();
118             if (currentEl.getNodeName().equals(name))
119                 foundElements.add(currentEl);
120         }
121         return (Element JavaDoc[])foundElements.toArray(new Element JavaDoc[foundElements.size()]);
122     }
123 }
124
Popular Tags