KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > htmlcleaner > HtmlCleanerFactory


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.htmlcleaner;
17
18 import org.xml.sax.InputSource JavaDoc;
19 import org.w3c.dom.*;
20
21 import javax.xml.parsers.DocumentBuilderFactory JavaDoc;
22 import javax.xml.parsers.DocumentBuilder JavaDoc;
23 import java.util.ArrayList JavaDoc;
24
25 /**
26  * Builds a {@link HtmlCleanerTemplate} based on a XML configuration.
27  * The configuration describes such things as which elements and
28  * attributes should be kept, or how wide the output should be.
29  * See the example config files.
30  *
31  * <p>Instances of this class are not thread safe and not reusable,
32  * in other words construct a new HtmlCleanerFactory each time you
33  * need it.
34  */

35 public class HtmlCleanerFactory {
36     private boolean handledCleanup = false;
37     private boolean handledSerialization = false;
38     HtmlCleanerTemplate template = new HtmlCleanerTemplate();
39
40     public HtmlCleanerTemplate buildTemplate(InputSource JavaDoc is) throws Exception JavaDoc {
41         DocumentBuilderFactory JavaDoc dbf = DocumentBuilderFactory.newInstance();
42         dbf.setNamespaceAware(true);
43         DocumentBuilder JavaDoc db = dbf.newDocumentBuilder();
44         Document document = db.parse(is);
45         document.normalize();
46
47         Element docEl = document.getDocumentElement();
48         if (!(docEl.getLocalName().equals("htmlcleaner") && docEl.getNamespaceURI() == null)) {
49             throw new Exception JavaDoc("Htmlcleaner config file should have root elemnet 'htmlcleaner'.");
50         }
51
52         NodeList nodeList = docEl.getChildNodes();
53         for (int i = 0; i < nodeList.getLength(); i++) {
54             Node node = nodeList.item(i);
55
56             if (node instanceof Element) {
57                 if (node.getNamespaceURI() == null && node.getLocalName().equals("cleanup")) {
58                     handleCleanupNode((Element)node);
59                 } else if (node.getNamespaceURI() == null && node.getLocalName().equals("serialization")) {
60                     handleSerializationNode((Element)node);
61                 } else {
62                     throw new Exception JavaDoc("Error in htmlcleaner config: unexpected element: " + node.getNodeName());
63                 }
64             }
65         }
66         template.initialize();
67         return template;
68     }
69
70     private void handleCleanupNode(Element cleanupEl) throws Exception JavaDoc {
71         if (handledCleanup)
72             throw new Exception JavaDoc("Error in htmlcleaner config: cleanup element is only allowed once");
73         handledCleanup = true;
74
75         NodeList cleanupNodes = cleanupEl.getChildNodes();
76         for (int k = 0; k < cleanupNodes.getLength(); k++) {
77             Node node = cleanupNodes.item(k);
78             if (node instanceof Element) {
79                 if (node.getNamespaceURI() == null && node.getLocalName().equals("allowed-span-classes")) {
80                     String JavaDoc[] classes = getClassChildren((Element)node);
81                     for (int z = 0; z < classes.length; z++)
82                         template.addAllowedSpanClass(classes[z]);
83                 } else if (node.getNamespaceURI() == null && node.getLocalName().equals("allowed-div-classes")) {
84                     String JavaDoc[] classes = getClassChildren((Element)node);
85                     for (int z = 0; z < classes.length; z++)
86                         template.addAllowedDivClass(classes[z]);
87                 } else if (node.getNamespaceURI() == null && node.getLocalName().equals("allowed-para-classes")) {
88                     String JavaDoc[] classes = getClassChildren((Element)node);
89                     for (int z = 0; z < classes.length; z++)
90                         template.addAllowedParaClass(classes[z]);
91                 } else if (node.getNamespaceURI() == null && node.getLocalName().equals("allowed-pre-classes")) {
92                     String JavaDoc[] classes = getClassChildren((Element)node);
93                     for (int z = 0; z < classes.length; z++)
94                         template.addAllowedPreClass(classes[z]);
95                 } else if (node.getNamespaceURI() == null && node.getLocalName().equals("allowed-elements")) {
96                     handleAllowedElementsNode((Element)node);
97                 } else if (node.getNamespaceURI() == null && node.getLocalName().equals("img-alternate-src-attr")) {
98                     String JavaDoc name = ((Element)node).getAttribute("name");
99                     if (name.equals(""))
100                         throw new Exception JavaDoc("Error in htmlcleaner config: missing name attribute on img-alternate-src-attr");
101                     template.setImgAlternateSrcAttr(name);
102                 } else if (node.getNamespaceURI() == null && node.getLocalName().equals("link-alternate-href-attr")) {
103                     String JavaDoc name = ((Element)node).getAttribute("name");
104                     if (name.equals(""))
105                         throw new Exception JavaDoc("Error in htmlcleaner config: missing name attribute on link-alternate-href-attr");
106                     template.setLinkAlternateHrefAttr(name);
107                 } else {
108                     throw new Exception JavaDoc("Error in htmlcleaner config: unexpected element " + node.getNodeName() + " inside " + cleanupEl.getNodeName());
109                 }
110             }
111         }
112
113     }
114
115     private String JavaDoc[] getClassChildren(Element element) throws Exception JavaDoc {
116         ArrayList JavaDoc classes = new ArrayList JavaDoc();
117         NodeList nodeList = element.getChildNodes();
118         for (int i = 0; i < nodeList.getLength(); i++) {
119             Node node = nodeList.item(i);
120             if (node instanceof Element) {
121                 if (node.getNamespaceURI() == null && node.getLocalName().equals("class")) {
122                     Node text = node.getFirstChild();
123                     if (text instanceof Text) {
124                         classes.add(((Text)text).getData());
125                     } else {
126                         throw new Exception JavaDoc("Error in htmlcleaner: element class does not have a text node child");
127                     }
128                 } else {
129                     throw new Exception JavaDoc("Error in htmlcleaner config: unexpected element: " + node.getNodeName() + " as child of " + element.getNodeName());
130                 }
131             }
132         }
133         return (String JavaDoc[])classes.toArray(new String JavaDoc[classes.size()]);
134     }
135
136     private void handleAllowedElementsNode(Element element) throws Exception JavaDoc {
137         NodeList children = element.getChildNodes();
138         for (int i = 0; i < children.getLength(); i++) {
139             Node node = children.item(i);
140
141             if (node instanceof Element) {
142                 if (node.getNamespaceURI() == null && node.getLocalName().equals("element")) {
143                     String JavaDoc name = ((Element)node).getAttribute("name");
144                     if (name.equals(""))
145                         throw new Exception JavaDoc("Error in htmlcleaner config: missing name attribute on 'element' element");
146                     String JavaDoc[] attributes = getAttributeChildren((Element)node);
147                     template.addAllowedElement(name, attributes);
148                 } else {
149                     throw new Exception JavaDoc("Error in htmlcleaner config: unexpected element: '" + node.getNodeName() + "' as child of " + element.getNodeName());
150                 }
151             }
152         }
153     }
154
155     private String JavaDoc[] getAttributeChildren(Element element) throws Exception JavaDoc {
156         ArrayList JavaDoc names = new ArrayList JavaDoc();
157         NodeList children = element.getChildNodes();
158         for (int i = 0; i < children.getLength(); i++) {
159             Node node = children.item(i);
160             if (node instanceof Element) {
161                 if (node.getNamespaceURI() == null && node.getLocalName().equals("attribute")) {
162                     String JavaDoc name = ((Element)node).getAttribute("name");
163                     if (name.equals(""))
164                         throw new Exception JavaDoc("Error in htmlcleaner config: missing name attribute on attribute element");
165                     names.add(name);
166                 } else {
167                     throw new Exception JavaDoc("Error in htmlcleaner config: unexpected element: '" + node.getNodeName() + "' as child of " + element.getNodeName());
168                 }
169             }
170         }
171         return (String JavaDoc[])names.toArray(new String JavaDoc[names.size()]);
172     }
173
174     private void handleSerializationNode(Element element) throws Exception JavaDoc {
175         if (handledSerialization)
176             throw new Exception JavaDoc("Error in htmlcleaner config: serialization element is only allowed once");
177         handledSerialization = true;
178
179         NodeList children = element.getChildNodes();
180         for (int i = 0; i < children.getLength(); i++) {
181             Node node = children.item(i);
182             if (node instanceof Element) {
183                 if (node.getNamespaceURI() == null && node.getLocalName().equals("linewidth")) {
184                     String JavaDoc value = ((Element)node).getAttribute("value");
185                     if (value.equals(""))
186                         throw new Exception JavaDoc("Error in htmlcleaner config: missing value attribute on linewidth element.");
187                     int intValue = Integer.parseInt(value);
188                     template.setMaxLineWidth(intValue);
189                 } else if (node.getNamespaceURI() == null && node.getLocalName().equals("elements")) {
190                     handleElementsNode((Element)node);
191                 } else {
192                     throw new Exception JavaDoc("Error in htmlcleaner config: unexpected element '" + node.getNodeName() + "' as child of " + element.getNodeName());
193                 }
194             }
195         }
196     }
197
198     private void handleElementsNode(Element element) throws Exception JavaDoc {
199         NodeList children = element.getChildNodes();
200         for (int i = 0; i < children.getLength(); i++) {
201             Node node = children.item(i);
202             if (node instanceof Element) {
203                 if (node.getNamespaceURI() == null && node.getLocalName().equals("element")) {
204                     Element childEl = (Element)node;
205                     String JavaDoc name = childEl.getAttribute("name");
206                     if (name.equals(""))
207                         throw new Exception JavaDoc("Error in htmlcleaner config: missing name attribute on 'element' element.");
208                     String JavaDoc beforeOpenAttr = childEl.getAttribute("beforeOpen");
209                     String JavaDoc afterOpenAttr = childEl.getAttribute("afterOpen");
210                     String JavaDoc beforeCloseAttr = childEl.getAttribute("beforeClose");
211                     String JavaDoc afterCloseAttr = childEl.getAttribute("afterClose");
212                     int beforeOpen = 0, afterOpen = 0, beforeClose = 0, afterClose = 0;
213                     if (!beforeOpenAttr.equals(""))
214                         beforeOpen = Integer.parseInt(beforeOpenAttr);
215                     if (!afterOpenAttr.equals(""))
216                         afterOpen = Integer.parseInt(afterOpenAttr);
217                     if (!beforeCloseAttr.equals(""))
218                         beforeClose = Integer.parseInt(beforeCloseAttr);
219                     if (!afterCloseAttr.equals(""))
220                         afterClose = Integer.parseInt(afterCloseAttr);
221                     boolean inline = "true".equals(childEl.getAttribute("inline"));
222                     template.addOutputElement(name, beforeOpen, afterOpen, beforeClose, afterClose, inline);
223                 }
224             }
225         }
226     }
227 }
228
Popular Tags