KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > cyberneko > html > filters > ElementRemover


1 /*
2  * (C) Copyright 2002-2005, Andy Clark. All rights reserved.
3  *
4  * This file is distributed under an Apache style license. Please
5  * refer to the LICENSE file for specific details.
6  */

7
8 package org.cyberneko.html.filters;
9
10 import java.util.Hashtable JavaDoc;
11
12 import org.apache.xerces.xni.Augmentations;
13 import org.apache.xerces.xni.NamespaceContext;
14 import org.apache.xerces.xni.QName;
15 import org.apache.xerces.xni.XMLAttributes;
16 import org.apache.xerces.xni.XMLLocator;
17 import org.apache.xerces.xni.XMLResourceIdentifier;
18 import org.apache.xerces.xni.XMLString;
19 import org.apache.xerces.xni.XNIException;
20
21 /**
22  * This class is a document filter capable of removing specified
23  * elements from the processing stream. There are two options for
24  * processing document elements:
25  * <ul>
26  * <li>specifying those elements which should be accepted and,
27  * optionally, which attributes of that element should be
28  * kept; and
29  * <li>specifying those elements whose tags and content should be
30  * completely removed from the event stream.
31  * </ul>
32  * <p>
33  * The first option allows the application to specify which elements
34  * appearing in the event stream should be accepted and, therefore,
35  * passed on to the next stage in the pipeline. All elements
36  * <em>not</em> in the list of acceptable elements have their start
37  * and end tags stripped from the event stream <em>unless</em> those
38  * elements appear in the list of elements to be removed.
39  * <p>
40  * The second option allows the application to specify which elements
41  * should be completely removed from the event stream. When an element
42  * appears that is to be removed, the element's start and end tag as
43  * well as all of that element's content is removed from the event
44  * stream.
45  * <p>
46  * A common use of this filter would be to only allow rich-text
47  * and linking elements as well as the character content to pass
48  * through the filter &mdash; all other elements would be stripped.
49  * The following code shows how to configure this filter to perform
50  * this task:
51  * <pre>
52  * ElementRemover remover = new ElementRemover();
53  * remover.acceptElement("b", null);
54  * remover.acceptElement("i", null);
55  * remover.acceptElement("u", null);
56  * remover.acceptElement("a", new String[] { "href" });
57  * </pre>
58  * <p>
59  * However, this would still allow the text content of other
60  * elements to pass through, which may not be desirable. In order
61  * to further "clean" the input, the <code>removeElement</code>
62  * option can be used. The following piece of code adds the ability
63  * to completely remove any &lt;SCRIPT&gt; tags and content
64  * from the stream.
65  * <pre>
66  * remover.removeElement("script");
67  * </pre>
68  * <p>
69  * <strong>Note:</strong>
70  * All text and accepted element children of a stripped element is
71  * retained. To completely remove an element's content, use the
72  * <code>removeElement</code> method.
73  * <p>
74  * <strong>Note:</strong>
75  * Care should be taken when using this filter because the output
76  * may not be a well-balanced tree. Specifically, if the application
77  * removes the &lt;HTML&gt; element (with or without retaining its
78  * children), the resulting document event stream will no longer be
79  * well-formed.
80  *
81  * @author Andy Clark
82  *
83  * @version $Id: ElementRemover.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
84  */

85 public class ElementRemover
86     extends DefaultFilter {
87
88     //
89
// Constants
90
//
91

92     /** A "null" object. */
93     protected static final Object JavaDoc NULL = new Object JavaDoc();
94
95     //
96
// Data
97
//
98

99     // information
100

101     /** Accepted elements. */
102     protected Hashtable JavaDoc fAcceptedElements = new Hashtable JavaDoc();
103
104     /** Removed elements. */
105     protected Hashtable JavaDoc fRemovedElements = new Hashtable JavaDoc();
106
107     // state
108

109     /** The element depth. */
110     protected int fElementDepth;
111
112     /** The element depth at element removal. */
113     protected int fRemovalElementDepth;
114
115     //
116
// Public methods
117
//
118

119     /**
120      * Specifies that the given element should be accepted and, optionally,
121      * which attributes of that element should be kept.
122      *
123      * @param element The element to accept.
124      * @param attributes The list of attributes to be kept or null if no
125      * attributes should be kept for this element.
126      *
127      * see #removeElement
128      */

129     public void acceptElement(String JavaDoc element, String JavaDoc[] attributes) {
130         Object JavaDoc key = element.toLowerCase();
131         Object JavaDoc value = NULL;
132         if (attributes != null) {
133             String JavaDoc[] newarray = new String JavaDoc[attributes.length];
134             for (int i = 0; i < attributes.length; i++) {
135                 newarray[i] = attributes[i].toLowerCase();
136             }
137             value = attributes;
138         }
139         fAcceptedElements.put(key, value);
140     } // acceptElement(String,String[])
141

142     /**
143      * Specifies that the given element should be completely removed. If an
144      * element is encountered during processing that is on the remove list,
145      * the element's start and end tags as well as all of content contained
146      * within the element will be removed from the processing stream.
147      *
148      * @param element The element to completely remove.
149      */

150     public void removeElement(String JavaDoc element) {
151         Object JavaDoc key = element.toLowerCase();
152         Object JavaDoc value = NULL;
153         fRemovedElements.put(key, value);
154     } // removeElement(String)
155

156     //
157
// XMLDocumentHandler methods
158
//
159

160     // since Xerces-J 2.2.0
161

162     /** Start document. */
163     public void startDocument(XMLLocator locator, String JavaDoc encoding,
164                               NamespaceContext nscontext, Augmentations augs)
165         throws XNIException {
166         fElementDepth = 0;
167         fRemovalElementDepth = Integer.MAX_VALUE;
168         super.startDocument(locator, encoding, nscontext, augs);
169     } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
170

171     // old methods
172

173     /** Start document. */
174     public void startDocument(XMLLocator locator, String JavaDoc encoding, Augmentations augs)
175         throws XNIException {
176         startDocument(locator, encoding, null, augs);
177     } // startDocument(XMLLocator,String,Augmentations)
178

179     /** Start prefix mapping. */
180     public void startPrefixMapping(String JavaDoc prefix, String JavaDoc uri, Augmentations augs)
181         throws XNIException {
182         if (fElementDepth <= fRemovalElementDepth) {
183             super.startPrefixMapping(prefix, uri, augs);
184         }
185     } // startPrefixMapping(String,String,Augmentations)
186

187     /** Start element. */
188     public void startElement(QName element, XMLAttributes attributes, Augmentations augs)
189         throws XNIException {
190         if (fElementDepth <= fRemovalElementDepth && handleOpenTag(element, attributes)) {
191             super.startElement(element, attributes, augs);
192         }
193         fElementDepth++;
194     } // startElement(QName,XMLAttributes,Augmentations)
195

196     /** Empty element. */
197     public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs)
198         throws XNIException {
199         if (fElementDepth <= fRemovalElementDepth && handleOpenTag(element, attributes)) {
200             super.emptyElement(element, attributes, augs);
201         }
202     } // emptyElement(QName,XMLAttributes,Augmentations)
203

204     /** Comment. */
205     public void comment(XMLString text, Augmentations augs)
206         throws XNIException {
207         if (fElementDepth <= fRemovalElementDepth) {
208             super.comment(text, augs);
209         }
210     } // comment(XMLString,Augmentations)
211

212     /** Processing instruction. */
213     public void processingInstruction(String JavaDoc target, XMLString data, Augmentations augs)
214         throws XNIException {
215         if (fElementDepth <= fRemovalElementDepth) {
216             super.processingInstruction(target, data, augs);
217         }
218     } // processingInstruction(String,XMLString,Augmentations)
219

220     /** Characters. */
221     public void characters(XMLString text, Augmentations augs)
222         throws XNIException {
223         if (fElementDepth <= fRemovalElementDepth) {
224             super.characters(text, augs);
225         }
226     } // characters(XMLString,Augmentations)
227

228     /** Ignorable whitespace. */
229     public void ignorableWhitespace(XMLString text, Augmentations augs)
230         throws XNIException {
231         if (fElementDepth <= fRemovalElementDepth) {
232             super.ignorableWhitespace(text, augs);
233         }
234     } // ignorableWhitespace(XMLString,Augmentations)
235

236     /** Start general entity. */
237     public void startGeneralEntity(String JavaDoc name, XMLResourceIdentifier id, String JavaDoc encoding, Augmentations augs)
238         throws XNIException {
239         if (fElementDepth <= fRemovalElementDepth) {
240             super.startGeneralEntity(name, id, encoding, augs);
241         }
242     } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
243

244     /** Text declaration. */
245     public void textDecl(String JavaDoc version, String JavaDoc encoding, Augmentations augs)
246         throws XNIException {
247         if (fElementDepth <= fRemovalElementDepth) {
248             super.textDecl(version, encoding, augs);
249         }
250     } // textDecl(String,String,Augmentations)
251

252     /** End general entity. */
253     public void endGeneralEntity(String JavaDoc name, Augmentations augs)
254         throws XNIException {
255         if (fElementDepth <= fRemovalElementDepth) {
256             super.endGeneralEntity(name, augs);
257         }
258     } // endGeneralEntity(String,Augmentations)
259

260     /** Start CDATA section. */
261     public void startCDATA(Augmentations augs) throws XNIException {
262         if (fElementDepth <= fRemovalElementDepth) {
263             super.startCDATA(augs);
264         }
265     } // startCDATA(Augmentations)
266

267     /** End CDATA section. */
268     public void endCDATA(Augmentations augs) throws XNIException {
269         if (fElementDepth <= fRemovalElementDepth) {
270             super.endCDATA(augs);
271         }
272     } // endCDATA(Augmentations)
273

274     /** End element. */
275     public void endElement(QName element, Augmentations augs)
276         throws XNIException {
277         if (fElementDepth <= fRemovalElementDepth && elementAccepted(element.rawname)) {
278             super.endElement(element, augs);
279         }
280         fElementDepth--;
281         if (fElementDepth == fRemovalElementDepth) {
282             fRemovalElementDepth = Integer.MAX_VALUE;
283         }
284     } // endElement(QName,Augmentations)
285

286     /** End prefix mapping. */
287     public void endPrefixMapping(String JavaDoc prefix, Augmentations augs)
288         throws XNIException {
289         if (fElementDepth <= fRemovalElementDepth) {
290             super.endPrefixMapping(prefix, augs);
291         }
292     } // endPrefixMapping(String,Augmentations)
293

294     //
295
// Protected methods
296
//
297

298     /** Returns true if the specified element is accepted. */
299     protected boolean elementAccepted(String JavaDoc element) {
300         Object JavaDoc key = element.toLowerCase();
301         return fAcceptedElements.containsKey(key);
302     } // elementAccepted(String):boolean
303

304     /** Returns true if the specified element should be removed. */
305     protected boolean elementRemoved(String JavaDoc element) {
306         Object JavaDoc key = element.toLowerCase();
307         return fRemovedElements.containsKey(key);
308     } // elementRemoved(String):boolean
309

310     /** Handles an open tag. */
311     protected boolean handleOpenTag(QName element, XMLAttributes attributes) {
312         if (elementAccepted(element.rawname)) {
313             Object JavaDoc key = element.rawname.toLowerCase();
314             Object JavaDoc value = fAcceptedElements.get(key);
315             if (value != NULL) {
316                 String JavaDoc[] anames = (String JavaDoc[])value;
317                 int attributeCount = attributes.getLength();
318                 LOOP: for (int i = 0; i < attributeCount; i++) {
319                     String JavaDoc aname = attributes.getQName(i).toLowerCase();
320                     for (int j = 0; j < anames.length; j++) {
321                         if (anames[j].equals(aname)) {
322                             continue LOOP;
323                         }
324                     }
325                     attributes.removeAttributeAt(i--);
326                     attributeCount--;
327                 }
328             }
329             else {
330                 attributes.removeAllAttributes();
331             }
332             return true;
333         }
334         else if (elementRemoved(element.rawname)) {
335             fRemovalElementDepth = fElementDepth;
336         }
337         return false;
338     } // handleOpenTag(QName,XMLAttributes):boolean
339

340 } // class DefaultFilter
341
Popular Tags