KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > services > htmlparser > TagRemover


1 package org.jahia.services.htmlparser;
2
3 import java.util.ArrayList JavaDoc;
4 import java.util.HashSet JavaDoc;
5 import java.util.Iterator JavaDoc;
6 import java.util.Set JavaDoc;
7
8 import org.w3c.dom.Document JavaDoc;
9 import org.w3c.dom.Node JavaDoc;
10 import org.w3c.dom.NodeList JavaDoc;
11
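/**
 * <p>Title: HTML DOM visitor that removes tags</p>
 * <p>Description: Strips every node whose name is in a configurable set of
 * tag names from a DOM tree. Content nested inside a removed node is kept:
 * it is moved up in front of the outermost removed ancestor.</p>
 * <p>Copyright: Copyright (c) 2002</p>
 * <p>Company: </p>
 * @author Khue Nguyen
 * @version 1.0
 */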

21 public class TagRemover implements HtmlDOMVisitor{
22
23     private Set JavaDoc tagsToRemove = new HashSet JavaDoc();
24
25     public TagRemover(){}
26
27     /**
28      * let the visitor initiate itself
29      *
30      * @param siteId , current site, if -1, use default settings
31      * @return
32      */

33     public void init(int siteId){
34     }
35
36     public void addTag(String JavaDoc tag){
37         if ( tag != null && !this.tagsToRemove.contains(tag) ){
38             this.tagsToRemove.add(tag);
39         }
40     }
41
42     public void addTags(String JavaDoc[] tags){
43         if ( tags != null ){
44             int size = tags.length;
45             for ( int i=0; i<size; i++ ){
46                 this.addTag(tags[i]);
47             }
48         }
49     }
50
51     /**
52      * Remove all unwanted markups
53      *
54      * @param doc
55      * @return
56      */

57     public Document JavaDoc parseDOM(Document JavaDoc doc){
58         if ( doc != null && this.tagsToRemove.size()>0 ){
59             removeMarkups(doc.getDocumentElement(),this.tagsToRemove);
60         }
61         return doc;
62     }
63
64     /**
65      * Filter out unwanted markups
66      *
67      * @param node, the stating node
68      * @param markups, the set of the names of the markups to remove
69      */

70     private void removeMarkups(Node JavaDoc node, Set JavaDoc markups){
71
72         if ( markups == null || node == null ){
73             return;
74         }
75
76         boolean toRemove = matchMarkup(node.getNodeName(),markups);
77         Node JavaDoc parentNode = node.getParentNode();
78
79         NodeList JavaDoc childs = node.getChildNodes();
80         ArrayList JavaDoc al = new ArrayList JavaDoc();
81
82         int count = childs.getLength();
83         for ( int i=0 ; i<count ; i++ ){
84             Node JavaDoc child = (Node JavaDoc)childs.item(i);
85             al.add(child);
86         }
87         for ( int i=0; i<count ; i++ ){
88             Node JavaDoc child = (Node JavaDoc)al.get(i);
89             removeMarkups(child,markups);
90         }
91
92         if ( toRemove ){
93             parentNode.removeChild(node);
94         } else{
95             Node JavaDoc refNode = node;
96             while ( matchMarkup(parentNode.getNodeName(),markups) ){
97                 refNode = parentNode;
98                 parentNode = parentNode.getParentNode();
99             }
100
101             if ( !refNode.equals(node) ){
102                 parentNode.insertBefore(node,refNode);
103             }
104         }
105     }
106
107     private boolean matchMarkup(String JavaDoc name, Set JavaDoc markups){
108         if ( name == null || markups == null || markups.size()==0 ){
109             return false;
110         }
111         Iterator JavaDoc iterator = markups.iterator();
112         String JavaDoc tag = null;
113         while ( iterator.hasNext() ){
114             tag = (String JavaDoc)iterator.next();
115             if ( tag.toLowerCase().equals(name.toLowerCase()) ){
116                 return true;
117             }
118         }
119         return false;
120     }
121 }
122
Popular Tags