KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > services > htmlparser > RemoveUnwantedMarkupVisitor


package org.jahia.services.htmlparser;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.jahia.registries.ServicesRegistry;

14 /**
15  *
16  * <p>Title: Html DOM Visitor</p>
17  * <p>Description: </p>
18  * <p>Copyright: Copyright (c) 2002</p>
19  * <p>Company: </p>
20  * @author Khue Nguyen
21  * @version 1.0
22  */

23 public class RemoveUnwantedMarkupVisitor implements HtmlDOMVisitor{
24
25     private Set JavaDoc htmlMarkupSettings = new HashSet JavaDoc();
26     private Set JavaDoc unwantedMarkups = new HashSet JavaDoc();
27
28     public RemoveUnwantedMarkupVisitor(){}
29
30     /**
31      * let the visitor initiate itself
32      *
33      * @param siteId , current site, if -1, use default settings
34      * @return
35      */

36     public void init(int siteId){
37         this.htmlMarkupSettings = new HashSet JavaDoc();
38
39         Iterator JavaDoc iterator = ServicesRegistry.getInstance()
40                                 .getHtmlParserService()
41                                 .getMarkupSettings(siteId);
42
43         while ( iterator.hasNext() ){
44             MarkupSetting setting = (MarkupSetting)iterator.next();
45             this.htmlMarkupSettings.add(setting);
46             if ( setting.getSettingType().equals(ToRemoveMarkupSetting.SETTING_TYPE) ){
47                 // @todo
48
MarkupDefinition markupDef =
49                         ServicesRegistry.getInstance().getHtmlParserService()
50                         .getMarkupDefinition(setting.getMarkupDefId());
51                 if ( markupDef != null ){
52                     this.unwantedMarkups.add(markupDef);
53                 }
54             }
55         }
56     }
57
58     /**
59      * Remove all unwanted markups
60      *
61      * @param doc
62      * @return
63      */

64     public Document JavaDoc parseDOM(Document JavaDoc doc){
65         if ( doc != null && this.unwantedMarkups.size()>0 ){
66             removeMarkups(doc.getDocumentElement(),this.unwantedMarkups);
67         }
68         return doc;
69     }
70
71     /**
72      * Filter out unwanted markups
73      *
74      * @param node, the stating node
75      * @param markups, the set of the names of the markups to remove
76      */

77     private void removeMarkups(Node JavaDoc node, Set JavaDoc markups){
78
79         if ( markups == null || node == null ){
80             return;
81         }
82
83         boolean toRemove = matchMarkup(node.getNodeName(),markups);
84         Node JavaDoc parentNode = node.getParentNode();
85
86         NodeList JavaDoc childs = node.getChildNodes();
87         ArrayList JavaDoc al = new ArrayList JavaDoc();
88
89         int count = childs.getLength();
90         for ( int i=0 ; i<count ; i++ ){
91             Node JavaDoc child = (Node JavaDoc)childs.item(i);
92             al.add(child);
93         }
94         for ( int i=0; i<count ; i++ ){
95             Node JavaDoc child = (Node JavaDoc)al.get(i);
96             removeMarkups(child,markups);
97         }
98
99         if ( toRemove ){
100             parentNode.removeChild(node);
101         } else{
102             Node JavaDoc refNode = node;
103             while ( matchMarkup(parentNode.getNodeName(),markups) ){
104                 refNode = parentNode;
105                 parentNode = parentNode.getParentNode();
106             }
107
108             if ( !refNode.equals(node) ){
109                 parentNode.insertBefore(node,refNode);
110             }
111         }
112     }
113
114     private boolean matchMarkup(String JavaDoc name, Set JavaDoc markups){
115         if ( name == null || markups == null || markups.size()==0 ){
116             return false;
117         }
118         Iterator JavaDoc iterator = markups.iterator();
119         while ( iterator.hasNext() ){
120             MarkupDefinition def = (MarkupDefinition)iterator.next();
121             if ( !def.isCaseSensitive() ){
122                 if ( def.getName().toLowerCase().equals(name.toLowerCase()) ){
123                     return true;
124                 }
125             } else if ( def.getName().equals(name) ){
126                 return true;
127             }
128         }
129         return false;
130     }
131 }
Popular Tags