package org.jahia.services.htmlparser;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.jahia.registries.ServicesRegistry;

/**
 * DOM visitor that strips the markups configured as "to remove" for a site.
 *
 * <p>A node whose name matches one of the unwanted markup definitions is
 * removed from the tree. Its non-matching descendants are kept: each one is
 * re-inserted in front of the outermost matching ancestor before that
 * ancestor is removed, so only the unwanted tags themselves disappear.
 *
 * <p>Call {@link #init(int)} before {@link #parseDOM(Document)}; until then
 * the set of unwanted markups is empty and {@code parseDOM} leaves the
 * document untouched.
 */
public class RemoveUnwantedMarkupVisitor implements HtmlDOMVisitor {

    /** Every markup setting returned for the site passed to {@link #init(int)}. */
    private Set htmlMarkupSettings = new HashSet();

    /** Definitions of the markups whose setting type is "to remove". */
    private Set unwantedMarkups = new HashSet();

    public RemoveUnwantedMarkupVisitor() {
    }

    /**
     * Loads the markup settings of the given site and collects the
     * definitions of those flagged for removal.
     *
     * <p>Both internal sets are rebuilt from scratch on every call, so
     * re-initialising the visitor (for instance for another site) does not
     * carry over the unwanted markups of the previous initialisation.
     *
     * @param siteId identifier of the site whose markup settings are loaded
     */
    public void init(int siteId) {
        this.htmlMarkupSettings = new HashSet();
        // Reset as well: otherwise definitions from an earlier init() call
        // would keep being stripped after re-initialisation.
        this.unwantedMarkups = new HashSet();

        Iterator iterator = ServicesRegistry.getInstance()
                .getHtmlParserService()
                .getMarkupSettings(siteId);

        while (iterator.hasNext()) {
            MarkupSetting setting = (MarkupSetting) iterator.next();
            this.htmlMarkupSettings.add(setting);
            if (setting.getSettingType().equals(ToRemoveMarkupSetting.SETTING_TYPE)) {
                MarkupDefinition markupDef =
                        ServicesRegistry.getInstance().getHtmlParserService()
                                .getMarkupDefinition(setting.getMarkupDefId());
                // A setting may reference a definition that cannot be resolved.
                if (markupDef != null) {
                    this.unwantedMarkups.add(markupDef);
                }
            }
        }
    }

    /**
     * Removes the unwanted markups from the given document, in place.
     *
     * @param doc the document to clean; may be {@code null}
     * @return the same {@code doc} instance that was passed in
     */
    public Document parseDOM(Document doc) {
        if (doc != null && !this.unwantedMarkups.isEmpty()) {
            removeMarkups(doc.getDocumentElement(), this.unwantedMarkups);
        }
        return doc;
    }

    /**
     * Recursively removes the nodes matching {@code markups} from the subtree
     * rooted at {@code node}, hoisting non-matching nodes out of matching
     * ancestors.
     *
     * @param node    root of the subtree to process; ignored when {@code null}
     * @param markups set of {@link MarkupDefinition}; ignored when {@code null}
     */
    private void removeMarkups(Node node, Set markups) {
        if (markups == null || node == null) {
            return;
        }

        // Both values are captured before recursing, while the tree is intact.
        boolean toRemove = matchMarkup(node.getNodeName(), markups);
        Node parentNode = node.getParentNode();

        // Snapshot the children first: the recursion moves and removes nodes,
        // which would invalidate iteration over the live NodeList.
        NodeList childNodes = node.getChildNodes();
        int count = childNodes.getLength();
        ArrayList snapshot = new ArrayList(count);
        for (int i = 0; i < count; i++) {
            snapshot.add(childNodes.item(i));
        }
        for (int i = 0; i < count; i++) {
            removeMarkups((Node) snapshot.get(i), markups);
        }

        if (parentNode == null) {
            // Detached node: nothing to remove it from or hoist it into.
            return;
        }

        if (toRemove) {
            parentNode.removeChild(node);
            return;
        }

        // Walk up past every matching ancestor; refNode ends up as the
        // outermost one, and parentNode as the first ancestor that is kept.
        Node refNode = node;
        while (parentNode != null && matchMarkup(parentNode.getNodeName(), markups)) {
            refNode = parentNode;
            parentNode = parentNode.getParentNode();
        }

        // NOTE(review): when the document element itself matches, parentNode
        // is the Document here, and the DOM restricts which children a
        // Document accepts, so insertBefore may throw HIERARCHY_REQUEST_ERR.
        // Confirm whether the root tag can be configured as unwanted.
        if (parentNode != null && !refNode.equals(node)) {
            // Place the kept node just before the outermost unwanted ancestor
            // so it survives that ancestor's removal, preserving order.
            parentNode.insertBefore(node, refNode);
        }
    }

    /**
     * Tells whether a node name matches one of the given markup definitions.
     *
     * <p>Definitions reporting {@code isCaseSensitive() == false} are compared
     * ignoring case, independently of the default locale; the others must
     * match exactly.
     *
     * @param name    node name to test
     * @param markups set of {@link MarkupDefinition}
     * @return {@code true} if at least one definition matches {@code name};
     *         {@code false} otherwise, including when {@code name} or
     *         {@code markups} is {@code null} or {@code markups} is empty
     */
    private boolean matchMarkup(String name, Set markups) {
        if (name == null || markups == null || markups.isEmpty()) {
            return false;
        }
        Iterator iterator = markups.iterator();
        while (iterator.hasNext()) {
            MarkupDefinition def = (MarkupDefinition) iterator.next();
            String defName = def.getName();
            if (defName == null) {
                // A nameless definition cannot match any node.
                continue;
            }
            // equalsIgnoreCase avoids the default-locale dependency of
            // toLowerCase() (e.g. the Turkish dotless i).
            boolean matches = def.isCaseSensitive()
                    ? defName.equals(name)
                    : defName.equalsIgnoreCase(name);
            if (matches) {
                return true;
            }
        }
        return false;
    }
}