1 package org.jahia.services.htmlparser; 2 3 import java.util.ArrayList ; 4 import java.util.HashSet ; 5 import java.util.Iterator ; 6 import java.util.Set ; 7 8 import org.w3c.dom.Document ; 9 import org.w3c.dom.Node ; 10 import org.w3c.dom.NodeList ; 11 12 21 public class TagRemover implements HtmlDOMVisitor{ 22 23 private Set tagsToRemove = new HashSet (); 24 25 public TagRemover(){} 26 27 33 public void init(int siteId){ 34 } 35 36 public void addTag(String tag){ 37 if ( tag != null && !this.tagsToRemove.contains(tag) ){ 38 this.tagsToRemove.add(tag); 39 } 40 } 41 42 public void addTags(String [] tags){ 43 if ( tags != null ){ 44 int size = tags.length; 45 for ( int i=0; i<size; i++ ){ 46 this.addTag(tags[i]); 47 } 48 } 49 } 50 51 57 public Document parseDOM(Document doc){ 58 if ( doc != null && this.tagsToRemove.size()>0 ){ 59 removeMarkups(doc.getDocumentElement(),this.tagsToRemove); 60 } 61 return doc; 62 } 63 64 70 private void removeMarkups(Node node, Set markups){ 71 72 if ( markups == null || node == null ){ 73 return; 74 } 75 76 boolean toRemove = matchMarkup(node.getNodeName(),markups); 77 Node parentNode = node.getParentNode(); 78 79 NodeList childs = node.getChildNodes(); 80 ArrayList al = new ArrayList (); 81 82 int count = childs.getLength(); 83 for ( int i=0 ; i<count ; i++ ){ 84 Node child = (Node )childs.item(i); 85 al.add(child); 86 } 87 for ( int i=0; i<count ; i++ ){ 88 Node child = (Node )al.get(i); 89 removeMarkups(child,markups); 90 } 91 92 if ( toRemove ){ 93 parentNode.removeChild(node); 94 } else{ 95 Node refNode = node; 96 while ( matchMarkup(parentNode.getNodeName(),markups) ){ 97 refNode = parentNode; 98 parentNode = parentNode.getParentNode(); 99 } 100 101 if ( !refNode.equals(node) ){ 102 parentNode.insertBefore(node,refNode); 103 } 104 } 105 } 106 107 private boolean matchMarkup(String name, Set markups){ 108 if ( name == null || markups == null || markups.size()==0 ){ 109 return false; 110 } 111 Iterator iterator = markups.iterator(); 112 String tag = null; 113 while ( iterator.hasNext() ){ 114 tag = (String )iterator.next(); 115 if ( tag.toLowerCase().equals(name.toLowerCase()) ){ 116 return true; 117 } 118 } 119 return false; 120 } 121 } 122 | Popular Tags |