1 17 18 19 20 package org.apache.lenya.util; 21 22 import java.util.ArrayList ; 23 24 import javax.swing.text.MutableAttributeSet ; 25 import javax.swing.text.html.HTML ; 26 import javax.swing.text.html.HTML.Tag; 27 import javax.swing.text.html.HTMLEditorKit.ParserCallback; 28 29 import org.apache.log4j.Category; 30 31 32 35 public class HTMLHandler extends ParserCallback { 36 Category log = Category.getInstance(HTMLHandler.class); 37 private ArrayList img_src; 38 private ArrayList img_src_all; 39 private ArrayList a_href; 40 private ArrayList a_href_all; 41 private ArrayList link_href; 42 private ArrayList link_href_all; 43 44 47 public HTMLHandler() { 48 img_src_all = new ArrayList (); 49 img_src = new ArrayList (); 50 a_href_all = new ArrayList (); 51 a_href = new ArrayList (); 52 link_href_all = new ArrayList (); 53 link_href = new ArrayList (); 54 } 55 56 63 public void handleStartTag(Tag tag, MutableAttributeSet attributes, int pos) { 64 if (tag.equals(HTML.Tag.A)) { 65 String href = (String ) attributes.getAttribute(HTML.Attribute.HREF); 66 67 if (href != null) { 68 a_href_all.add(href); 69 70 if (!a_href.contains(href)) { 71 a_href.add(href); 72 } 73 } 74 } 75 } 76 77 84 public void handleSimpleTag(Tag tag, MutableAttributeSet attributes, int pos) { 85 if (tag.equals(HTML.Tag.IMG)) { 86 String src = (String ) attributes.getAttribute(HTML.Attribute.SRC); 87 88 if (src != null) { 89 img_src_all.add(src); 90 91 if (!img_src.contains(src)) { 92 img_src.add(src); 93 } 94 } 95 } 96 97 if (tag.equals(HTML.Tag.LINK)) { 98 String href = (String ) attributes.getAttribute(HTML.Attribute.HREF); 99 100 if (href != null) { 101 link_href_all.add(href); 102 103 if (!link_href.contains(href)) { 104 link_href.add(href); 105 } 106 } 107 } 108 } 109 110 115 public ArrayList getImageSrcs() { 116 return img_src; 117 } 118 119 124 public ArrayList getAllImageSrcs() { 125 return img_src_all; 126 } 127 128 133 public ArrayList getLinkHRefs() { 134 return link_href; 135 } 136 137 142 public ArrayList getAllLinkHRefs() { 143 return link_href_all; 144 } 145 146 151 public ArrayList getAHRefs() { 152 return a_href; 153 } 154 155 160 public ArrayList getAllAHRefs() { 161 return a_href_all; 162 } 163 } 164 | Popular Tags |