package org.apache.jmeter.protocol.http.parser;

import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;

import org.htmlparser.Node;
import org.htmlparser.NodeReader;
import org.htmlparser.Parser;
import org.htmlparser.scanners.AppletScanner;
import org.htmlparser.scanners.BaseHrefScanner;
import org.htmlparser.scanners.BodyScanner;
import org.htmlparser.scanners.FrameScanner;
import org.htmlparser.scanners.InputTagScanner;
import org.htmlparser.scanners.LinkScanner;
import org.htmlparser.scanners.LinkTagScanner;
import org.htmlparser.scanners.ScriptScanner;
import org.htmlparser.tags.AppletTag;
import org.htmlparser.tags.BaseHrefTag;
import org.htmlparser.tags.BodyTag;
import org.htmlparser.tags.FrameTag;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.InputTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.LinkTagTag;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.util.DefaultParserFeedback;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.ParserException;

/**
 * {@link HTMLParser} implementation that uses the org.htmlparser library to
 * collect the URLs of resources embedded in an HTML page (body backgrounds,
 * images, applets, image inputs, scripts, frames and stylesheet links).
 */
class HtmlParserHTMLParser extends HTMLParser
{
    /** Class logger. */
    private static final Logger log = LoggingManager.getLoggerForClass();

    /**
     * Creates the parser; only delegates to the superclass constructor.
     */
    protected HtmlParserHTMLParser()
    {
        super();
    }

    /**
     * Always returns <code>true</code>.
     * NOTE(review): presumably this tells the HTMLParser framework that a
     * single instance may be shared between calls -- confirm against the
     * base class contract.
     *
     * @return <code>true</code>
     */
    protected boolean isReusable()
    {
        return true;
    }

    /**
     * Parses the given HTML and adds the URL of every embedded resource it
     * recognises to <code>urls</code>.
     *
     * @param html    raw bytes of the HTML document; decoded with the
     *                platform default charset
     * @param baseUrl base URL handed to <code>urls.addURL</code> together
     *                with each resource string; replaced locally when a
     *                base-href tag is encountered
     * @param urls    collection that receives the discovered URLs
     * @return <code>urls.iterator()</code>
     * @throws HTMLParseException if the parser cannot be set up, if a
     *         base-href tag yields a malformed URL, or if the underlying
     *         parser reports a {@link ParserException}
     */
    public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls)
        throws HTMLParseException
    {
        Parser htmlParser = null;
        try
        {
            String contents = new String(html);
            StringReader reader = new StringReader(contents);
            NodeReader nreader = new NodeReader(reader, contents.length());
            htmlParser = new Parser(nreader, new DefaultParserFeedback());
            addTagListeners(htmlParser);
        }
        catch (Exception e)
        {
            throw new HTMLParseException(e);
        }

        try
        {
            for (NodeIterator e = htmlParser.elements(); e.hasMoreNodes();)
            {
                Node node = e.nextNode();
                String binUrlStr = null;

                if (node instanceof BodyTag)
                {
                    BodyTag body = (BodyTag) node;
                    binUrlStr = body.getAttribute("background");
                    // From here on iterate over the body's own children;
                    // the remaining nodes of the previous iterator are no
                    // longer visited.
                    e = body.elements();
                }
                else if (node instanceof BaseHrefTag)
                {
                    BaseHrefTag baseHref = (BaseHrefTag) node;
                    try
                    {
                        // The new base is what gets passed to urls.addURL
                        // for every resource found after this tag.
                        baseUrl = new URL(baseUrl, baseHref.getBaseUrl() + "/");
                    }
                    catch (MalformedURLException e1)
                    {
                        throw new HTMLParseException(e1);
                    }
                }
                else if (node instanceof ImageTag)
                {
                    ImageTag image = (ImageTag) node;
                    binUrlStr = image.getImageURL();
                }
                else if (node instanceof AppletTag)
                {
                    AppletTag applet = (AppletTag) node;
                    binUrlStr = applet.getAppletClass();
                }
                else if (node instanceof InputTag)
                {
                    InputTag input = (InputTag) node;
                    String strType = input.getAttribute("type");
                    // Only <input type="image"> refers to an embedded resource.
                    if (strType != null && strType.equalsIgnoreCase("image"))
                    {
                        binUrlStr = input.getAttribute("src");
                    }
                }
                else if (node instanceof LinkTag)
                {
                    LinkTag link = (LinkTag) node;
                    // Only the first child is inspected for an image.
                    if (link.getChild(0) instanceof ImageTag)
                    {
                        ImageTag img = (ImageTag) link.getChild(0);
                        binUrlStr = img.getImageURL();
                    }
                }
                else if (node instanceof ScriptTag)
                {
                    ScriptTag script = (ScriptTag) node;
                    binUrlStr = script.getAttribute("src");
                }
                else if (node instanceof FrameTag)
                {
                    FrameTag tag = (FrameTag) node;
                    binUrlStr = tag.getAttribute("src");
                }
                else if (node instanceof LinkTagTag)
                {
                    LinkTagTag linkTag = (LinkTagTag) node;
                    // Constant-first comparison: getAttribute returns null
                    // when the tag has no "rel" attribute, which previously
                    // caused a NullPointerException here.
                    if ("stylesheet".equalsIgnoreCase(linkTag.getAttribute("rel")))
                    {
                        binUrlStr = linkTag.getAttribute("href");
                    }
                }

                // Nothing to record for this node.
                if (binUrlStr == null)
                {
                    continue;
                }

                urls.addURL(binUrlStr, baseUrl);
            }
            log.debug("End   : parseNodes");
        }
        catch (ParserException e)
        {
            throw new HTMLParseException(e);
        }

        return urls.iterator();
    }

    /**
     * Registers on the given parser the scanners for the tag types that
     * {@link #getEmbeddedResourceURLs(byte[], URL, URLCollection)} inspects.
     *
     * @param parser the parser to which the scanners are added
     */
    private static void addTagListeners(Parser parser)
    {
        log.debug("Start : addTagListeners");
        parser.addScanner(new BodyScanner());
        parser.addScanner(new BaseHrefScanner());
        // The link scanner itself is not registered; it is only used to
        // create the image and base-href scanners added below.
        LinkScanner linkScanner = new LinkScanner(LinkTag.LINK_TAG_FILTER);
        parser.addScanner(
            linkScanner.createImageScanner(ImageTag.IMAGE_TAG_FILTER));
        parser.addScanner(
            linkScanner.createBaseHREFScanner("-b"));
        parser.addScanner(new InputTagScanner());
        parser.addScanner(new AppletScanner());
        parser.addScanner(new ScriptScanner());
        parser.addScanner(new LinkTagScanner());
        parser.addScanner(new FrameScanner());
    }
}