KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > jmeter > protocol > http > parser > JTidyHTMLParser


1 // $Header: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java,v 1.12.2.1 2005/03/02 01:34:14 sebb Exp $
2
/*
3  * Copyright 2003-2004 The Apache Software Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17 */

18
19 package org.apache.jmeter.protocol.http.parser;
20
21 import java.io.ByteArrayInputStream JavaDoc;
22 import java.net.MalformedURLException JavaDoc;
23 import java.net.URL JavaDoc;
24 import java.util.Iterator JavaDoc;
25
26 import org.apache.jorphan.logging.LoggingManager;
27 import org.apache.log.Logger;
28 import org.w3c.dom.Document JavaDoc;
29 import org.w3c.dom.NamedNodeMap JavaDoc;
30 import org.w3c.dom.Node JavaDoc;
31 import org.w3c.dom.NodeList JavaDoc;
32 import org.w3c.tidy.Tidy;
33 import org.xml.sax.SAXException JavaDoc;
34
35 /**
36  * HtmlParser implementation using JTidy.
37  *
38  * @version $Revision: 1.12.2.1 $ updated on $Date: 2005/03/02 01:34:14 $
39  */

40 class JTidyHTMLParser extends HTMLParser
41 {
42     /** Used to store the Logger (used for debug and error messages). */
43     transient private static Logger log = LoggingManager.getLoggerForClass();
44
45     protected JTidyHTMLParser()
46     {
47         super();
48     }
49
50     protected boolean isReusable()
51     {
52         return true;
53     }
54
55     /* (non-Javadoc)
56      * @see org.apache.jmeter.protocol.http.parser.HTMLParser#getEmbeddedResourceURLs(byte[], java.net.URL)
57      */

58     public Iterator JavaDoc getEmbeddedResourceURLs(byte[] html, URL JavaDoc baseUrl, URLCollection urls)
59         throws HTMLParseException
60     {
61         Document JavaDoc dom = null;
62         try
63         {
64             dom = (Document JavaDoc)getDOM(html);
65         }
66         catch(SAXException JavaDoc se)
67         {
68             throw new HTMLParseException(se);
69         }
70         
71         // Now parse the DOM tree
72

73         scanNodes(dom,urls, baseUrl);
74
75         return urls.iterator();
76     }
77
78     /**
79      * Scan nodes recursively, looking for embedded resources
80      * @param node - initial node
81      * @param urls - container for URLs
82      * @param baseUrl - used to create absolute URLs
83      *
84      * @return new base URL
85      */

86     private URL JavaDoc scanNodes(Node JavaDoc node, URLCollection urls, URL JavaDoc baseUrl) throws HTMLParseException
87     {
88         if ( node == null ) {
89           return baseUrl;
90         }
91
92         String JavaDoc name = node.getNodeName();
93
94         int type = node.getNodeType();
95
96         switch ( type ) {
97
98         case Node.DOCUMENT_NODE:
99           scanNodes(((Document JavaDoc)node).getDocumentElement(),urls,baseUrl);
100           break;
101
102         case Node.ELEMENT_NODE:
103        
104           NamedNodeMap JavaDoc attrs = node.getAttributes();
105           if (name.equalsIgnoreCase("base"))
106           {
107             String JavaDoc tmp=getValue(attrs,"href");
108             if (tmp!=null) try
109             {
110                 baseUrl= new URL JavaDoc(baseUrl, tmp);
111             }
112             catch (MalformedURLException JavaDoc e)
113             {
114                 throw new HTMLParseException(e);
115             }
116             break;
117           }
118           
119           if (name.equalsIgnoreCase("img"))
120           {
121             urls.addURL(getValue(attrs,"src"),baseUrl);
122             break;
123           }
124           
125           if (name.equalsIgnoreCase("applet"))
126           {
127             urls.addURL(getValue(attrs,"code"),baseUrl);
128               break;
129             }
130             if (name.equalsIgnoreCase("input"))
131             {
132                 String JavaDoc SRC=getValue(attrs,"src");
133                 String JavaDoc typ=getValue(attrs,"type");
134                 if ((src!=null) &&(typ.equalsIgnoreCase("image")) ){
135                     urls.addURL(src,baseUrl);
136                 }
137               break;
138             }
139             if (name.equalsIgnoreCase("link")
140                     && getValue(attrs,"rel").equalsIgnoreCase("stylesheet"))
141             {
142                 urls.addURL(getValue(attrs,"href"),baseUrl);
143               break;
144             }
145             if (name.equalsIgnoreCase("script"))
146             {
147                 urls.addURL(getValue(attrs,"src"),baseUrl);
148               break;
149             }
150             if (name.equalsIgnoreCase("frame"))
151             {
152                 urls.addURL(getValue(attrs,"src"),baseUrl);
153               break;
154             }
155             String JavaDoc back=getValue(attrs,"background");
156             if (back != null){
157                 urls.addURL(back,baseUrl);
158                 break;
159             }
160
161           NodeList JavaDoc children = node.getChildNodes();
162           if ( children != null ) {
163              int len = children.getLength();
164              for ( int i = 0; i < len; i++ ) {
165                 baseUrl= scanNodes(children.item(i),urls,baseUrl);
166              }
167           }
168           break;
169
170 // case Node.TEXT_NODE:
171
// break;
172

173        }
174        
175        return baseUrl;
176
177     }
178
179     /*
180      * Helper method to get an attribute value, if it exists
181      * @param attrs list of attributs
182      * @param attname attribute name
183      * @return
184      */

185     private String JavaDoc getValue(NamedNodeMap JavaDoc attrs, String JavaDoc attname)
186     {
187         String JavaDoc v=null;
188         Node JavaDoc n = attrs.getNamedItem(attname);
189         if (n != null) v=n.getNodeValue();
190         return v;
191     }
192
193     /**
194      * Returns <code>tidy</code> as HTML parser.
195      *
196      * @return a <code>tidy</code> HTML parser
197      */

198     private static Tidy getTidyParser()
199     {
200         log.debug("Start : getParser");
201         Tidy tidy = new Tidy();
202         tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
203         tidy.setQuiet(true);
204         tidy.setShowWarnings(false);
205         if(log.isDebugEnabled())
206         {
207             log.debug("getParser : tidy parser created - " + tidy);
208         }
209         log.debug("End : getParser");
210         return tidy;
211     }
212
213     /**
214      * Returns a node representing a whole xml given an xml document.
215      *
216      * @param text an xml document (as a byte array)
217      * @return a node representing a whole xml
218      *
219      * @throws SAXException indicates an error parsing the xml document
220      */

221     private static Node JavaDoc getDOM(byte [] text) throws SAXException JavaDoc
222     {
223         log.debug("Start : getDOM");
224         Node JavaDoc node = getTidyParser().parseDOM(new
225           ByteArrayInputStream JavaDoc(text), null);
226         if(log.isDebugEnabled())
227         {
228             log.debug("node : " + node);
229         }
230         log.debug("End : getDOM");
231         return node;
232     }
233 }
234
Popular Tags