KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > BenchmarkTidy


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/BenchmarkTidy.java,v 1.2 2004/02/10 13:41:08 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
20 // The developers of JMeter and Apache are grateful to the developers
21
// of HTMLParser for giving Apache Software Foundation a non-exclusive
22
// license. The performance benefits of HTMLParser are clear and the
23
// users of JMeter will benefit from the hard work of the HTMLParser
24
// team. For detailed information about HTMLParser, the project is
25
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
26
//
27
// HTMLParser was originally created by Somik Raha in 2000. Since then
28
// a healthy community of users has formed and helped refine the
29
// design so that it is able to tackle the difficult task of parsing
30
// dirty HTML. Derrick Oswald is the current lead developer and was kind
31
// enough to assist JMeter.
32

33 package org.htmlparser.tests;
34
35 import java.io.BufferedReader JavaDoc;
36 import java.io.ByteArrayInputStream JavaDoc;
37 import java.io.File JavaDoc;
38 import java.io.FileReader JavaDoc;
39 import java.io.IOException JavaDoc;
40 import java.io.UnsupportedEncodingException JavaDoc;
41
42 import org.w3c.dom.Document JavaDoc;
43 import org.w3c.dom.NamedNodeMap JavaDoc;
44 import org.w3c.dom.Node JavaDoc;
45 import org.w3c.dom.NodeList JavaDoc;
46 import org.w3c.tidy.Tidy;
47 import org.xml.sax.SAXException JavaDoc;
48
49 /**
50  * Title: Apache Jakarta JMeter<br>
51  * Copyright: Copyright (c) Apache<br>
52  * Company: Apache<br>
53  * License:<br>
54  * <br>
55  * The license is at the top!<br>
56  * <br>
57  * Description:<br>
58  * <br>
59  * This is a quick class to benchmark tidy against htmlparser.
60  * It is pretty basic and uses the same process as the original
61  * image parsing code in JMeter 1.9.0 and earlier.
62  * <p>
63  * Author: pete<br>
64  * Version: 0.1<br>
65  * Created on: Sep 30, 2003<br>
66  * Last Modified: 7:41:39 AM<br>
67  */

68 public class BenchmarkTidy
69 {
70
71     protected static String JavaDoc utfEncodingName;
72
73     /**
74      *
75      */

76     public BenchmarkTidy(String JavaDoc data)
77     {
78         try
79         {
80             Document JavaDoc doc = (Document JavaDoc) getDOM(data);
81             parseNodes(doc, "img", false, "src");
82         }
83         catch (SAXException JavaDoc e)
84         {
85             e.printStackTrace();
86         }
87     }
88
89     protected void parseNodes(
90         Document JavaDoc html,
91         String JavaDoc htmlTag,
92         boolean type,
93         String JavaDoc srcTag)
94     {
95
96         NodeList JavaDoc nodeList = html.getElementsByTagName(htmlTag);
97         boolean uniqueBinary;
98
99         for (int i = 0; i < nodeList.getLength(); i++)
100         {
101             uniqueBinary = true;
102             Node JavaDoc tempNode = nodeList.item(i);
103
104             // get the url of the Binary
105
NamedNodeMap JavaDoc nnm = tempNode.getAttributes();
106             Node JavaDoc namedItem = null;
107
108             if (type)
109             {
110                 // if type is set, we need 'type=image'
111
namedItem = nnm.getNamedItem("type");
112                 if (namedItem == null)
113                 {
114                     break;
115                 }
116                 String JavaDoc inputType = namedItem.getNodeValue();
117
118                 if (inputType != null && inputType.equalsIgnoreCase("image"))
119                 {
120                     // then we need to download the binary
121
}
122                 else
123                 {
124                     break;
125                 }
126             }
127             namedItem = nnm.getNamedItem(srcTag);
128             System.out.println("Image Tag: " + htmlTag + " SRC=" + namedItem);
129         }
130     }
131
132     protected static Tidy getParser()
133     {
134         Tidy tidy = new Tidy();
135         tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
136         tidy.setQuiet(true);
137         tidy.setShowWarnings(false);
138
139         return tidy;
140     }
141
142     protected static Node JavaDoc getDOM(String JavaDoc text) throws SAXException JavaDoc
143     {
144
145         try
146         {
147             Node JavaDoc node =
148                 getParser().parseDOM(
149                     new ByteArrayInputStream JavaDoc(
150                         text.getBytes(getUTFEncodingName())),
151                     null);
152
153             return node;
154         }
155         catch (UnsupportedEncodingException JavaDoc e)
156         {
157
158             throw new RuntimeException JavaDoc("UTF-8 encoding failed - " + e);
159         }
160     }
161
162     protected static String JavaDoc getUTFEncodingName()
163     {
164         if (utfEncodingName == null)
165         {
166             String JavaDoc versionNum = System.getProperty("java.version");
167             if (versionNum.startsWith("1.1"))
168             {
169                 utfEncodingName = "UTF8";
170             }
171             else
172             {
173                 utfEncodingName = "UTF-8";
174             }
175         }
176         return utfEncodingName;
177     }
178
179     public static void main(String JavaDoc[] args)
180     {
181         if (args != null && args.length > 0)
182         {
183             try
184             {
185                 File JavaDoc input = new File JavaDoc(args[0]);
186
187                 StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
188                 BufferedReader JavaDoc reader =
189                     new BufferedReader JavaDoc(new FileReader JavaDoc(input));
190                 String JavaDoc line = null;
191                 while ((line = reader.readLine()) != null)
192                 {
193                     buff.append(line);
194                 }
195                 long start = System.currentTimeMillis();
196                 BenchmarkTidy test = new BenchmarkTidy(buff.toString());
197                 System.out.println(
198                     "Elapsed time ms: " + (System.currentTimeMillis() - start));
199             }
200             catch (IOException JavaDoc e)
201             {
202                 e.printStackTrace();
203             }
204         }
205         else
206         {
207             System.out.println("Please provide a filename");
208         }
209     }
210 }
211
Popular Tags