KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > jmeter > protocol > http > parser > HtmlParsingUtils


// $Header: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParsingUtils.java,v 1.10.2.2 2004/07/03 02:07:54 sebb Exp $

/*
 * Copyright 2003-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

18
19 package org.apache.jmeter.protocol.http.parser;
20
21 import java.io.ByteArrayInputStream JavaDoc;
22 import java.io.UnsupportedEncodingException JavaDoc;
23 import java.net.MalformedURLException JavaDoc;
24 import java.net.URL JavaDoc;
25 import java.util.LinkedList JavaDoc;
26 import java.util.List JavaDoc;
27
28 import org.apache.jmeter.config.Argument;
29 import org.apache.jmeter.junit.JMeterTestCase;
30 import org.apache.jmeter.protocol.http.sampler.HTTPSampler;
31 import org.apache.jmeter.protocol.http.sampler.HTTPSamplerBase;
32 import org.apache.jmeter.testelement.property.PropertyIterator;
33 import org.apache.jorphan.logging.LoggingManager;
34 import org.apache.jorphan.util.JOrphanUtils;
35 import org.apache.log.Logger;
36 import org.apache.oro.text.PatternCacheLRU;
37 import org.apache.oro.text.regex.MalformedPatternException;
38 import org.apache.oro.text.regex.Perl5Compiler;
39 import org.apache.oro.text.regex.Perl5Matcher;
40 import org.w3c.dom.Document JavaDoc;
41 import org.w3c.dom.NamedNodeMap JavaDoc;
42 import org.w3c.dom.Node JavaDoc;
43 import org.w3c.dom.NodeList JavaDoc;
44 import org.w3c.tidy.Tidy;
45 import org.xml.sax.SAXException JavaDoc;
46
47 /**
48  * @author Michael Stover
49  * Created June 14, 2001
50  * @version $Revision: 1.10.2.2 $ Last updated: $Date: 2004/07/03 02:07:54 $
51  */

52 public final class HtmlParsingUtils
53 {
54     transient private static Logger log = LoggingManager.getLoggerForClass();
55
56     private static String JavaDoc utfEncodingName;
57     /* NOTUSED
58     private int compilerOptions =
59         Perl5Compiler.CASE_INSENSITIVE_MASK
60             | Perl5Compiler.MULTILINE_MASK
61             | Perl5Compiler.READ_ONLY_MASK;
62     */

63     
64     private static PatternCacheLRU patternCache =
65         new PatternCacheLRU(1000, new Perl5Compiler());
66
67     private static ThreadLocal JavaDoc localMatcher = new ThreadLocal JavaDoc()
68     {
69         protected Object JavaDoc initialValue()
70         {
71             return new Perl5Matcher();
72         }
73     };
74
75     /**
76      * Private constructor to prevent instantiation.
77      */

78     private HtmlParsingUtils()
79     {
80     }
81
82     public static synchronized boolean isAnchorMatched(
83         HTTPSamplerBase newLink,
84         HTTPSamplerBase config)
85         throws MalformedPatternException
86     {
87         boolean ok = true;
88         Perl5Matcher matcher = (Perl5Matcher) localMatcher.get();
89         PropertyIterator iter = config.getArguments().iterator();
90
91         String JavaDoc query = null;
92         try
93         {
94             query = JOrphanUtils.decode(newLink.getQueryString(),"UTF-8");
95         }
96         catch (UnsupportedEncodingException JavaDoc e)
97         {
98             // UTF-8 unsupported? You must be joking!
99
log.error("UTF-8 encoding not supported!");
100             throw new Error JavaDoc("Should not happen: "+e.toString());
101         }
102
103         if (query == null && config.getArguments().getArgumentCount() > 0)
104         {
105             return false;
106         }
107         
108         while (iter.hasNext())
109         {
110             Argument item = (Argument) iter.next().getObjectValue();
111             if (query.indexOf(item.getName() + "=") == -1)
112             {
113                 if (!(ok =
114                     ok
115                         && matcher.contains(
116                             query,
117                             patternCache.getPattern(
118                                 item.getName(),
119                                 Perl5Compiler.READ_ONLY_MASK))))
120                 {
121                     return false;
122                 }
123             }
124         }
125
126         if (config.getDomain() != null
127             && config.getDomain().length() > 0
128             && !newLink.getDomain().equals(config.getDomain()))
129         {
130             if (!(ok =
131                 ok
132                     && matcher.matches(
133                         newLink.getDomain(),
134                         patternCache.getPattern(
135                             config.getDomain(),
136                             Perl5Compiler.READ_ONLY_MASK))))
137             {
138                 return false;
139             }
140         }
141
142         if (!newLink.getPath().equals(config.getPath())
143             && !matcher.matches(
144                 newLink.getPath(),
145                 patternCache.getPattern(
146                     "[/]*" + config.getPath(),
147                     Perl5Compiler.READ_ONLY_MASK)))
148         {
149             return false;
150         }
151
152         if (!(ok =
153             ok
154                 && matcher.matches(
155                     newLink.getProtocol(),
156                     patternCache.getPattern(
157                         config.getProtocol(),
158                         Perl5Compiler.READ_ONLY_MASK))))
159         {
160             return false;
161         }
162
163         return ok;
164     }
165
166     public static synchronized boolean isArgumentMatched(
167         Argument arg,
168         Argument patternArg)
169         throws MalformedPatternException
170     {
171         Perl5Matcher matcher = (Perl5Matcher) localMatcher.get();
172         return (
173             arg.getName().equals(patternArg.getName())
174                 || matcher.matches(
175                     arg.getName(),
176                     patternCache.getPattern(
177                         patternArg.getName(),
178                         Perl5Compiler.READ_ONLY_MASK)))
179             && (arg.getValue().equals(patternArg.getValue())
180                 || matcher.matches(
181                     (String JavaDoc) arg.getValue(),
182                     patternCache.getPattern(
183                         (String JavaDoc) patternArg.getValue(),
184                         Perl5Compiler.READ_ONLY_MASK)));
185     }
186
187     /**
188      * Returns <code>tidy</code> as HTML parser.
189      *
190      * @return a <code>tidy</code> HTML parser
191      */

192     public static Tidy getParser()
193     {
194         log.debug("Start : getParser1");
195         Tidy tidy = new Tidy();
196         tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
197         tidy.setQuiet(true);
198         tidy.setShowWarnings(false);
199
200         if (log.isDebugEnabled())
201         {
202             log.debug("getParser1 : tidy parser created - " + tidy);
203         }
204
205         log.debug("End : getParser1");
206
207         return tidy;
208     }
209
210     /**
211      * Returns a node representing a whole xml given an xml document.
212      *
213      * @param text an xml document
214      * @return a node representing a whole xml
215      */

216     public static Node JavaDoc getDOM(String JavaDoc text) throws SAXException JavaDoc
217     {
218         log.debug("Start : getDOM1");
219
220         try
221         {
222             Node JavaDoc node =
223                 getParser().parseDOM(
224                     new ByteArrayInputStream JavaDoc(
225                         text.getBytes("UTF-8")),
226                     null);
227
228             if (log.isDebugEnabled())
229             {
230                 log.debug("node : " + node);
231             }
232
233             log.debug("End : getDOM1");
234
235             return node;
236         }
237         catch (UnsupportedEncodingException JavaDoc e)
238         {
239             log.error("getDOM1 : Unsupported encoding exception - " + e);
240             log.debug("End : getDOM1");
241             throw new RuntimeException JavaDoc("UTF-8 encoding failed");
242         }
243     }
244
245     public static Document JavaDoc createEmptyDoc()
246     {
247         return Tidy.createEmptyDocument();
248     }
249
250     /**
251      * Create a new URL based on an HREF string plus a contextual URL object.
252      * Given that an HREF string might be of three possible forms, some
253      * processing is required.
254      */

255     public static HTTPSampler createUrlFromAnchor(
256         String JavaDoc parsedUrlString,
257         URL JavaDoc context)
258         throws MalformedURLException JavaDoc
259     {
260         if (log.isDebugEnabled())
261         {
262             log.debug("Creating URL from Anchor: "+parsedUrlString
263                 +", base: "+context);
264         }
265         URL JavaDoc url= new URL JavaDoc(context, parsedUrlString);
266         HTTPSampler sampler = new HTTPSampler();
267         sampler.setDomain(url.getHost());
268         sampler.setProtocol(url.getProtocol());
269         sampler.setPort(url.getPort());
270         sampler.setPath(url.getPath());
271         sampler.parseArguments(url.getQuery());
272
273         return sampler;
274     }
275
276     public static List JavaDoc createURLFromForm(
277             Node JavaDoc doc,
278             URL JavaDoc context)
279     {
280         String JavaDoc selectName = null;
281         LinkedList JavaDoc urlConfigs = new LinkedList JavaDoc();
282         recurseForm(doc, urlConfigs, context, selectName, false);
283         /*
284          * NamedNodeMap atts = formNode.getAttributes();
285          * if(atts.getNamedItem("action") == null)
286          * {
287          * throw new MalformedURLException();
288          * }
289          * String action = atts.getNamedItem("action").getNodeValue();
290          * UrlConfig url = createUrlFromAnchor(action, context);
291          * recurseForm(doc, url, selectName,true,formStart);
292          */

293         return urlConfigs;
294     }
295
296     private static boolean recurseForm(
297         Node JavaDoc tempNode,
298         LinkedList JavaDoc urlConfigs,
299         URL JavaDoc context,
300         String JavaDoc selectName,
301         boolean inForm)
302     {
303         NamedNodeMap JavaDoc nodeAtts = tempNode.getAttributes();
304         String JavaDoc tag = tempNode.getNodeName();
305         try
306         {
307             if (inForm)
308             {
309                 HTTPSampler url = (HTTPSampler) urlConfigs.getLast();
310                 if (tag.equalsIgnoreCase("form"))
311                 {
312                     try
313                     {
314                         urlConfigs.add(createFormUrlConfig(tempNode, context));
315                     }
316                     catch (MalformedURLException JavaDoc e)
317                     {
318                         inForm = false;
319                     }
320                 }
321                 else if (tag.equalsIgnoreCase("input"))
322                 {
323                     url.addArgument(
324                         getAttributeValue(nodeAtts, "name"),
325                         getAttributeValue(nodeAtts, "value"));
326                 }
327                 else if (tag.equalsIgnoreCase("textarea"))
328                 {
329                     try
330                     {
331                         url.addArgument(
332                             getAttributeValue(nodeAtts, "name"),
333                             tempNode.getFirstChild().getNodeValue());
334                     }
335                     catch (NullPointerException JavaDoc e)
336                     {
337                         url.addArgument(
338                             getAttributeValue(nodeAtts, "name"),
339                             "");
340                     }
341                 }
342                 else if (tag.equalsIgnoreCase("select"))
343                 {
344                     selectName = getAttributeValue(nodeAtts, "name");
345                 }
346                 else if (tag.equalsIgnoreCase("option"))
347                 {
348                     String JavaDoc value = getAttributeValue(nodeAtts, "value");
349                     if (value == null)
350                     {
351                         try
352                         {
353                             value = tempNode.getFirstChild().getNodeValue();
354                         }
355                         catch (NullPointerException JavaDoc e)
356                         {
357                             value = "";
358                         }
359                     }
360                     url.addArgument(selectName, value);
361                 }
362             }
363             else if (tag.equalsIgnoreCase("form"))
364             {
365                 try
366                 {
367                     urlConfigs.add(createFormUrlConfig(tempNode, context));
368                     inForm = true;
369                 }
370                 catch (MalformedURLException JavaDoc e)
371                 {
372                     inForm = false;
373                 }
374                 // I can't see the point for this code being here. Looks like
375
// a really obscure performance optimization feature :-)
376
// Seriously: I'll comment it out... I just don't dare to
377
// remove it completely, in case there *is* a reason.
378
/*try
379                 {
380                     Thread.sleep(5000);
381                 }
382                 catch (Exception e)
383                 {
384                 }*/

385             }
386         }
387         catch (Exception JavaDoc ex)
388         {
389             log.warn("Some bad HTML " + printNode(tempNode), ex);
390         }
391         NodeList JavaDoc childNodes = tempNode.getChildNodes();
392         for (int x = 0; x < childNodes.getLength(); x++)
393         {
394             inForm =
395                 recurseForm(
396                     childNodes.item(x),
397                     urlConfigs,
398                     context,
399                     selectName,
400                     inForm);
401         }
402         return inForm;
403     }
404
405     private static String JavaDoc getAttributeValue(NamedNodeMap JavaDoc att, String JavaDoc attName)
406     {
407         try
408         {
409             return att.getNamedItem(attName).getNodeValue();
410         }
411         catch (Exception JavaDoc ex)
412         {
413             return "";
414         }
415     }
416
417     private static String JavaDoc printNode(Node JavaDoc node)
418     {
419         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
420         buf.append("<");
421         buf.append(node.getNodeName());
422         NamedNodeMap JavaDoc atts = node.getAttributes();
423         for (int x = 0; x < atts.getLength(); x++)
424         {
425             buf.append(" ");
426             buf.append(atts.item(x).getNodeName());
427             buf.append("=\"");
428             buf.append(atts.item(x).getNodeValue());
429             buf.append("\"");
430         }
431
432         buf.append(">");
433
434         return buf.toString();
435     }
436     private static HTTPSampler createFormUrlConfig(
437         Node JavaDoc tempNode,
438         URL JavaDoc context)
439         throws MalformedURLException JavaDoc
440     {
441         NamedNodeMap JavaDoc atts = tempNode.getAttributes();
442         if (atts.getNamedItem("action") == null)
443         {
444             throw new MalformedURLException JavaDoc();
445         }
446         String JavaDoc action = atts.getNamedItem("action").getNodeValue();
447         HTTPSampler url = createUrlFromAnchor(action, context);
448         return url;
449     }
450     
451 ///////////////////// Start of Test Code /////////////////
452

453 // TODO: need more tests
454

455     public static class Test extends JMeterTestCase
456     {
457
458         public Test(String JavaDoc name)
459         {
460             super(name);
461         }
462
463         protected void setUp()
464         {
465         }
466         
467         public void testGetParser() throws Exception JavaDoc
468         {
469             getParser();
470         }
471         public void testGetDom() throws Exception JavaDoc
472         {
473             getDOM("<HTML></HTML>");
474             getDOM("");
475         }
476         public void testIsArgumentMatched() throws Exception JavaDoc
477         {
478             Argument arg = new Argument();
479             Argument argp = new Argument();
480             assertTrue(isArgumentMatched(arg,argp));
481
482             arg = new Argument("test","abcd");
483             argp = new Argument("test","a.*d");
484             assertTrue(isArgumentMatched(arg,argp));
485
486             arg = new Argument("test","abcd");
487             argp = new Argument("test","a.*e");
488             assertFalse(isArgumentMatched(arg,argp));
489         }
490     }
491 }
492
Popular Tags