KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > scanners > FormScanner


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/scanners/FormScanner.java,v 1.2 2004/02/10 13:41:09 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on SourceForge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
32

33 package org.htmlparser.scanners;
34
35 //////////////////
36
// Java Imports //
37
//////////////////
38
import org.htmlparser.Parser;
39 import org.htmlparser.tags.FormTag;
40 import org.htmlparser.tags.Tag;
41 import org.htmlparser.tags.data.CompositeTagData;
42 import org.htmlparser.tags.data.TagData;
43 import org.htmlparser.util.LinkProcessor;
44 import org.htmlparser.util.ParserException;
45
46 /**
47  * Scans for the Image Tag. This is a subclass of TagScanner, and is called using a
48  * variant of the template method. If the evaluate() method returns true, that means the
49  * given string contains an image tag. Extraction is done by the scan method thereafter
50  * by the user of this class.
51  */

52 public class FormScanner extends CompositeTagScanner
53 {
54     private static final String JavaDoc[] MATCH_ID = { "FORM" };
55     public static final String JavaDoc PREVIOUS_DIRTY_LINK_MESSAGE =
56         "Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this..";
57     private boolean linkScannerAlreadyOpen = false;
58     private static final String JavaDoc[] formTagEnders = { "HTML", "BODY" };
59     /**
60     * HTMLFormScanner constructor comment.
61     */

62     public FormScanner(Parser parser)
63     {
64         this("", parser);
65     }
66     /**
67      * Overriding the constructor to accept the filter
68      */

69     public FormScanner(String JavaDoc filter, Parser parser)
70     {
71         super(filter, MATCH_ID, formTagEnders, false);
72         parser.addScanner(new InputTagScanner("-i"));
73         parser.addScanner(new TextareaTagScanner("-t"));
74         parser.addScanner(new SelectTagScanner("-select"));
75         parser.addScanner(new OptionTagScanner("-option"));
76     }
77
78     /**
79      * Extract the location of the image, given the string to be parsed, and the url
80      * of the html page in which this tag exists.
81      * @param s String to be parsed
82      * @param url URL of web page being parsed
83      */

84     public String JavaDoc extractFormLocn(Tag tag, String JavaDoc url) throws ParserException
85     {
86         try
87         {
88             String JavaDoc formURL = tag.getAttribute("ACTION");
89             if (formURL == null)
90                 return "";
91             else
92                 return (new LinkProcessor()).extract(formURL, url);
93         }
94         catch (Exception JavaDoc e)
95         {
96             String JavaDoc msg;
97             if (tag != null)
98                 msg = tag.getText();
99             else
100                 msg = "";
101             throw new ParserException(
102                 "HTMLFormScanner.extractFormLocn() : Error in extracting form location, tag = "
103                     + msg
104                     + ", url = "
105                     + url,
106                 e);
107         }
108     }
109
110     public String JavaDoc extractFormName(Tag tag)
111     {
112         return tag.getAttribute("NAME");
113     }
114
115     public String JavaDoc extractFormMethod(Tag tag)
116     {
117         String JavaDoc method = tag.getAttribute("METHOD");
118         if (method == null)
119             method = FormTag.GET;
120         return method.toUpperCase();
121
122     }
123
124     /**
125      * Scan the tag and extract the information related to the <IMG> tag. The url of the
126      * initiating scan has to be provided in case relative links are found. The initial
127      * url is then prepended to it to give an absolute link.
128      * The NodeReader is provided in order to do a lookahead operation. We assume that
129      * the identification has already been performed using the evaluate() method.
130      * @param tag HTML Tag to be scanned for identification
131      * @param url The initiating url of the scan (Where the html page lies)
132      * @param reader The reader object responsible for reading the html page
133      * @param currentLine The current line (automatically provided by Tag)
134      */

135     // public Tag scan(Tag tag,String url,NodeReader reader,String currentLine) throws ParserException
136
// {
137
// if (linkScannerAlreadyOpen) {
138
// String newLine = insertEndTagBeforeNode(tag, currentLine);
139
// reader.changeLine(newLine);
140
// return new EndTag(
141
// new TagData(
142
// tag.elementBegin(),
143
// tag.elementBegin()+3,
144
// "A",
145
// currentLine
146
// )
147
// );
148
// }
149
// return super.scan(tag,url,reader,currentLine);
150
// }
151

152     /**
153      * @see org.htmlparser.scanners.TagScanner#getID()
154      */

155     public String JavaDoc[] getID()
156     {
157         return MATCH_ID;
158     }
159
160     public boolean evaluate(String JavaDoc s, TagScanner previousOpenScanner)
161     {
162         if (previousOpenScanner instanceof LinkScanner)
163         {
164             linkScannerAlreadyOpen = true;
165             StringBuffer JavaDoc msg = new StringBuffer JavaDoc();
166             msg.append("<");
167             msg.append(s);
168             msg.append(">");
169             msg.append(PREVIOUS_DIRTY_LINK_MESSAGE);
170             feedback.warning(msg.toString());
171             // This is dirty HTML. Assume the current tag is
172
// not a new link tag - but an end tag. This is actually a really wild bug -
173
// Internet Explorer actually parses such tags.
174
// So - we shall then proceed to fool the scanner into sending an endtag of type </A>
175
// For this - set the dirty flag to true and return
176
}
177         else
178             linkScannerAlreadyOpen = false;
179         return super.evaluate(s, previousOpenScanner);
180     }
181
182     public Tag createTag(TagData tagData, CompositeTagData compositeTagData)
183         throws ParserException
184     {
185         String JavaDoc formUrl =
186             extractFormLocn(
187                 compositeTagData.getStartTag(),
188                 tagData.getUrlBeingParsed());
189         if (formUrl != null && formUrl.length() > 0)
190             compositeTagData.getStartTag().setAttribute("ACTION", formUrl);
191         return new FormTag(tagData, compositeTagData);
192     }
193
194 }
195
Popular Tags