KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > taglibs > scrape > PageTag


1 /*
2  * Copyright 1999,2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.taglibs.scrape;
18
19 import java.util.*;
20 import java.io.*;
21 import java.net.*;
22 import javax.servlet.jsp.*;
23 import javax.servlet.jsp.tagext.*;
24 import org.apache.taglibs.scrape.*;
25
26 /**
27  * PageTag - JSP tag <b>page</b> is used for generating an http request to a web
28  * page, it then scrapes the page for user specified text and stores
29  * the results from the scrape for later retrieval by another tag.
30  * <p>
31  * JSP Tag Lib Descriptor
32  * <p><pre>
33  * &lt;name&gt;page&lt;/name&gt;
34  * &lt;tagclass&gt;org.apache.taglibs.scrape.PageTag&lt;/tagclass&gt;
35  * &lt;bodycontent&gt;JSP&lt;/bodycontent&gt;
36  * &lt;info&gt;Set the page that will be scraped&lt;/info&gt;
37  *
38  * &lt;attribute&gt;
39  * &lt;name&gt;url&lt;/name&gt;
40  * &lt;required&gt;false&lt;/required&gt;
41  * &lt;rtexprval&gt;false&lt;/rtexprval&gt;
42  * &lt;/attribute&gt;
43  * &lt;attribute&gt;
44  * &lt;name&gt;time&lt;/name&gt;
45  * &lt;required&gt;false&lt;/required&gt;
46  * &lt;rtexprval&gt;false&lt;/rtexprval&gt;
47  * &lt;/attribute&gt;
48  * &lt;attribute&gt;
49  * &lt;name&gt;useProxy&lt;/name&gt;
50  * &lt;required&gt;false&lt;/required&gt;
51  * &lt;rtexprval&gt;false&lt;/rtexprval&gt;
52  * &lt;/attribute&gt;
53  * &lt;attribute&gt;
54  * &lt;name&gt;proxyPort&lt;/name&gt;
55  * &lt;required&gt;false&lt;/required&gt;
56  * &lt;rtexprval&gt;false&lt;/rtexprval&gt;
57  * &lt;/attribute&gt;
58  * &lt;attribute&gt;
59  * &lt;name&gt;proxyServer&lt;/name&gt;
60  * &lt;required&gt;false&lt;/required&gt;
61  * &lt;rtexprval&gt;false&lt;/rtexprval&gt;
62  * &lt;/attribute&gt;
63  * &lt;attribute&gt;
64  * &lt;name&gt;proxyName&lt;/name&gt;
65  * &lt;required&gt;false&lt;/required&gt;
66  * &lt;rtexprval&gt;false&lt;/rtexprval&gt;
67  * &lt;/attribute&gt;
68  * &lt;attribute&gt;
69  * &lt;name&gt;proxyPass&lt;/name&gt;
70  * &lt;required&gt;false&lt;/required&gt;
71  * &lt;rtexprval&gt;false&lt;/rtexprval&gt;
72  * &lt;/attribute&gt;
73  * &lt;attribute&gt;
74  * &lt;name&gt;proxyEncode&lt;/name&gt;
75  * &lt;required&gt;false&lt;/required&gt;
76  * &lt;rtexprval&gt;false&lt;/rtexprval&gt;
77  * &lt;/attribute&gt;
78  * &lt;attribute&gt;
79  * &lt;name&gt;charset&lt;/name&gt;
80  * &lt;required&gt;false&lt;/required&gt;
81  * &lt;rtexprval&gt;true&lt;/rtexprval&gt;
82  * &lt;/attribute&gt;
83  * </pre></p>
84  *
85  * @author Rich Catlett
86  *
87  * @version 1.0
88  *
89  * @see PageData
90  * @see ScrapeData
91  *
92  */

93 public class PageTag extends TagSupport {
94
95     // url for the web page the user wants to scrape it gets stored in a
96
// pagedata object
97
private String JavaDoc url;
98     //default time before http connection is created again
99
private long time = 600000;
100     // holds an instance of the application scope data object that stores the
101
// data on this scrape
102
private PageData pagedata;
103     // the port to use for the proxy connection
104
private int pport = -1;
105     // the proxy server to use for the connection
106
private String JavaDoc pserver = null;
107     // username for proxy server if it requires basic authentication
108
private String JavaDoc pname = null;
109     // password for proxy server if it requires basic authentication
110
private String JavaDoc ppass = null;
111     // boolean value determines if the connection to to travel via a secure
112
// connection
113
private boolean ssl = false;
114     // the password to the client keystore for client side ssl authentication
115
private String JavaDoc sslpass = null;
116     // charset of the page scrapped
117
private String JavaDoc charset = null;
118
119
120     /**
121      * implementation of method from the tag interface that tells the JSP what
122      * to do upon encounteringa the start tag for this tag
123      *
124      * @throws JspException thrown when an error occurs with client request
125      * processing
126      *
127      * @return integer value that tells the JSP engine to evaluate the body
128      * of this tag
129      *
130      */

131     public final int doStartTag() throws JspException {
132         // if attribute url was given get pagedata object keyed to url from
133
// static hashmap
134
if (url != null)
135         getPage();
136     return EVAL_BODY_INCLUDE; // evaluate the body
137
}
138
139     /**
140      * implementation of method from the Tag interface that tells the JSP what
141      * to do upon encountering the end tag for this tag
142      *
143      * @throws JspException thrown when error occurs in processing the body of
144      * this method
145      *
146      * @return integer value telling the JSP engine to evaluate the rest of the
147      * jsp page
148      *
149      */

150     public final int doEndTag() throws JspException {
151         // attempt to scrape from the page named by url
152
pagedata.scrapePage(url, time, pageContext, charset);
153     // put scrape results in the pagescope for access by result tag
154
putScrapes();
155     return EVAL_PAGE;
156     }
157
158     /**
159      * method gets the page object from the from the static hashmap keyed to the
160      * url
161      *
162      */

163     public final void getPage() {
164       //pagedata = PageData.getPage(url, pport, pserver, ssl, pname, ppass);
165
pagedata = PageData.getPage(url, pport, pserver, pname, ppass);
166     }
167
168     /**
169      * sets the time user would like the tag to wait before making a new http
170      * connection default is 10 minutes. method is used by the JSP container
171      * to set the time attribute given in the page tag
172      *
173      * @param string time in minutes must be greater than 10
174      *
175      */

176     public final void setTime(String JavaDoc wait) throws JspException {
177     long temp; // temporary variable for converting value
178
try {
179         Long JavaDoc num = new Long JavaDoc(wait);
180         temp = num.longValue();
181         if (temp > 10) {
182         time = temp * 60000;
183         }
184     } catch(NumberFormatException JavaDoc nfe) {
185         // throw a JspException so that jsp page author can see error
186
throw new JspException("Scrape: Page tag: the time attribute needs"
187                                   + " to be an integer");
188     }
189     }
190
191     /**
192      * sets the url for the http request. method is used by the JSP container
193      * to set the time attribute given in the page tag
194      *
195      * @param url the url for the http request
196      *
197      */

198     public final void setUrl(String JavaDoc url) throws JspException {
199     this.url = url.trim();
200     if (url.startsWith("https"))
201         ssl = true;
202     }
203
204     /**
205      * set the value of proxy port
206      *
207      * @param value the proxy port to use for the connection as a String
208      *
209      */

210     public final void setuseProxy(String JavaDoc value) throws JspException {
211     if (value.equalsIgnoreCase("true")) {
212         pserver = System.getProperty("http.proxyHost");
213         pport = Integer.getInteger("http.proxyPort").intValue();
214     }
215     }
216
217     /**
218      * set the value of proxy port
219      *
220      * @param value the proxy port to use for the connection as a String
221      *
222      */

223     public final void setProxyPort(String JavaDoc value) throws JspException {
224     try {
225         pport = new Integer JavaDoc(value).intValue();
226     } catch(NumberFormatException JavaDoc nfe) {
227         // throw a JspException so that jsp page author can see error
228
throw new JspException("Scrape: Page tag: the proxyPort attribute needs"
229                                   + " to be an integer");
230     }
231     }
232
233     /**
234      * set the value of proxy server
235      *
236      * @param value the proxy server to use for the connection
237      *
238      */

239     public final void setProxyServer(String JavaDoc value) {
240     pserver = value;
241     }
242
243     /**
244      * set the value of the password for authentication to the proxy server
245      *
246      * @param value the proxy port to use for the connection as a String
247      *
248      */

249     public final void setProxyPass(String JavaDoc value) {
250     ppass = value;
251     }
252
253     /**
254      * set the value of the username for authentication to the proxy server
255      *
256      * @param value the proxy server to use for the connection
257      *
258      */

259     public final void setProxyName(String JavaDoc value) {
260     pname = value;
261     }
262
263     /**
264      * set the pass word to access the client keystore
265      *
266      * @param value password to the client keystore
267      *
268      */

269     public final void setClientPass(String JavaDoc value) {
270     sslpass = value;
271     }
272
273     /**
274      * set the name and value of any extra headers to be sent
275      *
276      * @param name string that is the name of an extra header to be sent
277      * @param value string that is the value of an extra header to be sent
278      */

279     public final void setHeader(String JavaDoc name, String JavaDoc value) {
280     pagedata.setHeader(name, value);
281     }
282
283     /**
284      * set the value of the charset to be used
285      *
286      * @param value charset to be used to scrape the page
287      *
288      */

289     public final void setCharset(String JavaDoc value) {
290     charset = value;
291     }
292     /**
293      * method sets the scrapedata object in the hashmap scrapes in the
294      * application scope pagedata object
295      *
296      * @param id unique identifier of the scrape the following attributes
297      * define
298      * @param begin beginning anchor for the scrape refered to by id
299      * @param end ending anchor for the scrape refered to by id
300      * @param anchors boolean flag that determines if begin and end anchors are
301      * part of the result
302      * @param strip boolean flag that determines if tags are to be striped from
303      * the result
304      *
305      */

306     public final void setScrape(String JavaDoc id, String JavaDoc begin, String JavaDoc end,
307                              String JavaDoc anchors, String JavaDoc strip) throws JspException {
308
309     // set the scrape in the hashmap scrapes in PageData
310
pagedata.setScrape(id, begin, end, anchors, strip);
311     }
312
313     /**
314      * method to trace through the hashmap of scrapes and place the result from
315      * each scrape in the pagescope for access by the result tag
316      *
317      */

318     private final void putScrapes() {
319         // set of keys for the hashmap scrapes
320
Set scrapedatakeys = pagedata.getKeySet();
321         // iterators for scrapedatakeys two are needed one for getting
322
// scrapedata object and one for setting result in pagescope
323
Iterator scrapesit1 = scrapedatakeys.iterator();
324         Iterator scrapesit2 = scrapedatakeys.iterator();
325
326         // iterate through the scrapedata objects and perform a scrape for each
327
// one
328
while(scrapesit1.hasNext()) {
329
330         // get result from scrapedata object and place in pagescope
331
pageContext.setAttribute((String JavaDoc)scrapesit2.next(),
332         pagedata.getScrape((String JavaDoc)scrapesit1.next()).getResult());
333     }
334     }
335 }
336
Popular Tags