KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > datamodel > credential > HtmlFormCredential


/* HtmlFormCredential
 *
 * Created on Apr 7, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

23 package org.archive.crawler.datamodel.credential;
24
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.settings.MapType;
import org.archive.crawler.settings.SimpleType;
import org.archive.crawler.settings.Type;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
46
47
48
49 /**
50  * Credential that holds all needed to do a GET/POST to a HTML form.
51  *
52  * @author stack
53  * @version $Revision: 1.14.16.1 $, $Date: 2007/01/13 01:31:12 $
54  */

55 public class HtmlFormCredential extends Credential {
56
57     private static final long serialVersionUID = -4732570804435453949L;
58
59     private static final Logger JavaDoc logger =
60         Logger.getLogger(HtmlFormCredential.class.getName());
61
62     private static final String JavaDoc ATTR_LOGIN_URI = "login-uri";
63     private static final String JavaDoc ATTR_FORM_ITEMS = "form-items";
64     private static final String JavaDoc ATTR_FORM_METHOD = "http-method";
65     private static final String JavaDoc [] METHODS = {"POST", "GET"};
66
67     /**
68      * Constructor.
69      *
70      * A constructor that takes name of the credential is required by settings
71      * framework.
72      *
73      * @param name Name of this credential.
74      */

75     public HtmlFormCredential(final String JavaDoc name)
76     {
77         super(name, "Credential that has all necessary" +
78             " for running a POST/GET to an HTML login form.");
79
80         Type t = addElementToDefinition(new SimpleType("login-uri",
81             "Full URI of page that contains the HTML login form we're to" +
82             " apply these credentials too: E.g. http://www.archive.org", ""));
83         t.setOverrideable(false);
84         t.setExpertSetting(true);
85
86
87         t = addElementToDefinition(new SimpleType(ATTR_FORM_METHOD,
88             "GET or POST", METHODS[0], METHODS));
89         t.setOverrideable(false);
90         t.setExpertSetting(true);
91
92         t = addElementToDefinition(new MapType(ATTR_FORM_ITEMS, "Form items.",
93             String JavaDoc.class));
94         t.setOverrideable(false);
95         t.setExpertSetting(true);
96     }
97
98     /**
99      * @param context CrawlURI context to use.
100      * @return login-uri.
101      * @throws AttributeNotFoundException
102      */

103     public String JavaDoc getLoginUri(final CrawlURI context)
104             throws AttributeNotFoundException JavaDoc {
105         return (String JavaDoc)getAttribute(ATTR_LOGIN_URI, context);
106     }
107
108     /**
109      * @param context CrawlURI context to use.
110      * @return login-uri.
111      * @throws AttributeNotFoundException
112      */

113     public String JavaDoc getHttpMethod(final CrawlURI context)
114             throws AttributeNotFoundException JavaDoc {
115         return (String JavaDoc)getAttribute(ATTR_FORM_METHOD, context);
116     }
117
118     /**
119      * @param context CrawlURI context to use.
120      * @return Form inputs as convenient map. Returns null if no form items.
121      * @throws AttributeNotFoundException
122      */

123     public Map JavaDoc<String JavaDoc,Object JavaDoc> getFormItems(final CrawlURI context)
124             throws AttributeNotFoundException JavaDoc {
125         Map JavaDoc<String JavaDoc,Object JavaDoc> result = null;
126         MapType items = (MapType)getAttribute(ATTR_FORM_ITEMS, context);
127         if (items != null) {
128             for (Iterator JavaDoc i = items.iterator(context); i.hasNext();) {
129                 Attribute JavaDoc a = (Attribute JavaDoc)i.next();
130                 if (result == null) {
131                     result = new HashMap JavaDoc<String JavaDoc,Object JavaDoc>();
132                 }
133                 result.put(a.getName(), a.getValue());
134             }
135         }
136         return result;
137     }
138
139     public boolean isPrerequisite(final CrawlURI curi) {
140         boolean result = false;
141         String JavaDoc curiStr = curi.getUURI().toString();
142         String JavaDoc loginUri = getPrerequisite(curi);
143         if (loginUri != null) {
144             try {
145                 UURI uuri = UURIFactory.getInstance(curi.getUURI(), loginUri);
146                 if (uuri != null && curiStr != null &&
147                     uuri.toString().equals(curiStr)) {
148                     result = true;
149                     if (!curi.isPrerequisite()) {
150                         curi.setPrerequisite(true);
151                         logger.fine(curi + " is prereq.");
152                     }
153                 }
154             } catch (URIException e) {
155                 logger.severe("Failed to uuri: " + curi + ", " +
156                     e.getMessage());
157             }
158         }
159         return result;
160     }
161
162     public boolean hasPrerequisite(CrawlURI curi) {
163         return getPrerequisite(curi) != null;
164     }
165
166     public String JavaDoc getPrerequisite(CrawlURI curi) {
167         String JavaDoc loginUri = null;
168         try {
169             loginUri = getLoginUri(curi);
170         } catch (AttributeNotFoundException JavaDoc e) {
171             logger.severe("Failed to getLoginUri: " + this + ", " + curi + ","
172                 + e.getMessage());
173             // Not much I can do here. What if I fail every time? Then
174
// this prereq. will not ever be processed. We'll never get on to
175
// this server.
176
}
177         return loginUri;
178     }
179
180     public String JavaDoc getKey(CrawlURI curi) throws AttributeNotFoundException JavaDoc {
181         return getLoginUri(curi);
182     }
183
184     public boolean isEveryTime() {
185         // This authentication is one time only.
186
return false;
187     }
188
189     public boolean populate(CrawlURI curi, HttpClient http, HttpMethod method,
190             String JavaDoc payload) {
191         // http is not used.
192
// payload is not used.
193
boolean result = false;
194         Map JavaDoc formItems = null;
195         try {
196             formItems = getFormItems(curi);
197         }
198         catch (AttributeNotFoundException JavaDoc e1) {
199             logger.severe("Failed get of form items for " + curi);
200         }
201         if (formItems == null || formItems.size() <= 0) {
202             try {
203                 logger.severe("No form items for " + method.getURI());
204             }
205             catch (URIException e) {
206                 logger.severe("No form items and exception getting uri: " +
207                     e.getMessage());
208             }
209             return result;
210         }
211
212         NameValuePair[] data = new NameValuePair[formItems.size()];
213         int index = 0;
214         String JavaDoc key = null;
215         for (Iterator JavaDoc i = formItems.keySet().iterator(); i.hasNext();) {
216             key = (String JavaDoc)i.next();
217             data[index++] = new NameValuePair(key, (String JavaDoc)formItems.get(key));
218         }
219         if (method instanceof PostMethod) {
220             ((PostMethod)method).setRequestBody(data);
221             result = true;
222         } else if (method instanceof GetMethod) {
223             // Append these values to the query string.
224
// Get current query string, then add data, then get it again
225
// only this time its our data only... then append.
226
HttpMethodBase hmb = (HttpMethodBase)method;
227             String JavaDoc currentQuery = hmb.getQueryString();
228             hmb.setQueryString(data);
229             String JavaDoc newQuery = hmb.getQueryString();
230             hmb.setQueryString(((currentQuery != null)? currentQuery: "") +
231                     "&" + newQuery);
232             result = true;
233         } else {
234             logger.severe("Unknown method type: " + method);
235         }
236         return result;
237     }
238
239     public boolean isPost(CrawlURI curi) {
240         String JavaDoc method = null;
241         try {
242             method = getHttpMethod(curi);
243         }
244         catch (AttributeNotFoundException JavaDoc e) {
245             logger.severe("Failed to get method for " + curi + ", " + this);
246         }
247         return method != null && method.equalsIgnoreCase("POST");
248     }
249 }
250
Popular Tags