package org.archive.crawler.datamodel.credential;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.settings.MapType;
import org.archive.crawler.settings.SimpleType;
import org.archive.crawler.settings.Type;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;

/**
 * Credential that holds what is needed to run a POST or GET against an
 * HTML login form: the login URI, the HTTP method to use and the form
 * items (name/value pairs) to submit.
 */
public class HtmlFormCredential extends Credential {

    private static final long serialVersionUID = -4732570804435453949L;

    private static final Logger logger =
        Logger.getLogger(HtmlFormCredential.class.getName());

    /** Name of the setting holding the login form URI. */
    private static final String ATTR_LOGIN_URI = "login-uri";

    /** Name of the setting holding the map of form items. */
    private static final String ATTR_FORM_ITEMS = "form-items";

    /** Name of the setting holding the HTTP method. */
    private static final String ATTR_FORM_METHOD = "http-method";

    /** Legal values for the HTTP method setting; the first is the default. */
    private static final String [] METHODS = {"POST", "GET"};

    /**
     * Creates the credential and registers its three settings (login URI,
     * HTTP method, form items). Each setting is marked as not overrideable
     * and as an expert setting.
     *
     * @param name Name to give this credential instance.
     */
    public HtmlFormCredential(final String name)
    {
        super(name, "Credential that has all necessary" +
            " for running a POST/GET to an HTML login form.");

        // Use the shared constant so the registered name cannot drift away
        // from the name getLoginUri() looks up.
        Type t = addElementToDefinition(new SimpleType(ATTR_LOGIN_URI,
            "Full URI of page that contains the HTML login form we're to" +
            " apply these credentials too: E.g. http://www.archive.org", ""));
        t.setOverrideable(false);
        t.setExpertSetting(true);

        t = addElementToDefinition(new SimpleType(ATTR_FORM_METHOD,
            "GET or POST", METHODS[0], METHODS));
        t.setOverrideable(false);
        t.setExpertSetting(true);

        t = addElementToDefinition(new MapType(ATTR_FORM_ITEMS, "Form items.",
            String.class));
        t.setOverrideable(false);
        t.setExpertSetting(true);
    }

    /**
     * @param context CrawlURI used as the settings lookup context.
     * @return Value of the login URI setting.
     * @throws AttributeNotFoundException If the setting cannot be found.
     */
    public String getLoginUri(final CrawlURI context)
    throws AttributeNotFoundException {
        return (String)getAttribute(ATTR_LOGIN_URI, context);
    }

    /**
     * @param context CrawlURI used as the settings lookup context.
     * @return Value of the HTTP method setting.
     * @throws AttributeNotFoundException If the setting cannot be found.
     */
    public String getHttpMethod(final CrawlURI context)
    throws AttributeNotFoundException {
        return (String)getAttribute(ATTR_FORM_METHOD, context);
    }

    /**
     * Copies the configured form items into a plain map.
     *
     * @param context CrawlURI used as the settings lookup context.
     * @return Map of form item name to value, or <code>null</code> when the
     * form-items setting is null or contains no entries.
     * @throws AttributeNotFoundException If the setting cannot be found.
     */
    public Map<String, Object> getFormItems(final CrawlURI context)
    throws AttributeNotFoundException {
        Map<String, Object> result = null;
        MapType items = (MapType)getAttribute(ATTR_FORM_ITEMS, context);
        if (items != null) {
            for (Iterator<?> i = items.iterator(context); i.hasNext();) {
                Attribute a = (Attribute)i.next();
                // Allocate lazily so "no items" is reported as null, which
                // is what existing callers test for.
                if (result == null) {
                    result = new HashMap<String, Object>();
                }
                result.put(a.getName(), a.getValue());
            }
        }
        return result;
    }

    /**
     * Tests whether the passed CrawlURI is itself the login URI of this
     * credential. The login URI is resolved against the CrawlURI's own UURI
     * before the two are compared as strings. On a match the CrawlURI is
     * flagged as a prerequisite if it was not flagged already.
     *
     * @param curi CrawlURI to test.
     * @return True if <code>curi</code> matches the resolved login URI.
     */
    public boolean isPrerequisite(final CrawlURI curi) {
        boolean result = false;
        String curiStr = curi.getUURI().toString();
        String loginUri = getPrerequisite(curi);
        if (loginUri != null) {
            try {
                UURI uuri = UURIFactory.getInstance(curi.getUURI(), loginUri);
                if (uuri != null && curiStr != null &&
                        uuri.toString().equals(curiStr)) {
                    result = true;
                    if (!curi.isPrerequisite()) {
                        curi.setPrerequisite(true);
                        logger.fine(curi + " is prereq.");
                    }
                }
            } catch (URIException e) {
                logger.severe("Failed to uuri: " + curi + ", " +
                    e.getMessage());
            }
        }
        return result;
    }

    /**
     * @param curi CrawlURI used as the settings lookup context.
     * @return True if a login URI could be read for <code>curi</code>.
     */
    public boolean hasPrerequisite(CrawlURI curi) {
        return getPrerequisite(curi) != null;
    }

    /**
     * @param curi CrawlURI used as the settings lookup context.
     * @return The login URI, or <code>null</code> if the setting could not
     * be found (the failure is logged rather than thrown).
     */
    public String getPrerequisite(CrawlURI curi) {
        String loginUri = null;
        try {
            loginUri = getLoginUri(curi);
        } catch (AttributeNotFoundException e) {
            logger.severe("Failed to getLoginUri: " + this + ", " + curi + ","
                + e.getMessage());
        }
        return loginUri;
    }

    /**
     * @param curi CrawlURI used as the settings lookup context.
     * @return The login URI, which serves as this credential's key.
     * @throws AttributeNotFoundException If the setting cannot be found.
     */
    public String getKey(CrawlURI curi) throws AttributeNotFoundException {
        return getLoginUri(curi);
    }

    /**
     * @return Always false.
     */
    public boolean isEveryTime() {
        return false;
    }

    /**
     * Loads the configured form items into the passed HTTP method. For a
     * {@link PostMethod} the items become the request body; for a
     * {@link GetMethod} they are appended to the method's query string.
     * The <code>http</code> and <code>payload</code> arguments are not used
     * by this implementation.
     *
     * @param curi CrawlURI used as the settings lookup context.
     * @param http HttpClient instance (unused here).
     * @param method Method to populate.
     * @param payload Credential payload (unused here).
     * @return True if the method was populated; false if there were no form
     * items or the method is neither a POST nor a GET.
     */
    public boolean populate(CrawlURI curi, HttpClient http, HttpMethod method,
            String payload) {
        boolean result = false;
        Map<String, Object> formItems = null;
        try {
            formItems = getFormItems(curi);
        }
        catch (AttributeNotFoundException e1) {
            logger.severe("Failed get of form items for " + curi);
        }
        if (formItems == null || formItems.size() <= 0) {
            try {
                logger.severe("No form items for " + method.getURI());
            }
            catch (URIException e) {
                logger.severe("No form items and exception getting uri: " +
                    e.getMessage());
            }
            return result;
        }

        NameValuePair[] data = new NameValuePair[formItems.size()];
        int index = 0;
        for (Map.Entry<String, Object> item : formItems.entrySet()) {
            data[index++] = new NameValuePair(item.getKey(),
                (String)item.getValue());
        }
        if (method instanceof PostMethod) {
            ((PostMethod)method).setRequestBody(data);
            result = true;
        } else if (method instanceof GetMethod) {
            HttpMethodBase hmb = (HttpMethodBase)method;
            // Let the method encode the pairs by setting them as the query,
            // then read the encoded form back so it can be merged with any
            // query string that was already present.
            String currentQuery = hmb.getQueryString();
            hmb.setQueryString(data);
            String newQuery = hmb.getQueryString();
            // Only join with '&' when there is something to join to;
            // otherwise the query would start with a stray '&'. When there
            // was no prior query the encoded items set above already are
            // the complete query string.
            if (currentQuery != null && currentQuery.length() > 0) {
                hmb.setQueryString(currentQuery + "&" + newQuery);
            }
            result = true;
        } else {
            logger.severe("Unknown method type: " + method);
        }
        return result;
    }

    /**
     * @param curi CrawlURI used as the settings lookup context.
     * @return True if the configured HTTP method is POST (compared ignoring
     * case); false otherwise, including when the setting cannot be found.
     */
    public boolean isPost(CrawlURI curi) {
        String method = null;
        try {
            method = getHttpMethod(curi);
        }
        catch (AttributeNotFoundException e) {
            logger.severe("Failed to get method for " + curi + ", " + this);
        }
        return method != null && method.equalsIgnoreCase("POST");
    }
}