1 37 package org.webharvest.runtime.processors; 38 39 import java.io.UnsupportedEncodingException ; 40 import java.util.*; 41 42 import org.apache.commons.httpclient.NameValuePair; 43 import org.webharvest.definition.HttpDef; 44 import org.webharvest.exception.HttpException; 45 import org.webharvest.runtime.Scraper; 46 import org.webharvest.runtime.ScraperContext; 47 import org.webharvest.runtime.scripting.ScriptEngine; 48 import org.webharvest.runtime.templaters.BaseTemplater; 49 import org.webharvest.runtime.variables.*; 50 import org.webharvest.runtime.web.HttpClientManager; 51 import org.webharvest.runtime.web.HttpResponseWrapper; 52 53 56 public class HttpProcessor extends BaseProcessor { 57 58 private HttpDef httpDef; 59 60 List httpParams = new ArrayList(); 61 Map httpHeaderMap = new HashMap(); 62 63 public HttpProcessor(HttpDef httpDef) { 64 super(httpDef); 65 this.httpDef = httpDef; 66 } 67 68 public IVariable execute(Scraper scraper, ScraperContext context) { 69 scraper.setRunningHttpProcessor(this); 70 71 ScriptEngine scriptEngine = scraper.getScriptEngine(); 72 String url = BaseTemplater.execute( httpDef.getUrl(), scriptEngine); 73 String method = BaseTemplater.execute( httpDef.getMethod(), scriptEngine); 74 String charset = BaseTemplater.execute( httpDef.getCharset(), scriptEngine); 75 String username = BaseTemplater.execute( httpDef.getUsername(), scriptEngine); 76 String password = BaseTemplater.execute( httpDef.getPassword(), scriptEngine); 77 78 if (charset == null) { 79 charset = scraper.getConfiguration().getCharset(); 80 } 81 82 executeBody(httpDef, scraper, context); 84 85 HttpClientManager manager = scraper.getHttpClientManager(); 86 87 HttpResponseWrapper res = manager.execute(method, url, charset, username, password, httpParams, httpHeaderMap); 88 89 scraper.removeRunningHttpProcessor(); 90 91 String mimeType = res.getMimeType(); 92 93 log.info("Downloaded: " + url + ", mime type = " + mimeType + ", length = " + res.getBody().length + "B."); 94 95 IVariable result; 96 97 if (mimeType == null || mimeType.toLowerCase().indexOf("text") == 0) { 98 String text; 99 try { 100 text = new String (res.getBody(), charset); 101 } catch (UnsupportedEncodingException e) { 102 throw new HttpException("Charset " + charset + " is not supported!", e); 103 } 104 105 result = new NodeVariable(text); 106 } else { 107 result = new NodeVariable( res.getBody() ); 108 } 109 110 return result; 111 } 112 113 protected void addHttpParam(String name, String value) { 114 httpParams.add( new NameValuePair(name, value) ); 115 } 116 117 protected void addHttpHeader(String name, String value) { 118 httpHeaderMap.put(name, value); 119 } 120 121 } | Popular Tags |