1 37 package org.webharvest.runtime; 38 39 import org.apache.log4j.Logger; 40 import org.webharvest.definition.IElementDef; 41 import org.webharvest.definition.ScraperConfiguration; 42 import org.webharvest.runtime.processors.BaseProcessor; 43 import org.webharvest.runtime.processors.CallProcessor; 44 import org.webharvest.runtime.processors.HttpProcessor; 45 import org.webharvest.runtime.processors.ProcessorResolver; 46 import org.webharvest.runtime.scripting.ScriptEngine; 47 import org.webharvest.runtime.variables.IVariable; 48 import org.webharvest.runtime.variables.NodeVariable; 49 import org.webharvest.runtime.web.HttpClientManager; 50 import org.webharvest.utils.CommonUtil; 51 import org.webharvest.utils.Stack; 52 53 import java.util.Iterator ; 54 import java.util.List ; 55 56 59 public class Scraper { 60 61 protected static Logger log = Logger.getLogger(Scraper.class); 62 63 private ScraperConfiguration configuration; 64 private String workingDir; 65 private ScraperContext context; 66 67 private transient boolean isDebugMode = false; 68 69 private HttpClientManager httpClientManager; 70 71 private transient Stack runningFunctions = new Stack(); 73 74 private transient Stack runningHttpProcessors = new Stack(); 76 77 private transient int runningLevel = 1; 79 80 ScriptEngine scriptEngine = null; 82 83 88 public Scraper(ScraperConfiguration configuration, String workingDir) { 89 this.configuration = configuration; 90 this.workingDir = CommonUtil.adaptFilename(workingDir); 91 92 this.httpClientManager = new HttpClientManager(); 93 94 this.context = new ScraperContext(); 95 this.scriptEngine = new ScriptEngine(this.context); 96 } 97 98 105 public void addVariableToContext(String name, Object value) { 106 this.context.put(name, new NodeVariable(value)); 107 } 108 109 public IVariable execute(List ops) { 110 Iterator it = ops.iterator(); 111 while (it.hasNext()) { 112 IElementDef elementDef = (IElementDef) it.next(); 113 BaseProcessor processor = ProcessorResolver.createProcessor(elementDef); 114 115 if (processor != null) { 116 processor.run(this, context); 117 } 118 } 119 120 return new NodeVariable(""); 121 } 122 123 public void execute() { 124 long startTime = System.currentTimeMillis(); 125 execute( configuration.getOperations() ); 126 log.info("Configuration executed in " + (System.currentTimeMillis() - startTime) + "ms."); 127 } 128 129 public ScraperContext getContext() { 130 return context; 131 } 132 133 public ScraperConfiguration getConfiguration() { 134 return configuration; 135 } 136 137 public String getWorkingDir() { 138 return this.workingDir; 139 } 140 141 public HttpClientManager getHttpClientManager() { 142 return httpClientManager; 143 } 144 145 public void addRunningFunction(CallProcessor callProcessor) { 146 runningFunctions.push(callProcessor); 147 } 148 149 public CallProcessor getRunningFunction() { 150 return (CallProcessor) runningFunctions.peek(); 151 } 152 153 public void removeRunningFunction() { 154 if (runningFunctions.size() > 0) { 155 runningFunctions.pop(); 156 } 157 } 158 159 public HttpProcessor getRunningHttpProcessor() { 160 return (HttpProcessor) runningHttpProcessors.peek(); 161 } 162 163 public void setRunningHttpProcessor(HttpProcessor httpProcessor) { 164 runningHttpProcessors.push(httpProcessor); 165 } 166 167 public void removeRunningHttpProcessor() { 168 if (runningHttpProcessors.size() > 0) { 169 runningHttpProcessors.pop(); 170 } 171 } 172 public void increaseRunningLevel() { 173 this.runningLevel++; 174 } 175 176 public void decreaseRunningLevel() { 177 this.runningLevel--; 178 } 179 180 public int getRunningLevel() { 181 return runningLevel; 182 } 183 184 public boolean isDebugMode() { 185 return isDebugMode; 186 } 187 188 public void setDebug(boolean debug) { 189 this.isDebugMode = debug; 190 } 191 192 public ScriptEngine getScriptEngine() { 193 return runningFunctions.size() > 0 ? getRunningFunction().getScriptEngine() : this.scriptEngine; 194 } 195 196 } | Popular Tags |