1 23 package org.archive.crawler.processor; 24 25 import java.io.File ; 26 import java.io.IOException ; 27 import java.util.Collections ; 28 import java.util.HashMap ; 29 import java.util.Map ; 30 import java.util.logging.Level ; 31 import java.util.logging.Logger ; 32 33 import org.archive.crawler.datamodel.CrawlURI; 34 import org.archive.crawler.datamodel.FetchStatusCodes; 35 import org.archive.crawler.framework.Processor; 36 import org.archive.crawler.settings.SimpleType; 37 import org.archive.crawler.settings.Type; 38 39 import bsh.EvalError; 40 import bsh.Interpreter; 41 42 58 public class BeanShellProcessor extends Processor implements FetchStatusCodes { 59 60 private static final long serialVersionUID = 6926589944337050754L; 61 62 private static final Logger logger = 63 Logger.getLogger(BeanShellProcessor.class.getName()); 64 65 66 public final static String ATTR_SCRIPT_FILE = "script-file"; 67 68 70 public final static String ATTR_ISOLATE_THREADS = "isolate-threads"; 71 72 protected ThreadLocal <Interpreter> threadInterpreter; 73 protected Interpreter sharedInterpreter; 74 public Map <Object ,Object > sharedMap = Collections.synchronizedMap( 75 new HashMap <Object ,Object >()); 76 77 81 public BeanShellProcessor(String name) { 82 super(name, "BeanShellProcessor. Runs the BeanShell script source " + 83 "(supplied directly or via a file path) against the " + 84 "current URI. Source should define a script method " + 85 "'process(curi)' which will be passed the current CrawlURI. " + 86 "The script may also access this BeanShellProcessor via" + 87 "the 'self' variable and the CrawlController via the " + 88 "'controller' variable."); 89 Type t = addElementToDefinition(new SimpleType(ATTR_SCRIPT_FILE, 90 "BeanShell script file", "")); 91 t.setOverrideable(false); 92 t = addElementToDefinition(new SimpleType(ATTR_ISOLATE_THREADS, 93 "Whether each ToeThread should get its own independent " + 94 "script context, or they should share synchronized access " + 95 "to one context. Default is true, meaning each threads " + 96 "gets its own isolated context.", true)); 97 t.setOverrideable(false); 98 99 } 100 101 protected synchronized void innerProcess(CrawlURI curi) { 102 Interpreter interpreter = getInterpreter(); 105 synchronized(interpreter) { 106 try { 109 interpreter.set("curi",curi); 110 interpreter.eval("process(curi)"); 111 } catch (EvalError e) { 112 e.printStackTrace(); 114 } 115 } 116 } 117 118 123 protected Interpreter getInterpreter() { 124 if(sharedInterpreter!=null) { 125 return sharedInterpreter; 126 } 127 Interpreter interpreter = threadInterpreter.get(); 128 if(interpreter==null) { 129 interpreter = newInterpreter(); 130 threadInterpreter.set(interpreter); 131 } 132 return interpreter; 133 } 134 135 142 protected Interpreter newInterpreter() { 143 Interpreter interpreter = new Interpreter(); 144 try { 145 interpreter.set("self", this); 146 interpreter.set("controller", getController()); 147 148 String filePath = (String ) getUncheckedAttribute(null, ATTR_SCRIPT_FILE); 149 if(filePath.length()>0) { 150 try { 151 File file = getSettingsHandler().getPathRelativeToWorkingDirectory(filePath); 152 interpreter.source(file.getPath()); 153 } catch (IOException e) { 154 logger.log(Level.SEVERE,"unable to read script file",e); 155 } 156 } 157 } catch (EvalError e) { 158 e.printStackTrace(); 160 } 161 162 return interpreter; 163 } 164 165 protected void initialTasks() { 166 super.initialTasks(); 167 kickUpdate(); 168 } 169 170 174 public void kickUpdate() { 175 if((Boolean )getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) { 178 sharedInterpreter = null; 179 threadInterpreter = new ThreadLocal <Interpreter>(); 180 } else { 181 sharedInterpreter = newInterpreter(); 182 threadInterpreter = null; 183 } 184 } 185 } 186 | Popular Tags |