KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > BeanShellDecideRule


1 /* BeanShellDecideRule
2 *
3 * $Id: BeanShellDecideRule.java,v 1.4.2.1 2007/01/13 01:31:13 stack-sf Exp $
4 *
5 * Created on Aug 7, 2006
6 *
7 * Copyright (C) 2006 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.deciderules;
26
27 import java.io.File JavaDoc;
28 import java.io.IOException JavaDoc;
29 import java.util.Collections JavaDoc;
30 import java.util.HashMap JavaDoc;
31 import java.util.Map JavaDoc;
32 import java.util.logging.Level JavaDoc;
33 import java.util.logging.Logger JavaDoc;
34
35 import org.archive.crawler.settings.SimpleType;
36 import org.archive.crawler.settings.Type;
37
38 import bsh.EvalError;
39 import bsh.Interpreter;
40
41
42 /**
43  * Rule which runs a groovy script to make its decision.
44  *
45  * Script source may be provided via a file local to the crawler.
46  *
47  * Variables available to the script include 'object' (the object to be
48  * evaluated, typically a CandidateURI or CrawlURI), 'self'
49  * (this GroovyDecideRule instance), and 'controller' (the crawl's
50  * CrawlController instance).
51  *
52  * TODO: reduce copy & paste with GroovyProcessor
53  *
54  * @author gojomo
55  */

56 public class BeanShellDecideRule extends DecideRule {
57
58     private static final long serialVersionUID = -8433859929199308527L;
59
60     private static final Logger JavaDoc logger =
61         Logger.getLogger(BeanShellDecideRule.class.getName());
62     
63     /** setting for script file */
64     public final static String JavaDoc ATTR_SCRIPT_FILE = "script-file";
65
66     /** whether each thread should have its own script runner (true), or
67      * they should share a single script runner with synchronized access */

68     public final static String JavaDoc ATTR_ISOLATE_THREADS = "isolate-threads";
69
70     protected ThreadLocal JavaDoc<Interpreter> threadInterpreter =
71         new ThreadLocal JavaDoc<Interpreter>();;
72     protected Interpreter sharedInterpreter;
73     public Map JavaDoc<Object JavaDoc,Object JavaDoc> sharedMap =
74         Collections.synchronizedMap(new HashMap JavaDoc<Object JavaDoc,Object JavaDoc>());
75     protected boolean initialized = false;
76     
77     public BeanShellDecideRule(String JavaDoc name) {
78         super(name);
79         setDescription("BeanShellDecideRule. Runs the BeanShell script " +
80                 "source (supplied via a file path) against " +
81                 "the current URI. Source should define a script method " +
82                 "'decisionFor(object)' which will be passed the object" +
83                 "to be evaluated and returns one of self.ACCEPT, " +
84                 "self.REJECT, or self.PASS. " +
85                 "The script may access this BeanShellDecideRule via" +
86                 "the 'self' variable and the CrawlController via the " +
87                 "'controller' variable. Runs the groovy script source " +
88                 "(supplied via a file path) against the " +
89                 "current URI.");
90         Type t = addElementToDefinition(new SimpleType(ATTR_SCRIPT_FILE,
91                 "BeanShell script file", ""));
92         t.setOverrideable(false);
93         t = addElementToDefinition(new SimpleType(ATTR_ISOLATE_THREADS,
94                 "Whether each ToeThread should get its own independent " +
95                 "script context, or they should share synchronized access " +
96                 "to one context. Default is true, meaning each threads " +
97                 "gets its own isolated context.", true));
98         t.setOverrideable(false);
99     }
100
101     public synchronized Object JavaDoc decisionFor(Object JavaDoc object) {
102         // depending on previous configuration, interpreter may
103
// be local to this thread or shared
104
Interpreter interpreter = getInterpreter();
105         synchronized(interpreter) {
106             // synchronization is harmless for local thread interpreter,
107
// necessary for shared interpreter
108
try {
109                 interpreter.set("object",object);
110                 return interpreter.eval("decisionFor(object)");
111             } catch (EvalError e) {
112                 // TODO Auto-generated catch block
113
e.printStackTrace();
114                 return PASS;
115             }
116         }
117     }
118
119     /**
120      * Get the proper Interpreter instance -- either shared or local
121      * to this thread.
122      * @return Interpreter to use
123      */

124     protected Interpreter getInterpreter() {
125         if(sharedInterpreter==null
126            && !(Boolean JavaDoc)getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) {
127             // initialize
128
sharedInterpreter = newInterpreter();
129         }
130         if(sharedInterpreter!=null) {
131             return sharedInterpreter;
132         }
133         Interpreter interpreter = threadInterpreter.get();
134         if(interpreter==null) {
135             interpreter = newInterpreter();
136             threadInterpreter.set(interpreter);
137         }
138         return interpreter;
139     }
140
141     /**
142      * Create a new Interpreter instance, preloaded with any supplied
143      * source file and the variables 'self' (this
144      * BeanShellProcessor) and 'controller' (the CrawlController).
145      *
146      * @return the new Interpreter instance
147      */

148     protected Interpreter newInterpreter() {
149         Interpreter interpreter = new Interpreter();
150         try {
151             interpreter.set("self", this);
152             interpreter.set("controller", getController());
153             
154             String JavaDoc filePath = (String JavaDoc) getUncheckedAttribute(null, ATTR_SCRIPT_FILE);
155             if(filePath.length()>0) {
156                 try {
157                     File JavaDoc file = getSettingsHandler().getPathRelativeToWorkingDirectory(filePath);
158                     interpreter.source(file.getPath());
159                 } catch (IOException JavaDoc e) {
160                     logger.log(Level.SEVERE,"unable to read script file",e);
161                 }
162             }
163         } catch (EvalError e) {
164             // TODO Auto-generated catch block
165
e.printStackTrace();
166         }
167         
168         return interpreter;
169     }
170     
171     
172     /**
173      * Setup (or reset) Intepreter variables, as appropraite based on
174      * thread-isolation setting.
175      */

176     public void kickUpdate() {
177         // TODO make it so running state (tallies, etc.) isn't lost on changes
178
// unless unavoidable
179
if((Boolean JavaDoc)getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) {
180             sharedInterpreter = null;
181             threadInterpreter = new ThreadLocal JavaDoc<Interpreter>();
182         } else {
183             sharedInterpreter = newInterpreter();
184             threadInterpreter = null;
185         }
186     }
187 }
188
Popular Tags