KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > processor > BeanShellProcessor


1 /* BeanShellProcessor
2  *
3  * Created on Aug 4, 2006
4  *
5  * Copyright (C) 2006 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.processor;
24
25 import java.io.File JavaDoc;
26 import java.io.IOException JavaDoc;
27 import java.util.Collections JavaDoc;
28 import java.util.HashMap JavaDoc;
29 import java.util.Map JavaDoc;
30 import java.util.logging.Level JavaDoc;
31 import java.util.logging.Logger JavaDoc;
32
33 import org.archive.crawler.datamodel.CrawlURI;
34 import org.archive.crawler.datamodel.FetchStatusCodes;
35 import org.archive.crawler.framework.Processor;
36 import org.archive.crawler.settings.SimpleType;
37 import org.archive.crawler.settings.Type;
38
39 import bsh.EvalError;
40 import bsh.Interpreter;
41
42 /**
43  * A processor which runs a BeanShell script on the CrawlURI.
44  *
45  * Script source may be provided via a file
46  * local to the crawler.
47  * Script source should define
48  * a method with one argument, 'run(curi)'. Each processed CrawlURI is
49  * passed to this script method.
50  *
51  * Other variables available to the script include 'self' (this
52  * BeanShellProcessor instance) and 'controller' (the crawl's
53  * CrawlController instance).
54  *
55  * @author gojomo
56  * @version $Date: 2007/01/13 01:31:25 $, $Revision: 1.4.2.1 $
57  */

58 public class BeanShellProcessor extends Processor implements FetchStatusCodes {
59
60     private static final long serialVersionUID = 6926589944337050754L;
61
62     private static final Logger JavaDoc logger =
63         Logger.getLogger(BeanShellProcessor.class.getName());
64
65     /** setting for script file */
66     public final static String JavaDoc ATTR_SCRIPT_FILE = "script-file";
67
68     /** whether each thread should have its own script runner (true), or
69      * they should share a single script runner with synchronized access */

70     public final static String JavaDoc ATTR_ISOLATE_THREADS = "isolate-threads";
71
72     protected ThreadLocal JavaDoc<Interpreter> threadInterpreter;
73     protected Interpreter sharedInterpreter;
74     public Map JavaDoc<Object JavaDoc,Object JavaDoc> sharedMap = Collections.synchronizedMap(
75             new HashMap JavaDoc<Object JavaDoc,Object JavaDoc>());
76     
77     /**
78      * Constructor.
79      * @param name Name of this processor.
80      */

81     public BeanShellProcessor(String JavaDoc name) {
82         super(name, "BeanShellProcessor. Runs the BeanShell script source " +
83                 "(supplied directly or via a file path) against the " +
84                 "current URI. Source should define a script method " +
85                 "'process(curi)' which will be passed the current CrawlURI. " +
86                 "The script may also access this BeanShellProcessor via" +
87                 "the 'self' variable and the CrawlController via the " +
88                 "'controller' variable.");
89         Type t = addElementToDefinition(new SimpleType(ATTR_SCRIPT_FILE,
90                 "BeanShell script file", ""));
91         t.setOverrideable(false);
92         t = addElementToDefinition(new SimpleType(ATTR_ISOLATE_THREADS,
93                 "Whether each ToeThread should get its own independent " +
94                 "script context, or they should share synchronized access " +
95                 "to one context. Default is true, meaning each threads " +
96                 "gets its own isolated context.", true));
97         t.setOverrideable(false);
98
99     }
100
101     protected synchronized void innerProcess(CrawlURI curi) {
102         // depending on previous configuration, interpreter may
103
// be local to this thread or shared
104
Interpreter interpreter = getInterpreter();
105         synchronized(interpreter) {
106             // synchronization is harmless for local thread interpreter,
107
// necessary for shared interpreter
108
try {
109                 interpreter.set("curi",curi);
110                 interpreter.eval("process(curi)");
111             } catch (EvalError e) {
112                 // TODO Auto-generated catch block
113
e.printStackTrace();
114             }
115         }
116     }
117
118     /**
119      * Get the proper Interpreter instance -- either shared or local
120      * to this thread.
121      * @return Interpreter to use
122      */

123     protected Interpreter getInterpreter() {
124         if(sharedInterpreter!=null) {
125             return sharedInterpreter;
126         }
127         Interpreter interpreter = threadInterpreter.get();
128         if(interpreter==null) {
129             interpreter = newInterpreter();
130             threadInterpreter.set(interpreter);
131         }
132         return interpreter;
133     }
134
135     /**
136      * Create a new Interpreter instance, preloaded with any supplied
137      * source code or source file and the variables 'self' (this
138      * BeanShellProcessor) and 'controller' (the CrawlController).
139      *
140      * @return the new Interpreter instance
141      */

142     protected Interpreter newInterpreter() {
143         Interpreter interpreter = new Interpreter();
144         try {
145             interpreter.set("self", this);
146             interpreter.set("controller", getController());
147             
148             String JavaDoc filePath = (String JavaDoc) getUncheckedAttribute(null, ATTR_SCRIPT_FILE);
149             if(filePath.length()>0) {
150                 try {
151                     File JavaDoc file = getSettingsHandler().getPathRelativeToWorkingDirectory(filePath);
152                     interpreter.source(file.getPath());
153                 } catch (IOException JavaDoc e) {
154                     logger.log(Level.SEVERE,"unable to read script file",e);
155                 }
156             }
157         } catch (EvalError e) {
158             // TODO Auto-generated catch block
159
e.printStackTrace();
160         }
161         
162         return interpreter;
163     }
164
165     protected void initialTasks() {
166         super.initialTasks();
167         kickUpdate();
168     }
169
170     /**
171      * Setup (or reset) Intepreter variables, as appropraite based on
172      * thread-isolation setting.
173      */

174     public void kickUpdate() {
175         // TODO make it so running state (tallies, etc.) isn't lost on changes
176
// unless unavoidable
177
if((Boolean JavaDoc)getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) {
178             sharedInterpreter = null;
179             threadInterpreter = new ThreadLocal JavaDoc<Interpreter>();
180         } else {
181             sharedInterpreter = newInterpreter();
182             threadInterpreter = null;
183         }
184     }
185 }
186
Popular Tags