KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > matuschek > jobo > JoBoBase


1 package net.matuschek.jobo;
2
3 /************************************************
4     Copyright (c) 2001/2002 by Daniel Matuschek
5 *************************************************/

6
7 import java.io.File JavaDoc;
8 import java.io.FileWriter JavaDoc;
9 import java.io.Writer JavaDoc;
10
11 import net.matuschek.http.DownloadRuleSet;
12 import net.matuschek.http.HttpDocToFile;
13 import net.matuschek.http.HttpToolCallback;
14 import net.matuschek.spider.RegExpURLCheck;
15 import net.matuschek.spider.WebRobot;
16 import net.matuschek.spider.WebRobotCallback;
17 import net.matuschek.spider.docfilter.FilterChain;
18 import net.matuschek.spider.docfilter.LinkLocalizer;
19
20 import org.apache.log4j.Category;
21
22 import org.exolab.castor.mapping.Mapping;
23 import org.exolab.castor.xml.Marshaller;
24 import org.exolab.castor.xml.Unmarshaller;
25
26 import org.xml.sax.InputSource JavaDoc;
27
28 /**
29  * This is a simple class that contains all needed features for JoBo
30  * (the web robot, the download rules, RegExpUrlCheck ...)
31  *
32  * @author Daniel Matuschek
33  * @version $Revision: 1.21 $
34  */

35 public class JoBoBase {
36
37   /** Log4J logging */
38   private static Category log = Category.getInstance("");
39
40   /** The file used for XML->Java mapping */
41   private static String JavaDoc mappingfile="mapping.xml";
42
43   /** The jobo configuration in XML */
44   private static String JavaDoc xmlconfig="jobo.xml";
45
46   /** Start URL for the robot */
47 // private static String startUrl=null;
48

49   private String JavaDoc storageDirectory = "/tmp";
50   private WebRobot robot = null;
51   private RegExpURLCheck urlcheck = null;
52   private DownloadRuleSet downloadrules = null;
53   private HttpDocToFile docstore = null;
54
55   /** Filter to localize included links */
56   private LinkLocalizer linkLocalizer = null;
57
58   /** FilterChains with all filters */
59   private FilterChain filters = null;
60       
61
62   /**
63    * @exception ClassNotFoundException if the Robot could not be instantiated
64    * for some reason
65    */

66   public JoBoBase()
67     throws ClassNotFoundException JavaDoc
68   {
69     log = Category.getInstance(this.getClass());
70     docstore = new HttpDocToFile(storageDirectory);
71     initializeFilters();
72     robot = new WebRobot();
73     robot.setFilters(filters);
74   }
75
76
77   /**
78    * Set the default filter chain
79    */

80   public void initializeFilters() {
81     filters = new FilterChain();
82     linkLocalizer = new LinkLocalizer();
83     filters.add(linkLocalizer);
84   }
85
86
87   /**
88    * write the settings to an XML file
89    */

90   public void saveConfig(String JavaDoc filename) {
91     File JavaDoc f1 = new File JavaDoc(mappingfile);
92
93     if (f1.exists()) {
94       Mapping mapping = new Mapping();
95       try {
96     mapping.loadMapping(mappingfile);
97     Writer JavaDoc writer = new FileWriter JavaDoc(filename);
98     Marshaller marshaller = new Marshaller(writer);
99     marshaller.setMapping(mapping);
100     marshaller.marshal(this);
101     writer.close();
102     
103     log.info("written to XML");
104       } catch (Exception JavaDoc e) {
105     log.error(e.getMessage());
106     e.printStackTrace();
107       }
108     } else {
109       log.error("mapping and/or configfile not found");
110     }
111   }
112   
113
114
115   public void registerHttpToolCallback(HttpToolCallback cb) {
116     robot.setHttpToolCallback(cb);
117   }
118
119   public void registerWebRobotCallback(WebRobotCallback cb) {
120     robot.setWebRobotCallback(cb);
121   }
122
123   /**
124    * registers the regexpurlcheck and the download rules with the robot
125    */

126   public void configureRobot() {
127     robot.setURLCheck(urlcheck);
128     robot.setDownloadRuleSet(downloadrules);
129     robot.setDocManager(docstore);
130     robot.setFilters(filters);
131   }
132
133
134   
135   /**
136    * Get the value of urlcheck.
137    * @return Value of urlcheck.
138    */

139   public RegExpURLCheck getURLCheck () {
140     return urlcheck;
141   }
142   
143   /**
144    * Set the value of urlcheck.
145    * @param v Value to assign to urlcheck.
146    */

147   public void setURLCheck(RegExpURLCheck urlcheck ) {
148     this.urlcheck = urlcheck;
149   }
150   
151   /**
152    * Get the value of robot.
153    * @return Value of robot.
154    */

155   public WebRobot getRobot () {
156     return robot;
157   }
158   
159   /**
160    * Set the value of robot. The new Robot will use the
161    * filter that are defined in JoBoBase, even if he had
162    * its own FilterChain before.
163    *
164    * @param robot WebRobot object to use
165    */

166   public void setRobot(WebRobot robot) {
167     this.robot = robot;
168     robot.setFilters(filters);
169   }
170
171
172   /**
173    * Localize links ?
174    *
175    * @param localize if this is true, JoBo will trz to replace
176    * absolute links by relative
177    */

178   public void setLocalizeLinks(boolean localize)
179   {
180     if (localize) {
181       linkLocalizer.enable();
182     } else {
183       linkLocalizer.disable();
184     }
185   }
186
187
188   /**
189    * is link localization enabled ?
190    */

191   public boolean getLocalizeLinks() {
192     return linkLocalizer.isEnabled();
193   }
194
195
196   /**
197    * Get the value of downloadRules.
198    * @return Value of downloadRules.
199    */

200   public DownloadRuleSet getDownloadRuleSet () {
201     return downloadrules;
202   }
203   
204   /**
205    * Set the value of downloadRules.
206    * @param v Value to assign to downloadRules.
207    */

208   public void setDownloadRuleSet(DownloadRuleSet downloadRuleSet) {
209     this.downloadrules = downloadRuleSet;
210   }
211   
212   /**
213    * Get the value of storageDirectory.
214    * @return Value of storageDirectory.
215    */

216   public String JavaDoc getStorageDirectory () {
217     return storageDirectory;
218   }
219   
220   /**
221    * Set the value of storageDirectory.
222    * @param v Value to assign to storageDirectory.
223    */

224   public void setStorageDirectory(String JavaDoc storageDirectory ) {
225     this.storageDirectory = storageDirectory;
226     docstore.setBaseDir(storageDirectory);
227   }
228
229
230   /**
231    * Enable/disable storing of dynamic documents (with an "?"
232    * somewhere in the URL
233    *
234    * @param v true: enable storing of <b>all</b> documents,
235    * false: store only documents with an URL without "?"
236    */

237   public void setStoreCGI(boolean storeCGI) {
238     this.docstore.setStoreCGI(storeCGI);
239   }
240
241
242   /**
243    * Get the status of storeCGI
244    *
245    * @return the current status of storeCGI
246    * @see #setStoreCGI for more information
247    */

248   public boolean getStoreCGI() {
249     return this.docstore.getStoreCGI();
250   }
251
252
253
254   /**
255    * Unmarshall the object from an XML file (jobo.xml) in the current
256    * directory
257    *
258    * @exception ClassNotFoundException if the Robot could not be instantiated
259    * for some reason
260    */

261   public static JoBoBase createFromXML()
262     throws ClassNotFoundException JavaDoc
263   {
264     return createFromXML(".");
265   }
266
267
268   /**
269    * Unmarshall the object from an XML file
270    *
271    * @param configDirectory name of the directory where jobo.xml and
272    * mapping.xml should be read from.
273    * @exception ClassNotFoundException if the Robot could not be instantiated
274    * for some reason
275    */

276   public static JoBoBase createFromXML(String JavaDoc configDirectory)
277     throws ClassNotFoundException JavaDoc
278   {
279     JoBoBase baseobj = null;
280     
281     xmlconfig="jobo.xml";
282
283     File JavaDoc f1 = new File JavaDoc(configDirectory+File.separatorChar+mappingfile);
284     File JavaDoc f2 = new File JavaDoc(configDirectory+File.separatorChar+xmlconfig);
285
286     if (f1.exists() && f2.exists()) {
287       Mapping mapping = new Mapping();
288       try {
289     mapping.loadMapping(f1.getPath());
290     Unmarshaller unmar = new Unmarshaller(mapping);
291     unmar.setDebug(true);
292     baseobj=(JoBoBase)unmar.unmarshal(new InputSource JavaDoc(f2.getPath()));
293     
294     log.info("configured from XML");
295         
296       } catch (Exception JavaDoc e) {
297     log.error(e.getMessage());
298     e.printStackTrace();
299       }
300     } else {
301       log.error("mapping and/or configfile not found");
302     }
303       
304     if (baseobj==null) {
305       baseobj = new JoBoBase();
306     }
307
308     baseobj.configureRobot();
309
310     return baseobj;
311   }
312
313
314
315
316 } // JoBoBase
317
Popular Tags