package net.matuschek.jobo;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;

import net.matuschek.http.DownloadRuleSet;
import net.matuschek.http.HttpDocToFile;
import net.matuschek.http.HttpToolCallback;
import net.matuschek.spider.RegExpURLCheck;
import net.matuschek.spider.WebRobot;
import net.matuschek.spider.WebRobotCallback;
import net.matuschek.spider.docfilter.FilterChain;
import net.matuschek.spider.docfilter.LinkLocalizer;

import org.apache.log4j.Category;

import org.exolab.castor.mapping.Mapping;
import org.exolab.castor.xml.Marshaller;
import org.exolab.castor.xml.Unmarshaller;

import org.xml.sax.InputSource;

/**
 * Holds the pieces a JoBo run is assembled from: a {@link WebRobot}, a URL
 * check, a download rule set, a file based document store and a filter chain.
 *
 * <p>An instance can be written to XML with {@link #saveConfig(String)} and
 * recreated with {@link #createFromXML(String)}; both use a Castor mapping
 * file named {@code mapping.xml}.
 */
public class JoBoBase {

  /** Logger shared by all instances and by the static factory methods. */
  private static final Category log = Category.getInstance(JoBoBase.class);

  /** File name of the Castor mapping description. */
  private static final String MAPPING_FILE = "mapping.xml";

  /** File name of the XML configuration read by {@link #createFromXML(String)}. */
  private static final String XML_CONFIG = "jobo.xml";

  /** Base directory handed to the document store. */
  private String storageDirectory = "/tmp";

  private WebRobot robot = null;
  private RegExpURLCheck urlcheck = null;
  private DownloadRuleSet downloadrules = null;
  private HttpDocToFile docstore = null;

  /** Filter that is switched on and off by {@link #setLocalizeLinks(boolean)}. */
  private LinkLocalizer linkLocalizer = null;

  /** Filter chain passed to the robot. */
  private FilterChain filters = null;

  /**
   * Creates the document store (rooted at the default storage directory),
   * the filter chain and a fresh {@link WebRobot} that uses that chain.
   *
   * <p>URL check, download rules and document store are only handed to the
   * robot by {@link #configureRobot()}.
   *
   * @throws ClassNotFoundException declared for callers; which collaborator
   *     raises it is not visible in this class
   */
  public JoBoBase()
      throws ClassNotFoundException
  {
    docstore = new HttpDocToFile(storageDirectory);
    // NOTE(review): overridable method invoked from the constructor; kept
    // because subclasses may depend on it to install their own filters.
    initializeFilters();
    robot = new WebRobot();
    robot.setFilters(filters);
  }

  /**
   * Builds a new filter chain whose only member is a new
   * {@link LinkLocalizer}. Any previously created chain is replaced in this
   * object, but the robot is not told about the new chain here.
   */
  public void initializeFilters() {
    filters = new FilterChain();
    linkLocalizer = new LinkLocalizer();
    filters.add(linkLocalizer);
  }

  /**
   * Marshals this object to the given file using the Castor mapping found in
   * {@code mapping.xml} (resolved against the current working directory).
   *
   * <p>Failures are logged, not thrown. If the mapping file does not exist,
   * nothing is written.
   *
   * @param filename path of the XML file to write
   */
  public void saveConfig(String filename) {
    File mappingFile = new File(MAPPING_FILE);

    if (!mappingFile.exists()) {
      log.error("mapping and/or configfile not found");
      return;
    }

    Mapping mapping = new Mapping();
    Writer writer = null;
    try {
      mapping.loadMapping(MAPPING_FILE);
      writer = new FileWriter(filename);
      Marshaller marshaller = new Marshaller(writer);
      marshaller.setMapping(mapping);
      marshaller.marshal(this);

      // Close on the success path inside the try so that a failing
      // flush/close is reported as an error instead of "written to XML".
      writer.close();
      writer = null;

      log.info("written to XML");
    } catch (Exception e) {
      // Pass the exception itself so the stack trace ends up in the log.
      log.error("could not write configuration to " + filename
                + ": " + e.getMessage(), e);
    } finally {
      // Only reached with an open writer when something above failed.
      if (writer != null) {
        try {
          writer.close();
        } catch (IOException ignored) {
          // The original failure has already been logged.
        }
      }
    }
  }

  /**
   * Passes the given callback to the robot's {@code setHttpToolCallback}.
   *
   * @param cb callback to register
   */
  public void registerHttpToolCallback(HttpToolCallback cb) {
    robot.setHttpToolCallback(cb);
  }

  /**
   * Passes the given callback to the robot's {@code setWebRobotCallback}.
   *
   * @param cb callback to register
   */
  public void registerWebRobotCallback(WebRobotCallback cb) {
    robot.setWebRobotCallback(cb);
  }

  /**
   * Hands the currently stored URL check, download rule set, document store
   * and filter chain to the robot. Call this after changing any of them.
   */
  public void configureRobot() {
    robot.setURLCheck(urlcheck);
    robot.setDownloadRuleSet(downloadrules);
    robot.setDocManager(docstore);
    robot.setFilters(filters);
  }

  /**
   * @return the stored URL check, or {@code null} if none was set
   */
  public RegExpURLCheck getURLCheck() {
    return urlcheck;
  }

  /**
   * Stores the URL check; it reaches the robot on the next
   * {@link #configureRobot()}.
   *
   * @param urlcheck the URL check to store
   */
  public void setURLCheck(RegExpURLCheck urlcheck) {
    this.urlcheck = urlcheck;
  }

  /**
   * @return the robot currently held by this object
   */
  public WebRobot getRobot() {
    return robot;
  }

  /**
   * Replaces the robot and immediately gives it this object's filter chain.
   * Other settings are not transferred until {@link #configureRobot()}.
   *
   * @param robot the new robot; must not be {@code null}
   * @throws NullPointerException if {@code robot} is {@code null}
   */
  public void setRobot(WebRobot robot) {
    this.robot = robot;
    robot.setFilters(filters);
  }

  /**
   * Enables or disables the {@link LinkLocalizer} filter.
   *
   * @param localize {@code true} to enable the filter, {@code false} to
   *     disable it
   */
  public void setLocalizeLinks(boolean localize)
  {
    if (localize) {
      linkLocalizer.enable();
    } else {
      linkLocalizer.disable();
    }
  }

  /**
   * @return whether the {@link LinkLocalizer} filter reports itself enabled
   */
  public boolean getLocalizeLinks() {
    return linkLocalizer.isEnabled();
  }

  /**
   * @return the stored download rule set, or {@code null} if none was set
   */
  public DownloadRuleSet getDownloadRuleSet() {
    return downloadrules;
  }

  /**
   * Stores the download rule set; it reaches the robot on the next
   * {@link #configureRobot()}.
   *
   * @param downloadRuleSet the rule set to store
   */
  public void setDownloadRuleSet(DownloadRuleSet downloadRuleSet) {
    this.downloadrules = downloadRuleSet;
  }

  /**
   * @return the storage directory last set on this object
   */
  public String getStorageDirectory() {
    return storageDirectory;
  }

  /**
   * Remembers the storage directory and sets it as the base directory of the
   * document store.
   *
   * @param storageDirectory new base directory
   */
  public void setStorageDirectory(String storageDirectory) {
    this.storageDirectory = storageDirectory;
    docstore.setBaseDir(storageDirectory);
  }

  /**
   * Forwards the flag to the document store's {@code setStoreCGI}.
   * Presumably controls whether CGI generated documents are stored --
   * confirm against {@link HttpDocToFile}.
   *
   * @param storeCGI value to forward
   */
  public void setStoreCGI(boolean storeCGI) {
    this.docstore.setStoreCGI(storeCGI);
  }

  /**
   * @return the value reported by the document store's {@code getStoreCGI}
   */
  public boolean getStoreCGI() {
    return this.docstore.getStoreCGI();
  }

  /**
   * Same as {@link #createFromXML(String)} with {@code "."} as the
   * configuration directory.
   *
   * @return a configured instance, never {@code null}
   * @throws ClassNotFoundException if thrown while constructing the fallback
   *     instance
   */
  public static JoBoBase createFromXML()
      throws ClassNotFoundException
  {
    return createFromXML(".");
  }

  /**
   * Creates an instance from {@code jobo.xml}, using the Castor mapping in
   * {@code mapping.xml}; both files are looked up in the given directory.
   *
   * <p>If either file is missing or unmarshalling fails, the problem is
   * logged and a default-constructed instance is used instead. In every case
   * {@link #configureRobot()} is called on the result before it is returned.
   *
   * @param configDirectory directory containing the two files
   * @return a configured instance, never {@code null}
   * @throws ClassNotFoundException if thrown while constructing the fallback
   *     instance
   */
  public static JoBoBase createFromXML(String configDirectory)
      throws ClassNotFoundException
  {
    JoBoBase baseobj = null;

    File mappingFile = new File(configDirectory + File.separatorChar + MAPPING_FILE);
    File configFile = new File(configDirectory + File.separatorChar + XML_CONFIG);

    if (mappingFile.exists() && configFile.exists()) {
      Mapping mapping = new Mapping();
      try {
        mapping.loadMapping(mappingFile.getPath());
        Unmarshaller unmar = new Unmarshaller(mapping);
        unmar.setDebug(true);
        baseobj = (JoBoBase) unmar.unmarshal(new InputSource(configFile.getPath()));

        log.info("configured from XML");
      } catch (Exception e) {
        // Pass the exception itself so the stack trace ends up in the log.
        log.error("could not read configuration from " + configFile.getPath()
                  + ": " + e.getMessage(), e);
      }
    } else {
      log.error("mapping and/or configfile not found");
    }

    // Fall back to defaults when nothing could be loaded.
    if (baseobj == null) {
      baseobj = new JoBoBase();
    }

    baseobj.configureRobot();

    return baseobj;
  }

}