package net.javacoding.jspider.core.task.work;


import net.javacoding.jspider.api.model.*;
import net.javacoding.jspider.api.event.resource.*;
import net.javacoding.jspider.core.SpiderContext;
import net.javacoding.jspider.core.model.EMailAddressInternal;
import net.javacoding.jspider.core.logging.LogFactory;
import net.javacoding.jspider.core.event.CoreEvent;
import net.javacoding.jspider.core.event.impl.*;
import net.javacoding.jspider.core.task.WorkerTask;
import net.javacoding.jspider.core.util.html.URLFinder;
import net.javacoding.jspider.core.util.html.URLFinderCallback;
import net.javacoding.jspider.core.util.EMailAddressUtil;

import java.io.*;
import java.net.URL;


/**
 * Worker task that reads the content of a fetched resource line by line and
 * hands every line to {@link URLFinder}, acting itself as the
 * {@link URLFinderCallback} that receives whatever the finder reports.
 *
 * Once reading is finished, a {@link ResourceParsedOkEvent} is notified; if
 * an exception is thrown while reading, a {@link ResourceParsedErrorEvent}
 * is notified instead.
 */
public class InterpreteHTMLTask extends BaseWorkerTaskImpl implements URLFinderCallback {

    /** The fetched resource whose content is scanned by this task. */
    protected FetchedResource spideredResource;

    /** URL of {@link #spideredResource}; used as the subject of notified events. */
    protected URL url;

    /**
     * Context URL exposed to the URL finder through {@link #getContextURL()}.
     * Starts out equal to {@link #url} and can be replaced through
     * {@link #setContextURL(URL)}.
     */
    protected URL contextURL;

    /**
     * Creates a task for the given resource.
     *
     * @param context  the spider context this task runs in
     * @param resource the fetched resource to scan
     */
    public InterpreteHTMLTask(SpiderContext context, FetchedResource resource) {
        super(context, WorkerTask.WORKERTASK_THINKERTASK);
        this.spideredResource = resource;
        url = spideredResource.getURL();
        contextURL = url;
    }

    /**
     * Nothing to prepare for this task.
     */
    public void prepare() {
    }

    /**
     * Reads the resource content line by line, passing each line to
     * {@link URLFinder#findURLs} with this task as callback, and notifies
     * the outcome as an event.
     *
     * The reader wrapped around the resource's input stream is always closed
     * before the outcome event is notified, whether or not reading succeeded.
     */
    public void execute() {
        CoreEvent event = null;
        BufferedReader reader = null;
        try {
            InputStream inputStream = spideredResource.getInputStream();
            // NOTE(review): no charset is given, so the platform default
            // encoding is used to decode the content - confirm this is intended.
            reader = new BufferedReader(new InputStreamReader(inputStream));
            String line = reader.readLine();
            while (line != null) {
                URLFinder.findURLs(this, line);
                line = reader.readLine();
            }
            event = new ResourceParsedOkEvent(context, url);
        } catch (IOException e) {
            LogFactory.getLog(InterpreteHTMLTask.class).error("i/o exception during parse", e);
            event = new ResourceParsedErrorEvent(context, url, e);
        } catch (Exception e) {
            LogFactory.getLog(InterpreteHTMLTask.class).error("exception during parse", e);
            event = new ResourceParsedErrorEvent(context, url, e);
        } finally {
            // Release the stream first so it is not leaked, then report the outcome.
            closeQuietly(reader);
            notifyEvent(url, event);
        }
    }

    /**
     * Closes the given reader, logging (but not propagating) any failure so
     * that the parse outcome is still notified afterwards.
     *
     * @param reader the reader to close; may be null if it was never opened
     */
    private void closeQuietly(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            LogFactory.getLog(InterpreteHTMLTask.class).error("i/o exception while closing resource stream", e);
        }
    }

    /**
     * Callback invoked for every well-formed URL reported by the finder.
     *
     * If the URL is recognised as an e-mail address, the address is looked
     * up in storage (and announced as newly discovered when it was not found),
     * registered against the scanned resource, and a reference event is
     * dispatched. Any other URL is notified as a {@link URLFoundEvent}.
     *
     * @param foundURL the URL that was found in the resource
     */
    public void urlFound(URL foundURL) {
        if (EMailAddressUtil.isEMailAddress(foundURL)) {
            String emailAddress = EMailAddressUtil.getEMailAddress(foundURL);
            EMailAddress address = context.getStorage().getEMailAddressDAO().find(emailAddress);
            if (address == null) {
                // Not known yet: create it and announce the discovery.
                address = new EMailAddressInternal(emailAddress);
                context.getEventDispatcher().dispatch(new EMailAddressDiscoveredEvent(this.spideredResource, emailAddress));
            }
            context.getStorage().getEMailAddressDAO().register(spideredResource, address);
            context.getEventDispatcher().dispatch(new EMailAddressReferenceDiscoveredEvent(this.spideredResource, address));
        } else {
            notifyEvent(url, new URLFoundEvent(context, url, foundURL));
        }
    }

    /**
     * Callback invoked when the finder reports a URL that is malformed;
     * dispatches a {@link MalformedURLFoundEvent} for the stored resource
     * that corresponds to this task's URL.
     *
     * @param malformedURL the malformed URL text as found
     */
    public void malformedUrlFound(String malformedURL) {
        context.getEventDispatcher().dispatch(new MalformedURLFoundEvent(context.getStorage().getResourceDAO().getResource(url), malformedURL));
    }

    /**
     * @return the current context URL (initially the URL of the scanned resource)
     */
    public URL getContextURL() {
        return contextURL;
    }

    /**
     * Replaces the context URL.
     *
     * @param url the new context URL
     */
    public void setContextURL(URL url) {
        this.contextURL = url;
    }

    /**
     * Callback invoked when the finder reports a malformed context URL;
     * dispatches a {@link MalformedBaseURLFoundEvent} for the scanned resource.
     *
     * @param malformedURL the malformed URL text as found
     */
    public void malformedContextURLFound(String malformedURL) {
        context.getEventDispatcher().dispatch(new MalformedBaseURLFoundEvent(spideredResource, malformedURL));
    }

}