KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > javacoding > jspider > core > task > work > InterpreteHTMLTask


1 package net.javacoding.jspider.core.task.work;
2
3
4 import net.javacoding.jspider.api.model.*;
5 import net.javacoding.jspider.api.event.resource.*;
6 import net.javacoding.jspider.core.SpiderContext;
7 import net.javacoding.jspider.core.model.EMailAddressInternal;
8 import net.javacoding.jspider.core.logging.LogFactory;
9 import net.javacoding.jspider.core.event.CoreEvent;
10 import net.javacoding.jspider.core.event.impl.*;
11 import net.javacoding.jspider.core.task.WorkerTask;
12 import net.javacoding.jspider.core.util.html.URLFinder;
13 import net.javacoding.jspider.core.util.html.URLFinderCallback;
14 import net.javacoding.jspider.core.util.EMailAddressUtil;
15
16 import java.io.*;
17 import java.net.URL JavaDoc;
18
19
20 /**
21  *
22  * $Id: InterpreteHTMLTask.java,v 1.15 2003/04/10 16:19:14 vanrogu Exp $
23  *
24  * @author Günther Van Roey
25  */

26 public class InterpreteHTMLTask extends BaseWorkerTaskImpl implements URLFinderCallback {
27
28     protected FetchedResource spideredResource;
29     protected URL JavaDoc url;
30
31     protected URL JavaDoc contextURL;
32
33     public InterpreteHTMLTask(SpiderContext context, FetchedResource resource) {
34         super(context, WorkerTask.WORKERTASK_THINKERTASK);
35         this.spideredResource = resource;
36         url = spideredResource.getURL();
37         contextURL = url;
38     }
39
40     public void prepare() {
41     }
42
43     public void execute() {
44         CoreEvent event = null;
45         try {
46             InputStream inputStream = spideredResource.getInputStream();
47             BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
48             String JavaDoc line = br.readLine();
49             while (line != null) {
50                 URLFinder.findURLs(this, line);
51                 line = br.readLine();
52             }
53             event = new ResourceParsedOkEvent(context, url);
54         } catch (IOException e) {
55             LogFactory.getLog(InterpreteHTMLTask.class).error("i/o exception during parse", e);
56             event = new ResourceParsedErrorEvent(context, url, e);
57         } catch (Exception JavaDoc e) {
58             LogFactory.getLog(InterpreteHTMLTask.class).error("exception during parse", e);
59             event = new ResourceParsedErrorEvent(context, url, e);
60         } finally {
61             notifyEvent(url, event );
62         }
63     }
64
65     public void urlFound(URL JavaDoc foundURL) {
66         if (EMailAddressUtil.isEMailAddress(foundURL)) {
67             String JavaDoc emailAddress = EMailAddressUtil.getEMailAddress(foundURL);
68             EMailAddress address = context.getStorage().getEMailAddressDAO().find(emailAddress);
69             if (address == null) {
70                 address = new EMailAddressInternal(emailAddress);
71                 context.getEventDispatcher().dispatch(new EMailAddressDiscoveredEvent(this.spideredResource, emailAddress));
72             }
73             context.getStorage().getEMailAddressDAO().register(spideredResource, address);
74             context.getEventDispatcher().dispatch(new EMailAddressReferenceDiscoveredEvent(this.spideredResource, address));
75         } else {
76             notifyEvent(url, new URLFoundEvent(context, url, foundURL));
77         }
78     }
79
80     public void malformedUrlFound(String JavaDoc malformedURL) {
81         context.getEventDispatcher().dispatch(new MalformedURLFoundEvent(context.getStorage().getResourceDAO().getResource(url), malformedURL));
82     }
83
84     public URL JavaDoc getContextURL() {
85         return contextURL;
86     }
87
88     public void setContextURL(URL JavaDoc url) {
89         this.contextURL = url;
90     }
91
92     public void malformedContextURLFound(String JavaDoc malformedURL) {
93         context.getEventDispatcher().dispatch(new MalformedBaseURLFoundEvent(spideredResource, malformedURL));
94     }
95
96 }
97
Popular Tags