KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > javacoding > jspider > core > task > work > SpiderHttpURLTask


1 package net.javacoding.jspider.core.task.work;
2
3
4 import net.javacoding.jspider.api.model.HTTPHeader;
5 import net.javacoding.jspider.api.model.Site;
6 import net.javacoding.jspider.core.SpiderContext;
7 import net.javacoding.jspider.core.logging.LogFactory;
8 import net.javacoding.jspider.core.event.CoreEvent;
9 import net.javacoding.jspider.core.event.impl.*;
10 import net.javacoding.jspider.core.task.WorkerTask;
11 import net.javacoding.jspider.core.util.http.HTTPHeaderUtil;
12 import net.javacoding.jspider.core.util.URLUtil;
13
14 import java.io.*;
15 import java.net.*;
16
17
18 /**
19  *
20  * $Id: SpiderHttpURLTask.java,v 1.19 2003/04/10 16:19:14 vanrogu Exp $
21  *
22  * @author Günther Van Roey
23  */

24 public class SpiderHttpURLTask extends BaseWorkerTaskImpl {
25
26     protected URL url;
27     protected Site site;
28
29
30     public SpiderHttpURLTask(SpiderContext context, URL url, Site site) {
31         super(context, WorkerTask.WORKERTASK_SPIDERTASK);
32         this.url = url;
33         this.site = site;
34     }
35
36     public void prepare() {
37         context.throttle(site);
38     }
39
40     public void execute() {
41
42         CoreEvent event = null;
43         URLConnection connection = null;
44
45         InputStream inputStream = null;
46
47         int httpStatus = 0;
48         HTTPHeader[] headers = null;
49
50         try {
51
52             connection = url.openConnection();
53
54             if (connection instanceof HttpURLConnection) {
55                 ((HttpURLConnection) connection).setInstanceFollowRedirects(false);
56             }
57
58             connection.setRequestProperty("User-agent", site.getUserAgent());
59             context.preHandle(connection, site);
60
61             long start = System.currentTimeMillis();
62             connection.connect();
63
64             if (connection instanceof HttpURLConnection) {
65                 httpStatus = ((HttpURLConnection) connection).getResponseCode();
66                 switch (httpStatus) {
67                     case HttpURLConnection.HTTP_MOVED_PERM:
68                     case HttpURLConnection.HTTP_MOVED_TEMP:
69                         String JavaDoc redirectURL = connection.getHeaderField("location");
70                         notifyEvent(url, new URLFoundEvent(context, url, URLUtil.normalize(new URL(redirectURL))));
71                         break;
72                     default:
73                         break;
74                 }
75             }
76             inputStream = new BufferedInputStream(connection.getInputStream());
77
78             ByteArrayOutputStream os = new ByteArrayOutputStream();
79             InputStream is = new BufferedInputStream(inputStream);
80             //int size = connection.getContentLength();
81
int size = 0;
82             try {
83                     int i = is.read();
84                     while (i != -1) {
85                         size++;
86                         os.write(i);
87                         i = is.read();
88                     }
89             } catch (IOException e) {
90                 LogFactory.getLog(SpiderHttpURLTask.class).error("i/o exception during fetch",e);
91             }
92
93             String JavaDoc contentType = connection.getContentType();
94             int timeMs = (int) (System.currentTimeMillis() - start);
95
96             headers = HTTPHeaderUtil.getHeaders(connection);
97
98             if (httpStatus >= 200 && httpStatus < 303) {
99                 event = new URLSpideredOkEvent(context, url, httpStatus, connection, contentType, timeMs, size, os.toByteArray(), headers);
100             } else {
101                 event = new URLSpideredErrorEvent(context, url, httpStatus, connection, headers, null);
102             }
103
104             context.postHandle(connection, site);
105
106         } catch (FileNotFoundException e) {
107             headers = HTTPHeaderUtil.getHeaders(connection);
108             event = new URLSpideredErrorEvent(context, url, 404, connection, headers, e);
109         } catch (Exception JavaDoc e) {
110             LogFactory.getLog(this.getClass()).error("exception during spidering", e);
111             event = new URLSpideredErrorEvent(context, url, httpStatus, connection, headers, e);
112         } finally {
113             notifyEvent(url, event);
114             if (inputStream != null) {
115                 try {
116                     inputStream.close();
117                 } catch (IOException e) {
118                     LogFactory.getLog(SpiderHttpURLTask.class).error("i/o exception closing inputstream",e);
119                 }
120             }
121         }
122     }
123
124 }
125
Popular Tags