KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > javacoding > jspider > core > model > ResourceInternal


1 package net.javacoding.jspider.core.model;
2
3
4 import net.javacoding.jspider.api.model.*;
5 import net.javacoding.jspider.core.storage.exception.InvalidStateForActionException;
6 import net.javacoding.jspider.core.storage.exception.InvalidStateTransitionException;
7 import net.javacoding.jspider.core.storage.spi.StorageSPI;
8 import net.javacoding.jspider.core.logging.LogFactory;
9 import net.javacoding.jspider.core.util.URLUtil;
10
11 import java.io.InputStream JavaDoc;
12 import java.net.URL JavaDoc;
13 import java.util.*;
14
15
16 /**
17  *
18  * $Id: ResourceInternal.java,v 1.13 2003/04/11 16:37:04 vanrogu Exp $
19  *
20  * @author Günther Van Roey
21  */

22 public class ResourceInternal implements ParsedResource, ParseErrorResource, ParseIgnoredResource, ForbiddenResource, FetchIgnoredResource, FetchErrorResource {
23
24     protected StorageSPI storage;
25
26     protected int site;
27
28     protected URL JavaDoc url;
29     protected Date discoveryTime;
30     protected FolderInternal folder;
31     protected int state;
32     protected int id;
33
34     protected int httpStatus;
35     protected int size;
36     protected int timeMs;
37     protected String JavaDoc mimeType;
38     protected Date fetchTime;
39     protected HTTPHeader[] headers;
40
41     protected Decision spiderDecision;
42     protected Decision parseDecision;
43
44
45     public ResourceInternal(StorageSPI storage, int id, int siteId, URL JavaDoc url, Date discoveryTime, FolderInternal folder) {
46         this.site = siteId;
47         this.storage = storage;
48         this.id = id;
49         this.url = url;
50         this.discoveryTime = discoveryTime;
51         this.folder = folder;
52
53         this.state = Resource.STATE_DISCOVERED;
54     }
55
56     public ResourceInternal(StorageSPI storage, int id, Site site, URL JavaDoc url, Date discoveryTime, FolderInternal folder) {
57         this(storage, id, ((SiteInternal) site).getId(), url, discoveryTime, folder);
58     }
59
60     public ResourceInternal(StorageSPI storage, Site site, URL JavaDoc url, Date discoveryTime, FolderInternal folder) {
61         this(storage, 0, site, url, discoveryTime, folder);
62     }
63
64     public void setFetched(int httpStatus, int size, int timeMs, String JavaDoc mimeType, Date fetchTime, HTTPHeader[] headers) {
65         if (state != Resource.STATE_DISCOVERED) {
66             LogFactory.getLog(Resource.class).error("error in state transition for resource " + url + ":\n" + this);
67             throw new InvalidStateTransitionException("cannot set resource fetched - it's not in the discovered state - was " + state);
68         }
69         this.httpStatus = httpStatus;
70         this.size = size;
71         this.timeMs = timeMs;
72         this.mimeType = mimeType;
73         this.fetchTime = fetchTime;
74         this.headers = headers;
75         state = Resource.STATE_FETCHED;
76     }
77
78     public void setFetchError(int httpStatus, HTTPHeader[] headers) {
79         if (state != Resource.STATE_DISCOVERED && state != Resource.STATE_FETCH_ERROR) {
80             LogFactory.getLog(Resource.class).error("error in state transition for resource " + url + ":\n" + this);
81             throw new InvalidStateTransitionException("cannot set resource fetch error - it's not in the discovered state - was" + state);
82         }
83         this.httpStatus = httpStatus;
84         this.headers = headers;
85         state = Resource.STATE_FETCH_ERROR;
86     }
87
88     public void setParseError() {
89         if (state != Resource.STATE_FETCHED && state != Resource.STATE_PARSE_ERROR) {
90             LogFactory.getLog(Resource.class).error("error in state transition for resource " + url + ":\n" + this);
91             throw new InvalidStateTransitionException("cannot set resource parse error - it's not in the fetched state - was " + state);
92         }
93         state = Resource.STATE_PARSE_ERROR;
94     }
95
96     public void setParsed() {
97         if (state != Resource.STATE_FETCHED && state != Resource.STATE_PARSED) {
98             LogFactory.getLog(Resource.class).error("error in state transition for resource " + url + ":\n" + this);
99             throw new InvalidStateTransitionException("cannot set resource parsed - it's not in the fetched state - was " + state);
100         }
101         state = Resource.STATE_PARSED;
102     }
103
104     public void setFetchIgnored() {
105         if (state != Resource.STATE_DISCOVERED && state != Resource.STATE_FETCH_IGNORED) {
106             LogFactory.getLog(Resource.class).error("error in state transition for resource " + url + ":\n" + this);
107             throw new InvalidStateTransitionException("cannot set resource fetch_ignored - it's not in the discovered state - was " + state);
108         }
109         state = Resource.STATE_FETCH_IGNORED;
110     }
111
112     public void setParseIgnored() {
113         if (state != Resource.STATE_FETCHED && state != Resource.STATE_PARSE_IGNORED) {
114             LogFactory.getLog(Resource.class).error("error in state transition for resource " + url + ":\n" + this);
115             throw new InvalidStateTransitionException("cannot set resource parse_ignored - it's not in the fetched state - was " + state);
116         }
117         state = Resource.STATE_PARSE_IGNORED;
118     }
119
120     public void setForbidden() {
121         if (state != Resource.STATE_DISCOVERED && state != Resource.STATE_FETCH_FORBIDDEN) {
122             LogFactory.getLog(Resource.class).error("error in state transition for resource " + url + ":\n" + this);
123             throw new InvalidStateTransitionException("cannot set resource forbidden - it's not in the discovered state - was " + state);
124         }
125         state = Resource.STATE_FETCH_FORBIDDEN;
126     }
127
128     public int getId() {
129         return id;
130     }
131
132     public void setInt(int id) {
133         this.id = id;
134     }
135
136     public int getState() {
137         return state;
138     }
139
140     public String JavaDoc getFileName() {
141         return URLUtil.getFileName(url);
142     }
143
144     public URL JavaDoc getURL() {
145         return url;
146     }
147
148     public Site getSite() {
149         return folder.getSite();
150     }
151
152     public Folder getFolder() {
153         return folder;
154     }
155
156     public String JavaDoc getName() {
157         return url.getFile();
158     }
159
160     public Date getDiscoveryTime() {
161         return discoveryTime;
162     }
163
164     public Resource[] getReferers() {
165         return storage.getResourceDAO().getRefereringResources(this);
166     }
167
168     public Resource[] getReferencedResources() {
169         if (state != Resource.STATE_PARSED) {
170             throw new InvalidStateForActionException("cannot get referenced resources if not parsed");
171         }
172         return storage.getResourceDAO().getReferencedResources(this);
173     }
174
175     public int getHttpStatus() {
176         if (state == Resource.STATE_DISCOVERED) {
177             throw new InvalidStateForActionException("cannot get http status for a resource that's not fetched");
178         }
179         return httpStatus;
180     }
181
182     public int getHttpStatusInternal() {
183         return httpStatus;
184     }
185
186     public void setHttpStatus(int status) {
187         this.httpStatus = status;
188     }
189
190     public HTTPHeader[] getHeaders() {
191         return headers;
192     }
193
194     public int getTimeMs() {
195         if (state < Resource.STATE_FETCHED) {
196             throw new InvalidStateForActionException("cannot get timing for non-fetched resource");
197         }
198         return timeMs;
199     }
200
201     public int getTimeMsInternal() {
202         return timeMs;
203     }
204
205     public int getSize() {
206         if (state < Resource.STATE_FETCHED) {
207             throw new InvalidStateForActionException("cannot get size for non-fetched resource");
208         }
209         return size;
210     }
211
212     public int getSizeInternal() {
213         return size;
214     }
215
216     public String JavaDoc getMime() {
217         if (state < Resource.STATE_FETCHED) {
218             throw new InvalidStateForActionException("cannot get mime type for non-fetched resource");
219         }
220         return mimeType;
221     }
222
223     public String JavaDoc getMimeInternal() {
224         return mimeType;
225     }
226
227     public Date getFetchTime() {
228         if (state < Resource.STATE_FETCHED) {
229             throw new InvalidStateForActionException("cannot get fetch time for non-fetched resource");
230         }
231         return fetchTime;
232     }
233
234     public Date getFetchTimeInternal() {
235         return fetchTime;
236     }
237
238     public String JavaDoc toString() {
239         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
240         sb.append(url.toString());
241         sb.append("\n STATUS : ");
242         sb.append(translateState(state));
243         sb.append("\n ");
244         sb.append("\n SPIDER DECISION : ");
245         Decision sd = getSpiderDecision();
246         if (sd == null) {
247             sb.append("\n ");
248             sb.append("[Not yet taken]");
249         } else {
250             DecisionStep[] steps = sd.getSteps();
251             for (int i = 0; i < steps.length; i++) {
252                 DecisionStep step = steps[i];
253                 sb.append("\n ");
254                 sb.append(step.toString());
255             }
256         }
257         sb.append("\n ");
258         sb.append("\n PARSE DECISION : ");
259         Decision pd = getParseDecision();
260         if (pd == null) {
261             sb.append("\n ");
262             sb.append("[Not yet taken]");
263         } else {
264             DecisionStep[] steps = pd.getSteps();
265             for (int i = 0; i < steps.length; i++) {
266                 DecisionStep step = steps[i];
267                 sb.append("\n ");
268                 sb.append(step.toString());
269             }
270         }
271         sb.append("\n");
272
273         switch (state) {
274             case STATE_DISCOVERED:
275                 break;
276             case STATE_FETCH_ERROR:
277                 sb.append(" HTTP Status: ");
278                 sb.append(this.getHttpStatus());
279                 Resource[] referers = this.getReferers();
280                 sb.append("\n REFERERS: " + referers.length);
281                 for (int i = 0; i < referers.length; i++) {
282                     Resource referer = referers[i];
283                     sb.append("\n ");
284                     sb.append(referer.getURL());
285                 }
286                 break;
287             case STATE_FETCH_IGNORED:
288                 break;
289             case STATE_FETCH_FORBIDDEN:
290                 break;
291             case STATE_FETCHED:
292                 sb.append(" HTTP Status: ");
293                 sb.append(this.getHttpStatus());
294                 sb.append(", Content size: ");
295                 sb.append(this.getSize());
296                 sb.append(", Mime Type: ");
297                 sb.append(this.getMime());
298                 sb.append(", Fetch time: ");
299                 sb.append(this.getTimeMs());
300                 break;
301             case STATE_PARSE_ERROR:
302                 break;
303             case STATE_PARSE_IGNORED:
304                 break;
305             case STATE_PARSED:
306                 sb.append(" HTTP Status: ");
307                 sb.append(this.getHttpStatus());
308                 sb.append(", Content size: ");
309                 sb.append(this.getSize());
310                 sb.append(", Mime Type: ");
311                 sb.append(this.getMime());
312                 sb.append(", Fetch time: ");
313                 sb.append(this.getTimeMs());
314
315                 referers = this.getReferers();
316                 sb.append("\n REFERERS: " + referers.length);
317                 for (int i = 0; i < referers.length; i++) {
318                     Resource referer = referers[i];
319                     sb.append("\n ");
320                     sb.append(referer.getURL());
321                 }
322
323                 if (state == STATE_PARSED) {
324
325                     Resource[] references = this.getReferencedResources();
326                     sb.append("\n REFERENCES: " + references.length);
327                     for (int i = 0; i < references.length; i++) {
328                         Resource reference = references[i];
329                         sb.append("\n ");
330                         sb.append(reference.getURL());
331                     }
332
333                     EMailAddress[] emails = this.getEmailAddresses();
334                     sb.append("\n E-MAIL ADDRESSES: " + emails.length);
335                     for (int i = 0; i < emails.length; i++) {
336                         EMailAddress email = emails[i];
337                         sb.append("\n ");
338                         sb.append(email.getAddress());
339                     }
340
341                 } else {
342                     sb.append("\n EMAIL ADDRESSES and REFERENCES not known [Resource not parsed]");
343                 }
344                 break;
345         }
346
347         sb.append("\n");
348
349         return sb.toString();
350     }
351
352     protected String JavaDoc translateState(int state) {
353         switch (state) {
354             case Resource.STATE_DISCOVERED:
355                 return "DISCOVERED";
356             case Resource.STATE_FETCH_ERROR:
357                 return "FETCH_ERROR";
358             case Resource.STATE_PARSE_ERROR:
359                 return "PARSE_ERROR";
360             case Resource.STATE_FETCHED:
361                 return "FETCHED";
362             case Resource.STATE_FETCH_FORBIDDEN:
363                 return "FETCH_FORBIDDEN";
364             case Resource.STATE_FETCH_IGNORED:
365                 return "FETCH_IGNORED";
366             case Resource.STATE_PARSE_IGNORED:
367                 return "PARSE_IGNORED";
368             case Resource.STATE_PARSED:
369                 return "PARSED";
370             default:
371                 return "?!? UNKNOWN STATE ?!?";
372
373         }
374     }
375
376     public InputStream JavaDoc getInputStream() {
377         return storage.getContentDAO().getInputStream(id);
378     }
379
380     public void setBytes(byte[] bytes) {
381         storage.getContentDAO().setBytes(id, bytes);
382     }
383
384     public Date getFetchTimeStamp() {
385         return fetchTime;
386     }
387
388     public String JavaDoc getStateName() {
389         return translateState(state);
390     }
391
392     public Decision getSpiderDecision() {
393         return storage.getDecisionDAO().findSpiderDecision(this);
394     }
395
396     public Decision getParseDecision() {
397         return storage.getDecisionDAO().findParseDecision(this);
398     }
399
400     public void setState(int state) {
401         this.state = state;
402     }
403
404     public void setMime(String JavaDoc mime) {
405         this.mimeType = mime;
406     }
407
408     public void setTime(int ms) {
409         this.timeMs = ms;
410     }
411
412     public void setSize(int size) {
413         this.size = size;
414     }
415
416     public int getSiteId() {
417         return site;
418     }
419
420     public ResourceReference[] getOutgoingReferences() {
421         return storage.getResourceDAO().getOutgoingReferences(this) ;
422     }
423
424     public ResourceReference[] getIncomingReferences() {
425         return storage.getResourceDAO().getIncomingReferences(this) ;
426     }
427
428     public EMailAddress[] getEmailAddresses() {
429         return storage.getEMailAddressDAO().findByResource(this);
430     }
431
432     public EMailAddressReference[] getEmailAddressReferences() {
433         return storage.getEMailAddressDAO().findReferencesByResource(this);
434     }
435 }
436
Popular Tags