KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > event > CrawlURIDispositionListener


1 /* Copyright (C) 2003 Internet Archive.
2  *
3  * This file is part of the Heritrix web crawler (crawler.archive.org).
4  *
5  * Heritrix is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or
8  * any later version.
9  *
10  * Heritrix is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser Public License
16  * along with Heritrix; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */

19 package org.archive.crawler.event;
20
21 import org.archive.crawler.datamodel.CrawlURI;
22
23 /**
24  * An interface for objects that want to be notified
25  * of a CrawlURI disposition (happens each time a
26  * curi has been through the processors).
27  * Classes implementing this interface can register with
28  * the CrawlController to receive these events.
29  * <p>
30  * This interface is to facilitate the gathering of
31  * statistics on a running crawl.
32  * <p>
33  * <b>WARNING:</b> One of these methods <i>will</i> be
34  * called for <b>each</b> CrawlURI that is processed.
35  * It is therefore imperative that the methods execute
36  * quickly!
37  * <p>
38  * Also note that the object implementing this interface
39  * must under <b>no circumstances</b> maintain a reference
40  * to the CrawlURI beyond the scope of the relevant method
41  * body!
42  *
43  * @author Kristinn Sigurdsson
44  *
45  * @see org.archive.crawler.framework.CrawlController
46  */

47 public interface CrawlURIDispositionListener
48 {
49     /**
50      * Notification of a successfully crawled URI.
51      *
52      * @param curi The relevant CrawlURI
53      */

54     public void crawledURISuccessful(CrawlURI curi);
55
56     /**
57      * Notification of a failed crawl of a URI that
58      * will be retried (failure due to possible transient
59      * problems).
60      *
61      * @param curi The relevant CrawlURI
62      */

63     public void crawledURINeedRetry(CrawlURI curi);
64
65     /**
66      * Notification of a crawled URI that is to be disregarded.
67      * Usually this means that the robots.txt file for the
68      * relevant site forbids this URI from being crawled and we are
69      * therefore not going to keep it. Other reasons may apply.
70      * In all cases this means that it <i>was</i> successfully
71      * downloaded but will not be stored.
72      *
73      * @param curi The relevant CrawlURI
74      */

75     public void crawledURIDisregard(CrawlURI curi);
76
77     /**
78      * Notification of a failed crawling of a URI. The failure
79      * is of a type that precludes retries (either by its very
80      * nature or because it has been retried too many times).
81      *
82      * @param curi The relevant CrawlURI
83      */

84     public void crawledURIFailure(CrawlURI curi);
85
86 }
87
Popular Tags