KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > frontier > FrontierJournal


1 /* FrontierJournal
2  *
3  * Created on Oct 26, 2004
4  *
5  * Copyright (C) 2004 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.frontier;
24
import java.io.File;
import java.io.IOException;

import org.archive.crawler.datamodel.CrawlURI;
import org.archive.net.UURI;
30
31 /**
32  * Record of key Frontier happenings.
33  * @author stack
34  * @version $Date: 2005/09/07 22:35:26 $, $Revision: 1.8 $
35  */

36 public interface FrontierJournal {
37     public static final String JavaDoc LOGNAME_RECOVER = "recover";
38
39     /**
40      * @param curi CrawlURI that has been scheduled to be added to the
41      * Frontier.
42      */

43     public abstract void added(CrawlURI curi);
44
45     /**
46      * @param curi CrawlURI that finished successfully.
47      */

48     public abstract void finishedSuccess(CrawlURI curi);
49
50     /**
51      * @param uuri UURI that finished successfully.
52      */

53     public abstract void finishedSuccess(UURI uuri);
54
55     /**
56      * Note that a CrawlURI was emitted for processing.
57      * If not followed by a finished or rescheduled notation in
58      * the journal, the CrawlURI was still in-process when the journal ended.
59      *
60      * @param curi CrawlURI emitted.
61      */

62     public abstract void emitted(CrawlURI curi);
63
64     /**
65      * @param u UURI that finished unsuccessfully
66      */

67     public abstract void finishedFailure(UURI u);
68     
69     /**
70      * @param curi CrawlURI finished unsuccessfully.
71      */

72     public abstract void finishedFailure(CrawlURI curi);
73
74     /**
75      * @param curi CrawlURI that was returned to the Frontier for
76      * another try.
77      */

78     public abstract void rescheduled(CrawlURI curi);
79
80     /**
81      * Flush and close any held objects.
82      */

83     public abstract void close();
84     
85     /**
86      * Checkpoint.
87      * @param checkpointDir Directory we're checkpointing into.
88      * @throws IOException
89      */

90     public abstract void checkpoint(final File JavaDoc checkpointDir)
91     throws IOException JavaDoc;
92
93     /**
94      * Add a line noting a serious crawl error.
95      *
96      * @param string
97      */

98     public abstract void seriousError(String JavaDoc string);
99 }
Popular Tags