KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > framework > FrontierHostStatistics


1 /* FrontierHostStatistics
2  *
3  * $Id: FrontierHostStatistics.java,v 1.1 2004/09/02 02:16:08 gojomo Exp $
4  *
5  * Created on Mar 30, 2004
6  *
7  * Copyright (C) 2004 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.framework;
26
27
/**
 * An optional interface that Frontiers can implement to provide information
 * about specific hosts.
 *
 * <p>Some Frontier implementations will want to provide a number of
 * statistics relating to the progress of particular hosts. This only applies
 * to those Frontiers whose internal structure uses hosts to split up the
 * workload and (for example) implement politeness. Some other Frontiers may
 * also provide this information based on calculations.
 *
 * <ul>
 * <li> {@link #activeHosts() Active hosts}
 * <li> {@link #inactiveHosts() Inactive hosts}
 * <li> {@link #deferredHosts() Deferred hosts}
 * <li> {@link #inProcessHosts() In process hosts}
 * <li> {@link #readyHosts() Ready hosts}
 * <li> {@link #hostStatus(String) Host status}
 * </ul>
 *
 * @author Kristinn Sigurdsson
 *
 * @see org.archive.crawler.framework.Frontier
 */
public interface FrontierHostStatistics {

    // Interface fields are implicitly public static final and interface
    // methods are implicitly public, so the redundant modifiers are omitted.

    /**
     * Host has not been encountered by the Frontier, or has been encountered
     * but has been inactive for so long that it has expired.
     */
    int HOST_UNKNOWN = 0;

    /** Host has URIs ready to be emitted. */
    int HOST_READY = 1;

    /** Host has URIs currently being processed. */
    int HOST_INPROCESS = 2;

    /**
     * Host has been deferred for some amount of time and will become ready
     * once that time has elapsed. This is most likely due to politeness or
     * waiting between retries. Other conditions may exist.
     */
    int HOST_DEFERRED = 3;

    /**
     * Host has been encountered and all available URIs for it have been
     * processed already. More URIs may become available later or not.
     * Inactive hosts may eventually become 'forgotten'.
     */
    int HOST_INACTIVE = 4;

    /**
     * Total number of hosts that are currently active.
     *
     * <p>Active hosts are considered to be those that are ready, deferred or
     * in process.
     *
     * @return Total number of hosts that are currently active.
     */
    int activeHosts();

    /**
     * Total number of inactive hosts.
     *
     * <p>Inactive hosts are those hosts that have been active but have now
     * been exhausted and contain no more additional URIs.
     *
     * @return Total number of inactive hosts.
     */
    int inactiveHosts();

    /**
     * Total number of deferred hosts.
     *
     * <p>Deferred hosts are currently active hosts that have been deferred
     * from processing for the time being (because of politeness or waiting
     * before retrying).
     *
     * @return Total number of deferred hosts.
     */
    int deferredHosts();

    /**
     * Total number of hosts with URIs in process.
     *
     * <p>It is generally assumed that each host can have only one URI in
     * process at the same time. However, some frontiers may implement
     * politeness differently, meaning that the same host is both ready and
     * in process. {@link #activeHosts() activeHosts()} will not count them
     * twice though.
     *
     * @return Total number of hosts with URIs in process.
     */
    int inProcessHosts();

    /**
     * Total number of hosts that have a URI ready for processing.
     *
     * @return Total number of hosts that have a URI ready for processing.
     */
    int readyHosts();

    /**
     * Get the status of a host.
     *
     * <p>Hosts can be in one of the following states:
     * <ul>
     * <li> {@link #HOST_READY Ready}
     * <li> {@link #HOST_INPROCESS In process}
     * <li> {@link #HOST_DEFERRED Deferred}
     * <li> {@link #HOST_INACTIVE Inactive}
     * <li> {@link #HOST_UNKNOWN Unknown}
     * </ul>
     *
     * <p>Some Frontiers may allow a host to have more than one URI in process
     * at the same time. In those cases it will be reported as
     * {@link #HOST_READY Ready} as long as it has more URIs ready for
     * processing. Only once it has no more possible URIs for processing will
     * it be reported as {@link #HOST_INPROCESS In process}.
     *
     * @param host The name of the host to look up the status for.
     * @return The status of the specified host.
     *
     * @see #HOST_DEFERRED
     * @see #HOST_INACTIVE
     * @see #HOST_INPROCESS
     * @see #HOST_READY
     * @see #HOST_UNKNOWN
     */
    int hostStatus(String host);

}
156
Popular Tags