KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > scope > BroadScope


1 /* Copyright (C) 2003 Internet Archive.
2  *
3  * This file is part of the Heritrix web crawler (crawler.archive.org).
4  *
5  * Heritrix is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or
8  * any later version.
9  *
10  * Heritrix is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser Public License
16  * along with Heritrix; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  *
19  * BroadScope.java
20  * Created on Oct 1, 2003
21  *
22  * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/crawler/scope/BroadScope.java,v 1.18.16.1 2007/01/13 01:31:25 stack-sf Exp $
23  */

24 package org.archive.crawler.scope;
25
26
27 /**
28  * A CrawlScope instance defines which URIs are "in"
29  * a particular crawl.
30  *
31  * It is essentially a Filter which determines, looking at
32  * the totality of information available about a
33  * CandidateURI/CrawlURI instance, if that URI should be
34  * scheduled for crawling.
35  *
36  * <p>Dynamic information inherent in the discovery of the
37  * URI -- such as the path by which it was discovered --
38  * may be considered.
39  *
40  * <p>Dynamic information which requires the consultation
41  * of external and potentially volatile information --
42  * such as current robots.txt requests and the history
43  * of attempts to crawl the same URI -- should NOT be
44  * considered. Those potentially high-latency decisions
45  * should be made at another step.
46  *
47  * @author gojomo
48  *
49  */

50 public class BroadScope extends ClassicScope {
51
52     private static final long serialVersionUID = -2354234238454865888L;
53
54     /**
55      * Constructor.
56      *
57      * @param name Name of this crawlscope.
58      */

59     public BroadScope(String JavaDoc name) {
60         super(name);
61         setDescription("BroadScope: A scope for broad crawls. Crawls made" +
62         " with this scope will not be limited to the hosts or domains of" +
63         " its seeds. NOTE: BroadScoped crawls will eventually run out of" +
64         " memory (See Release Notes).");
65     }
66
67     /**
68      * @param o the URI to check.
69      * @return True if transitive filter accepts passed object.
70      */

71     protected boolean transitiveAccepts(Object JavaDoc o) {
72         return true;
73     }
74
75     /** Check if URI is accepted by the focus of this scope.
76      *
77      * This method should be overridden in subclasses.
78      *
79      * @param o the URI to check.
80      * @return True if focus filter accepts passed object.
81      */

82     protected boolean focusAccepts(Object JavaDoc o) {
83         return true;
84     }
85 }
86
Popular Tags