KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > TooManyHopsDecideRule


1 /* TooManyHopsDecideRule
2 *
3 * $Id: TooManyHopsDecideRule.java,v 1.8.18.1 2007/01/13 01:31:15 stack-sf Exp $
4 *
5 * Created on Apr 1, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.deciderules;
26
27 import org.archive.crawler.datamodel.CandidateURI;
28 import org.archive.crawler.settings.SimpleType;
29 import org.archive.crawler.settings.Type;
30
31
32
33 /**
34  * Rule REJECTs any CrawlURIs whose total number of hops (length of the
35  * hopsPath string, traversed links of any type) is over a threshold.
36  * Otherwise returns PASS.
37  *
38  * @author gojomo
39  */

40 public class TooManyHopsDecideRule extends PredicatedDecideRule {
41
42     private static final long serialVersionUID = -5429536193865916670L;
43
44     private static final String JavaDoc ATTR_MAX_HOPS = "max-hops";
45     
46     /**
47      * Default access so available to test code.
48      */

49     static final Integer JavaDoc DEFAULT_MAX_HOPS = new Integer JavaDoc(20);
50
51     /**
52      * Usual constructor.
53      * @param name Name of this DecideRule.
54      */

55     public TooManyHopsDecideRule(String JavaDoc name) {
56         super(name);
57         setDescription("TooManyHopsDecideRule. REJECTs URIs discovered " +
58                 "after too many hops (followed links of any type) from seed.");
59         addElementToDefinition(new SimpleType(ATTR_MAX_HOPS, "Max path" +
60                 " depth for which this filter will match", DEFAULT_MAX_HOPS));
61         // make default REJECT (overriding superclass) & always-default
62
Type type = addElementToDefinition(new SimpleType(ATTR_DECISION,
63                 "Decision to be applied", REJECT, ALLOWED_TYPES));
64         type.setTransient(true);
65     }
66
67     /**
68      * Evaluate whether given object is over the threshold number of
69      * hops.
70      *
71      * @param object
72      * @return true if the mx-hops is exceeded
73      */

74     protected boolean evaluate(Object JavaDoc object) {
75         try {
76             CandidateURI curi = (CandidateURI)object;
77             return curi.getPathFromSeed() != null &&
78                 curi.getPathFromSeed().length() > getThresholdHops(object);
79         } catch (ClassCastException JavaDoc e) {
80             // if not CrawlURI, always disregard
81
return false;
82         }
83     }
84
85     /**
86      * @param obj Conext object.
87      * @return hops cutoff threshold
88      */

89     private int getThresholdHops(Object JavaDoc obj) {
90         return ((Integer JavaDoc)getUncheckedAttribute(obj,ATTR_MAX_HOPS)).intValue();
91     }
92 }
93
Popular Tags