KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > OnDomainsDecideRule


1 /* OnDomainsDecideRule
2 *
3 * $Id: OnDomainsDecideRule.java,v 1.5.4.1 2007/01/13 01:31:15 stack-sf Exp $
4 *
5 * Created on Apr 5, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.deciderules;
26
27
28 import org.archive.util.SurtPrefixSet;
29
30
31 /**
32  * Rule applies configured decision to any URIs that
33  * are on one of the domains in the configured set of
34  * domains, filled from the seed set.
35  *
36  * @author gojomo
37  */

38 public class OnDomainsDecideRule extends SurtPrefixedDecideRule {
39
40     private static final long serialVersionUID = -3872369060554558805L;
41     //private static final Logger logger =
42
// Logger.getLogger(OnDomainsDecideRule.class.getName());
43
/**
44      * Usual constructor.
45      * @param name
46      */

47     public OnDomainsDecideRule(String JavaDoc name) {
48         super(name);
49         setDescription(
50                  "OnDomainsDecideRule. Makes the configured decision " +
51                  "for any URI which is inside one of the domains in the " +
52                  "configured set of domains (derived from the seed" +
53                  "list, with 'www' removed when present).");
54         // disable direct setting of SURTs-related options
55
//getElementFromDefinition(ATTR_SEEDS_AS_SURT_PREFIXES).setTransient(true);
56
//getElementFromDefinition(ATTR_SURTS_SOURCE_FILE).setTransient(true);
57
// leaving surts-dump as option helpful for debugging/learning, for now
58
//getElementFromDefinition(ATTR_SURTS_DUMP_FILE).setTransient(true);
59
}
60
61     /**
62      * Patch the SURT prefix set so that it only includes host-enforcing prefixes
63      *
64      * @see org.archive.crawler.deciderules.SurtPrefixedDecideRule#readPrefixes()
65      */

66     protected void readPrefixes() {
67         buildSurtPrefixSet();
68         surtPrefixes.convertAllPrefixesToDomains();
69         dumpSurtPrefixSet();
70     }
71     
72     protected String JavaDoc prefixFrom(String JavaDoc uri) {
73         return SurtPrefixSet.convertPrefixToDomain(super.prefixFrom(uri));
74     }
75 }
76
Popular Tags