KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > ClassKeyMatchesRegExpDecideRule


1 /* ClassKeyMatchesRegExpDecideRule
2 *
3 * $Id: ClassKeyMatchesRegExpDecideRule.java,v 1.1.16.1 2007/01/13 01:31:13 stack-sf Exp $
4 *
5 * Created on Apr 4, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.deciderules;
26
27 import java.util.logging.Level JavaDoc;
28 import java.util.logging.Logger JavaDoc;
29
30 import org.archive.crawler.datamodel.CandidateURI;
31 import org.archive.util.TextUtils;
32
33
34
35 /**
36  * Rule applies the configured decision to any CrawlURI whose class key -- i.e.
37  * {@link CandidateURI#getClassKey()} -- matches the supplied regexp.
38  *
39  * @author gojomo
40  */

41 public class ClassKeyMatchesRegExpDecideRule extends MatchesRegExpDecideRule {
42
43     private static final long serialVersionUID = 1178873944436973294L;
44
45     private static final Logger JavaDoc logger =
46         Logger.getLogger(ClassKeyMatchesRegExpDecideRule.class.getName());
47
48     /**
49      * Usual constructor.
50      * @param name
51      */

52     public ClassKeyMatchesRegExpDecideRule(String JavaDoc name) {
53         super(name);
54         setDescription("ClassKeyMatchesRegExpDecideRule. " +
55             "Applies the configured " +
56             "decision to class keys matching the supplied " +
57             "regular expression. Class keys are values set into " +
58             "an URL by the Frontier. They are usually the names " +
59             "of queues used by the Frontier. Class keys can " +
60             "look like hostname + port or be plain IPs (It will " +
61             "depend on the Frontier implementation/configuration).");
62     }
63
64     /**
65      * Evaluate passed object.
66      * Test first that its CandidateURI. If so, does it have a class key.
67      * If not, ask frontier for its classkey. Then test against regex.
68      *
69      * @param object
70      * @return true if regexp is matched
71      */

72     protected boolean evaluate(Object JavaDoc object) {
73         try {
74             CandidateURI cauri = (CandidateURI)object;
75             String JavaDoc classKey = cauri.getClassKey();
76             if (classKey == null || classKey.length() <= 0) {
77                 classKey = getSettingsHandler().getOrder().getController().
78                     getFrontier().getClassKey(cauri);
79                 cauri.setClassKey(classKey);
80             }
81             String JavaDoc regexp = getRegexp(cauri);
82             boolean result = (regexp == null)?
83                 false: TextUtils.matches(regexp, cauri.getClassKey());
84             if (logger.isLoggable(Level.FINE)) {
85                 logger.fine("Tested '" + cauri.getClassKey() +
86                     "' match with regex '" + regexp + " and result was " +
87                     result);
88             }
89             return result;
90         } catch (ClassCastException JavaDoc e) {
91             // if not CrawlURI, always disregard
92
return false;
93         }
94     }
95 }
Popular Tags