KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > HopsPathMatchesRegExpDecideRule


1 /* HopsPathMatchesRegExpDecideRule
2 *
3 * $Id: HopsPathMatchesRegExpDecideRule.java,v 1.1.16.1 2007/01/13 01:31:14 stack-sf Exp $
4 *
5 * Created on June 23, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.deciderules;
26
27 import java.util.logging.Level JavaDoc;
28 import java.util.logging.Logger JavaDoc;
29
30 import javax.management.AttributeNotFoundException JavaDoc;
31
32 import org.archive.crawler.datamodel.CandidateURI;
33 import org.archive.crawler.settings.SimpleType;
34 import org.archive.util.TextUtils;
35
36 /**
37  * Rule applies configured decision to any CrawlURIs whose 'hops-path'
38  * (string like "LLXE" etc.) matches the supplied regexp.
39  *
40  * @author gojomo
41  */

42 public class HopsPathMatchesRegExpDecideRule extends PredicatedDecideRule {
43
44     private static final long serialVersionUID = -8881013912393934053L;
45
46     private static final Logger JavaDoc logger =
47         Logger.getLogger(HopsPathMatchesRegExpDecideRule.class.getName());
48     
49     public static final String JavaDoc ATTR_REGEXP = "regexp";
50
51     /**
52      * Usual constructor.
53      * @param name
54      */

55     public HopsPathMatchesRegExpDecideRule(String JavaDoc name) {
56         super(name);
57         setDescription("HopsPathMatchesRegExpDecideRule. Applies the " +
58                 "configured decision to URIs whose hops-path (string with " +
59                 "L E R X P etc) matches the supplied regular expression.");
60         addElementToDefinition(new SimpleType(ATTR_REGEXP, "Java regular" +
61             "expression to match.", ""));
62     }
63
64     /**
65      * Evaluate whether given object (if CandidateURI) has hops-path
66      * matching configured regexp
67      *
68      * @param object
69      * @return true if regexp is matched
70      */

71     protected boolean evaluate(Object JavaDoc object) {
72         try {
73             String JavaDoc regexp = getRegexp(object);
74             String JavaDoc str = ((CandidateURI)object).getPathFromSeed();
75             boolean result = (regexp == null)?
76                     false: TextUtils.matches(regexp, str);
77             if (logger.isLoggable(Level.FINE)) {
78                 logger.fine("Tested '" + str + "' match with regex '" +
79                         regexp + " and result was " + result);
80             }
81             return result;
82         } catch (ClassCastException JavaDoc e) {
83             // if not CrawlURI, always disregard
84
return false;
85         }
86     }
87     
88     /**
89      * Get the regular expression string to match the URI against.
90      *
91      * @param o the object for which the regular expression should be
92      * matched against.
93      * @return the regular expression to match against.
94      */

95     protected String JavaDoc getRegexp(Object JavaDoc o) {
96         try {
97             return (String JavaDoc) getAttribute(o, ATTR_REGEXP);
98         } catch (AttributeNotFoundException JavaDoc e) {
99             logger.severe(e.getMessage());
100             return null; // Basically the filter is inactive if this occurs.
101
}
102     }
103 }
104
Popular Tags