KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > MatchesRegExpDecideRule


1 /* MatchesRegExpDecideRule
2 *
3 * $Id: MatchesRegExpDecideRule.java,v 1.4.16.1 2007/01/13 01:31:14 stack-sf Exp $
4 *
5 * Created on Apr 4, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.deciderules;
26
27 import java.util.logging.Level JavaDoc;
28 import java.util.logging.Logger JavaDoc;
29
30 import javax.management.AttributeNotFoundException JavaDoc;
31
32 import org.archive.crawler.settings.SimpleType;
33 import org.archive.util.TextUtils;
34
35
36
37 /**
38  * Rule applies configured decision to any CrawlURIs whose String URI
39  * matches the supplied regexp.
40  *
41  * @author gojomo
42  */

43 public class MatchesRegExpDecideRule extends PredicatedDecideRule {
44
45     private static final long serialVersionUID = 6441410917074319295L;
46
47     private static final Logger JavaDoc logger =
48         Logger.getLogger(MatchesRegExpDecideRule.class.getName());
49     
50     public static final String JavaDoc ATTR_REGEXP = "regexp";
51
52     /**
53      * Usual constructor.
54      * @param name
55      */

56     public MatchesRegExpDecideRule(String JavaDoc name) {
57         super(name);
58         setDescription("MatchesRegExpDecideRule. Applies the configured " +
59             "decision to URIs matching the supplied regular expression.");
60         addElementToDefinition(new SimpleType(ATTR_REGEXP, "Java regular" +
61             "expression to match.", ""));
62     }
63
64     /**
65      * Evaluate whether given object's string version
66      * matches configured regexp
67      *
68      * @param object
69      * @return true if regexp is matched
70      */

71     protected boolean evaluate(Object JavaDoc object) {
72         try {
73             String JavaDoc regexp = getRegexp(object);
74             String JavaDoc str = object.toString();
75             boolean result = (regexp == null)?
76                     false: TextUtils.matches(regexp, str);
77             if (logger.isLoggable(Level.FINE)) {
78                 logger.fine("Tested '" + str + "' match with regex '" +
79                         regexp + " and result was " + result);
80             }
81             return result;
82         } catch (ClassCastException JavaDoc e) {
83             // if not CrawlURI, always disregard
84
return false;
85         }
86     }
87     
88     /**
89      * Get the regular expression string to match the URI against.
90      *
91      * @param o the object for which the regular expression should be
92      * matched against.
93      * @return the regular expression to match against.
94      */

95     protected String JavaDoc getRegexp(Object JavaDoc o) {
96         try {
97             return (String JavaDoc) getAttribute(o, ATTR_REGEXP);
98         } catch (AttributeNotFoundException JavaDoc e) {
99             logger.severe(e.getMessage());
100             return null; // Basically the filter is inactive if this occurs.
101
}
102     }
103 }
104
Popular Tags