KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > FetchStatusMatchesRegExpDecideRule


1 /* $Id: FetchStatusMatchesRegExpDecideRule.java,v 1.2.4.1 2007/01/13 01:31:14 stack-sf Exp $
2 *
3 * Created on Sep 4, 2006
4 *
5 * Copyright (C) 2006 Olaf Freyer.
6 *
7 * This file is part of the Heritrix web crawler (crawler.archive.org).
8 *
9 * Heritrix is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * any later version.
13 *
14 * Heritrix is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser Public License
20 * along with Heritrix; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */

23 package org.archive.crawler.deciderules;
24
25 import java.util.logging.Level JavaDoc;
26 import java.util.logging.Logger JavaDoc;
27 import javax.management.AttributeNotFoundException JavaDoc;
28
29 import org.archive.crawler.datamodel.CrawlURI;
30 import org.archive.crawler.settings.SimpleType;
31 import org.archive.util.TextUtils;
32
33
34 public class FetchStatusMatchesRegExpDecideRule extends PredicatedDecideRule {
35
36     private static final long serialVersionUID = -3088156729860241312L;
37
38     private final Logger JavaDoc logger = Logger.getLogger(this.getClass().getName());
39     
40     public static final String JavaDoc ATTR_REGEXP = "regexp";
41     
42     /**
43      * Usual constructor.
44      * @param name Name of this DecideRule.
45      */

46     public FetchStatusMatchesRegExpDecideRule(String JavaDoc name) {
47         super(name);
48         setDescription("FetchStatusMatchesRegExpDecideRule. Applies " +
49             "configured decision to any URI that has a fetch status matching " +
50             "the given regular expression.");
51         addElementToDefinition(new SimpleType(ATTR_REGEXP, "Java regular" +
52                 "expression to match.", ""));
53     }
54
55     protected boolean evaluate(Object JavaDoc object) {
56         try {
57             String JavaDoc regexp = getRegexp(object);
58             CrawlURI curi = (CrawlURI)object;
59             String JavaDoc str = String.valueOf(curi.getFetchStatus());
60             boolean result = (regexp == null)?
61                     false: TextUtils.matches(regexp, str);
62             if (logger.isLoggable(Level.FINE)) {
63                 logger.fine("Tested '" + str + "' match with regex '" +
64                         regexp + " and result was " + result);
65             }
66             return result;
67         } catch (ClassCastException JavaDoc e) {
68             // if not CrawlURI, always disregard
69
return false;
70         }
71     }
72     
73     /**
74      * Get the regular expression string to match the URI against.
75      *
76      * @param o the object for which the regular expression should be
77      * matched against.
78      * @return the regular expression to match against.
79      */

80     protected String JavaDoc getRegexp(Object JavaDoc o) {
81         try {
82             return (String JavaDoc) getAttribute(o, ATTR_REGEXP);
83         } catch (AttributeNotFoundException JavaDoc e) {
84             logger.severe(e.getMessage());
85             return null; // Basically the filter is inactive if this occurs.
86
}
87     }
88 }
Popular Tags