KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > filter > URIRegExpFilter


1 /* Copyright (C) 2003 Internet Archive.
2  *
3  * This file is part of the Heritrix web crawler (crawler.archive.org).
4  *
5  * Heritrix is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or
8  * any later version.
9  *
10  * Heritrix is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser Public License
16  * along with Heritrix; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  *
19  * URIRegExpFilter.java
20  * Created on Apr 16, 2003
21  *
22  * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/crawler/filter/URIRegExpFilter.java,v 1.21.2.1 2007/01/13 01:31:21 stack-sf Exp $
23  */

24 package org.archive.crawler.filter;
25
26 import java.util.logging.Level JavaDoc;
27 import java.util.logging.Logger JavaDoc;
28
29 import javax.management.AttributeNotFoundException JavaDoc;
30
31 import org.archive.crawler.datamodel.CrawlURI;
32 import org.archive.crawler.deciderules.DecideRule;
33 import org.archive.crawler.deciderules.DecidingFilter;
34 import org.archive.crawler.framework.Filter;
35 import org.archive.crawler.settings.SimpleType;
36 import org.archive.util.TextUtils;
37
38
39 /**
40  * Compares passed object -- a CrawlURI, UURI, or String --
41  * against a regular expression, accepting matches.
42  *
43  * @author Gordon Mohr
44  * @deprecated As of release 1.10.0. Replaced by {@link DecidingFilter} and
45  * equivalent {@link DecideRule}.
46  */

47 public class URIRegExpFilter
48 extends Filter {
49
50     private static final long serialVersionUID = 1878356276332865537L;
51
52     private static final Logger JavaDoc logger =
53         Logger.getLogger(URIRegExpFilter.class.getName());
54     public static final String JavaDoc ATTR_REGEXP = "regexp";
55     public static final String JavaDoc ATTR_MATCH_RETURN_VALUE = "if-match-return";
56
57     /**
58      * @param name Filter name.
59      */

60     public URIRegExpFilter(String JavaDoc name) {
61         this(name, "URI regexp filter *Deprecated* Use DecidingFilter and " +
62             "equivalent DecideRule instead. ", "");
63         addElementToDefinition(
64             new SimpleType(ATTR_MATCH_RETURN_VALUE, "What to return when" +
65                 " regular expression matches. \n", new Boolean JavaDoc(true)));
66         addElementToDefinition(
67             new SimpleType(ATTR_REGEXP, "Java regular expression.", ""));
68     }
69
70     public URIRegExpFilter(String JavaDoc name, String JavaDoc regexp) {
71         this(name, "URI regexp filter.", regexp);
72     }
73
74     protected URIRegExpFilter(String JavaDoc name, String JavaDoc description, String JavaDoc regexp) {
75         super(name, description);
76         addElementToDefinition(new SimpleType(ATTR_MATCH_RETURN_VALUE,
77             "What to return when" + " regular expression matches. \n",
78             new Boolean JavaDoc(true)));
79         addElementToDefinition(new SimpleType(ATTR_REGEXP,
80             "Java regular expression.", regexp));
81     }
82
83     protected boolean innerAccepts(Object JavaDoc o) {
84         String JavaDoc regexp = getRegexp(o);
85         String JavaDoc str = o.toString();
86         boolean result = (regexp == null)?
87             false: TextUtils.matches(regexp, str);
88         if (logger.isLoggable(Level.FINE)) {
89             logger.fine("Tested '" + str + "' match with regex '" +
90                 getRegexp(o) + " and result was " + result);
91         }
92         return result;
93     }
94
95     /**
96      * Get the regular expression string to match the URI against.
97      *
98      * @param o the object for which the regular expression should be
99      * matched against.
100      * @return the regular expression to match against.
101      */

102     protected String JavaDoc getRegexp(Object JavaDoc o) {
103         try {
104             return (String JavaDoc) getAttribute(o, ATTR_REGEXP);
105         } catch (AttributeNotFoundException JavaDoc e) {
106             logger.severe(e.getMessage());
107             // Basically the filter is inactive if this occurs
108
// (The caller should be returning false when regexp is null).
109
return null;
110         }
111     }
112
113     protected boolean returnTrueIfMatches(CrawlURI curi) {
114         try {
115             return ((Boolean JavaDoc)getAttribute(ATTR_MATCH_RETURN_VALUE, curi)).
116                 booleanValue();
117         } catch (AttributeNotFoundException JavaDoc e) {
118             logger.severe(e.getMessage());
119             return true;
120         }
121     }
122 }
123
Popular Tags