KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > framework > Filter


1 /* Copyright (C) 2003 Internet Archive.
2  *
3  * This file is part of the Heritrix web crawler (crawler.archive.org).
4  *
5  * Heritrix is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or
8  * any later version.
9  *
10  * Heritrix is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser Public License
16  * along with Heritrix; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  *
19  * Filter.java
20  * Created on Apr 16, 2003
21  *
22  * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/crawler/framework/Filter.java,v 1.21.18.1 2007/01/13 01:31:22 stack-sf Exp $
23  */

24 package org.archive.crawler.framework;
25
26 import java.util.logging.Level JavaDoc;
27 import java.util.logging.Logger JavaDoc;
28
29 import javax.management.AttributeNotFoundException JavaDoc;
30
31 import org.archive.crawler.datamodel.CrawlURI;
32 import org.archive.crawler.settings.ComplexType;
33 import org.archive.crawler.settings.MapType;
34 import org.archive.crawler.settings.ModuleType;
35 import org.archive.crawler.settings.SimpleType;
36
37 /**
38  * Base class for filter classes.
39  * <p>
40  * Several classes allow 'filters' to be applied to them. Filters are classes
41  * that, based on an arbitrary object passed to them, return a boolean stating
42  * if if passes the filter. Thus applying filters can affect the behavior of
43  * those classes. This class provides the basic framework for filters. All
44  * detailed implementation of filters inherit from it and it is considered to
45  * be a 'null' filter (always returns true).
46  *
47  * @author Gordon Mohr
48  *
49  * @see org.archive.crawler.framework.Processor
50  */

51 public class Filter extends ModuleType {
52
53     private static final long serialVersionUID = -356718306794776802L;
54
55     private static Logger JavaDoc logger =
56         Logger.getLogger("org.archive.crawler.framework.Filter");
57
58     public static final String JavaDoc ATTR_ENABLED = "enabled";
59
60     /**
61      * Creates a new 'null' filter.
62      * @param name the name of the filter.
63      * @param description an description of the filter suitable for showing in
64      * the user interface.
65      */

66     public Filter(String JavaDoc name, String JavaDoc description) {
67         super(name, description);
68         addElementToDefinition(
69             new SimpleType(ATTR_ENABLED,
70                 "Filter is enabled.", new Boolean JavaDoc(true)));
71     }
72
73     /**
74      * Creates a new 'null' filter.
75      * @param name the name of the filter.
76      */

77     public Filter(String JavaDoc name) {
78         this(name, "Null filter - accepts everything.");
79     }
80
81     public boolean accepts(Object JavaDoc o) {
82         CrawlURI curi = (o instanceof CrawlURI) ? (CrawlURI) o : null;
83
84         // Skip the evaluation if the filter is disabled
85
try {
86             if (!((Boolean JavaDoc)getAttribute(ATTR_ENABLED, curi)).booleanValue()) {
87                 return getFilterOffPosition(curi);
88             }
89         } catch (AttributeNotFoundException JavaDoc e) {
90             logger.severe(e.getMessage());
91         }
92
93         boolean accept = returnTrueIfMatches(curi) == innerAccepts(o);
94         if (accept && logger.isLoggable(Level.FINEST)) {
95             // Log if filter returns true
96
ComplexType p = this.getParent();
97             if (p instanceof MapType) {
98                 p = p.getParent();
99             }
100             String JavaDoc msg = this.toString() + " belonging to " + p.toString()
101                 + " accepted " + o.toString();
102             logger.finest(msg);
103         }
104         return accept;
105     }
106     
107     /**
108      * If the filter is disabled, the value returned by this method is
109      * what filters return as their disabled setting.
110      * Default is that we return 'true', continue processing, but some
111      * filters -- the exclude filters for example -- will want to return
112      * false if disabled so processing can continue.
113      * @param curi CrawlURI to use as context. Passed curi can be null.
114      * @return This filters 'off' position.
115      */

116     protected boolean getFilterOffPosition(CrawlURI curi) {
117         return true;
118     }
119
120     /**
121      * Checks to see if filter functionality should be inverted for this
122      * curi.<p>
123      *
124      * All filters will by default return true if curi is accepted by the
125      * filter. If this method returns false, then the filter will return true
126      * if doesn't match.<p>
127      *
128      * Classes extending this class should override this method with
129      * appropriate code.
130      *
131      * @param curi Current CrawlURI
132      * @return true for default behaviour, false otherwise.
133      */

134     protected boolean returnTrueIfMatches(CrawlURI curi){
135         return true;
136     }
137
138     /**
139      * Classes subclassing this one should override this method to perfrom
140      * their custom determination of whether or not the object given to it.
141      *
142      * @param o The object
143      * @return True if it passes the filter.
144      */

145     protected boolean innerAccepts(Object JavaDoc o) {
146         return true;
147     }
148
149     public String JavaDoc toString() {
150         return "Filter<" + getName() + ">";
151     }
152
153     public void kickUpdate() {
154         // by default, do nothing
155
}
156 }
157
Popular Tags