KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > filter > PathDepthFilter


1 /* Copyright (C) 2003 Internet Archive.
2  *
3  * This file is part of the Heritrix web crawler (crawler.archive.org).
4  *
5  * Heritrix is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or
8  * any later version.
9  *
10  * Heritrix is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser Public License
16  * along with Heritrix; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  *
19  * PathDepthFilter.java
20  * Created on Oct 3, 2003
21  *
22  * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/crawler/filter/PathDepthFilter.java,v 1.18.2.1 2007/01/13 01:31:21 stack-sf Exp $
23  */

24 package org.archive.crawler.filter;
25
26 import java.util.logging.Logger JavaDoc;
27
28 import javax.management.AttributeNotFoundException JavaDoc;
29
30 import org.apache.commons.httpclient.URIException;
31 import org.archive.crawler.datamodel.CandidateURI;
32 import org.archive.crawler.datamodel.CrawlURI;
33 import org.archive.crawler.deciderules.DecideRule;
34 import org.archive.crawler.deciderules.DecidingFilter;
35 import org.archive.crawler.framework.Filter;
36 import org.archive.crawler.settings.SimpleType;
37 import org.archive.net.UURI;
38
39 /**
40  * Accepts all urls passed in with a path depth
41  * less or equal than the max-path-depth
42  * value.
43  *
44  * @author Igor Ranitovic
45  * @deprecated As of release 1.10.0. Replaced by {@link DecidingFilter} and
46  * equivalent {@link DecideRule}.
47  */

48 public class PathDepthFilter extends Filter {
49
50     private static final long serialVersionUID = 1626115117327154205L;
51
52     private static final Logger JavaDoc logger =
53         Logger.getLogger(PathDepthFilter.class.getName());
54     public static final String JavaDoc ATTR_MATCH_RETURN_VALUE =
55         "path-less-or-equal-return";
56     public static final String JavaDoc ATTR_MAX_PATH_DEPTH = "max-path-depth";
57     Integer JavaDoc maxPathDepth = new Integer JavaDoc(Integer.MAX_VALUE);
58     final static char slash = '/';
59
60     /**
61      * @param name
62      */

63     public PathDepthFilter(String JavaDoc name) {
64         super(name, "Path depth less or equal filter *Deprecated* Use" +
65                 "DecidingFilter and equivalent DecideRule instead.");
66         addElementToDefinition(new SimpleType(ATTR_MAX_PATH_DEPTH, "Max path" +
67                 " depth for which this filter will match", maxPathDepth));
68         addElementToDefinition(new SimpleType(ATTR_MATCH_RETURN_VALUE,
69                 "What to return when path depth is less or equal to max path" +
70                 " depth. \n", new Boolean JavaDoc(true)));
71     }
72
73     protected boolean innerAccepts(Object JavaDoc o) {
74         String JavaDoc path = null;
75         if (o == null) {
76             return false;
77         }
78         
79         if (o instanceof CandidateURI) {
80             try {
81                 if (((CandidateURI)o).getUURI() != null) {
82                     path = ((CandidateURI)o).getUURI().getPath();
83                 }
84             }
85             catch (URIException e) {
86                 logger.severe("Failed getpath for " +
87                     ((CandidateURI)o).getUURI());
88             }
89         } else if (o instanceof UURI) {
90             try {
91                 path = ((UURI)o).getPath();
92             }
93             catch (URIException e) {
94                 logger.severe("Failed getpath for " + o);
95             }
96         }
97
98         if (path == null) {
99             return true;
100         }
101
102         int count = 0;
103         for (int i = path.indexOf(slash); i != -1;
104                 i = path.indexOf(slash, i + 1)) {
105             count++;
106         }
107         
108         if (o instanceof CrawlURI) {
109             try {
110                 this.maxPathDepth = (Integer JavaDoc) getAttribute(
111                         ATTR_MAX_PATH_DEPTH, (CrawlURI) o);
112             } catch (AttributeNotFoundException JavaDoc e) {
113                 logger.severe(e.getMessage());
114             }
115         }
116         
117         return (this.maxPathDepth != null) ?
118             count <= this.maxPathDepth.intValue():
119             false;
120     }
121
122     protected boolean returnTrueIfMatches(CrawlURI curi) {
123        try {
124            return ((Boolean JavaDoc) getAttribute(ATTR_MATCH_RETURN_VALUE, curi)).
125                booleanValue();
126        } catch (AttributeNotFoundException JavaDoc e) {
127            logger.severe(e.getMessage());
128            return true;
129        }
130     }
131     
132     protected boolean getFilterOffPosition(CrawlURI curi) {
133         return returnTrueIfMatches(curi);
134     }
135 }
Popular Tags