KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > filter > HopsFilter


1 /* Copyright (C) 2003 Internet Archive.
2  *
3  * This file is part of the Heritrix web crawler (crawler.archive.org).
4  *
5  * Heritrix is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or
8  * any later version.
9  *
10  * Heritrix is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser Public License
16  * along with Heritrix; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  *
19  * HopsFilter.java
20  * Created on Oct 3, 2003
21  *
22  * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/crawler/filter/HopsFilter.java,v 1.12.2.1 2007/01/13 01:31:21 stack-sf Exp $
23  */

24 package org.archive.crawler.filter;
25
26 import java.util.logging.Logger JavaDoc;
27
28 import javax.management.AttributeNotFoundException JavaDoc;
29
30 import org.archive.crawler.datamodel.CandidateURI;
31 import org.archive.crawler.datamodel.CrawlURI;
32 import org.archive.crawler.extractor.Link;
33 import org.archive.crawler.framework.CrawlScope;
34 import org.archive.crawler.framework.Filter;
35 import org.archive.crawler.scope.ClassicScope;
36
37 /**
38  * Accepts (returns for)) for all CandidateURIs passed in
39  * with a link-hop-count greater than the max-link-hops
40  * value.
41  *
42  * @author gojomo
43  * @deprecated As of release 1.10.0. Replaced by {@link DecidingFilter} and
44  * equivalent {@link DecideRule}.
45  */

46 public class HopsFilter extends Filter {
47
48     private static final long serialVersionUID = -5943030310651023640L;
49
50     private static final Logger JavaDoc logger =
51         Logger.getLogger(HopsFilter.class.getName());
52
53     /**
54      * @param name
55      */

56     public HopsFilter(String JavaDoc name) {
57         super(name, "Hops filter *Deprecated* Use" +
58             "DecidingFilter and equivalent DecideRule instead");
59     }
60
61     int maxLinkHops = Integer.MAX_VALUE;
62     int maxTransHops = Integer.MAX_VALUE;
63
64     /* (non-Javadoc)
65      * @see org.archive.crawler.framework.Filter#innerAccepts(java.lang.Object)
66      */

67     protected boolean innerAccepts(Object JavaDoc o) {
68         if(! (o instanceof CandidateURI)) {
69             return false;
70         }
71         String JavaDoc path = ((CandidateURI)o).getPathFromSeed();
72         int linkCount = 0;
73         int transCount = 0;
74         for(int i=path.length()-1;i>=0;i--) {
75             if(path.charAt(i)==Link.NAVLINK_HOP) {
76                 linkCount++;
77             } else if (linkCount==0) {
78                 transCount++;
79             }
80         }
81         if (o instanceof CrawlURI) {
82             CrawlURI curi = (CrawlURI) o;
83             CrawlScope scope =
84                 (CrawlScope) globalSettings().getModule(CrawlScope.ATTR_NAME);
85             try {
86                 maxLinkHops =
87                     ((Integer JavaDoc) scope
88                         .getAttribute(ClassicScope.ATTR_MAX_LINK_HOPS, curi))
89                         .intValue();
90                 maxTransHops =
91                     ((Integer JavaDoc) scope
92                         .getAttribute(ClassicScope.ATTR_MAX_TRANS_HOPS, curi))
93                         .intValue();
94             } catch (AttributeNotFoundException JavaDoc e) {
95                 logger.severe(e.getMessage());
96                 // Basically, true means the filter is PASSing this URI.
97
return true;
98             }
99         }
100
101         return (linkCount > maxLinkHops)|| (transCount>maxTransHops);
102     }
103 }
104
Popular Tags