package org.archive.crawler.filter;

import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.AttributeNotFoundException;

import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.extractor.Link;
import org.archive.crawler.framework.CrawlScope;
import org.archive.crawler.framework.Filter;
import org.archive.crawler.scope.ClassicScope;

/**
 * Filter that returns {@code true} for URIs whose path from seed contains
 * more hops than the crawl scope's configured limits allow.
 *
 * <p>Two counts are derived from {@link CandidateURI#getPathFromSeed()}:
 * the total number of {@link Link#NAVLINK_HOP} characters (link hops), and
 * the number of trailing characters after the last such hop (transitive
 * hops). The limits are read from the {@link CrawlScope} module using the
 * {@link ClassicScope#ATTR_MAX_LINK_HOPS} and
 * {@link ClassicScope#ATTR_MAX_TRANS_HOPS} attributes.
 *
 * <p>This filter describes itself as deprecated in favour of DecidingFilter
 * and an equivalent DecideRule.
 */
public class HopsFilter extends Filter {

    private static final long serialVersionUID = -5943030310651023640L;

    private static final Logger logger =
        Logger.getLogger(HopsFilter.class.getName());

    /**
     * Creates the filter.
     *
     * @param name the name handed to the {@link Filter} constructor
     */
    public HopsFilter(String name) {
        // The trailing space after "Use" keeps the two halves of the
        // description from running together ("UseDecidingFilter").
        super(name, "Hops filter *Deprecated* Use "
                + "DecidingFilter and equivalent DecideRule instead");
    }

    /**
     * Link-hop limit most recently read from the crawl scope;
     * {@code Integer.MAX_VALUE} until a {@link CrawlURI} has been evaluated.
     */
    int maxLinkHops = Integer.MAX_VALUE;

    /**
     * Transitive-hop limit most recently read from the crawl scope;
     * {@code Integer.MAX_VALUE} until a {@link CrawlURI} has been evaluated.
     */
    int maxTransHops = Integer.MAX_VALUE;

    /**
     * Tests whether the given object exceeds the configured hop limits.
     *
     * @param o the object to test; anything that is not a
     *          {@link CandidateURI} yields {@code false}
     * @return {@code true} if the link-hop count exceeds
     *         {@link #maxLinkHops} or the trailing transitive-hop count
     *         exceeds {@link #maxTransHops}; also {@code true} if a limit
     *         attribute cannot be found on the scope
     */
    protected boolean innerAccepts(Object o) {
        if (!(o instanceof CandidateURI)) {
            return false;
        }

        String path = ((CandidateURI) o).getPathFromSeed();
        int linkCount = 0;
        int transCount = 0;
        // A missing path is treated as zero hops instead of failing with a
        // NullPointerException.
        // NOTE(review): whether getPathFromSeed() can return null (e.g. for
        // seeds) is not visible from this file - confirm.
        if (path != null) {
            // Walk backwards so that only the run of non-navigation hops
            // after the last navigation hop is counted as transitive.
            for (int i = path.length() - 1; i >= 0; i--) {
                if (path.charAt(i) == Link.NAVLINK_HOP) {
                    linkCount++;
                } else if (linkCount == 0) {
                    transCount++;
                }
            }
        }

        // NOTE(review): the limits are stored in instance fields, so a plain
        // CandidateURI is compared against whatever the last CrawlURI
        // loaded. If this instance is shared between threads the writes are
        // unsynchronized - confirm this is intended before relying on it.
        if (o instanceof CrawlURI) {
            CrawlURI curi = (CrawlURI) o;
            CrawlScope scope =
                (CrawlScope) globalSettings().getModule(CrawlScope.ATTR_NAME);
            try {
                maxLinkHops = ((Integer) scope.getAttribute(
                        ClassicScope.ATTR_MAX_LINK_HOPS, curi)).intValue();
                maxTransHops = ((Integer) scope.getAttribute(
                        ClassicScope.ATTR_MAX_TRANS_HOPS, curi)).intValue();
            } catch (AttributeNotFoundException e) {
                // Log with the throwable so the stack trace is not lost.
                logger.log(Level.SEVERE, e.getMessage(), e);
                return true;
            }
        }

        return (linkCount > maxLinkHops) || (transCount > maxTransHops);
    }
}