| 1 24 package org.archive.crawler.filter; 25 26 import java.util.logging.Logger ; 27 28 import javax.management.AttributeNotFoundException ; 29 30 import org.apache.commons.httpclient.URIException; 31 import org.archive.crawler.datamodel.CandidateURI; 32 import org.archive.crawler.datamodel.CrawlURI; 33 import org.archive.crawler.deciderules.DecideRule; 34 import org.archive.crawler.deciderules.DecidingFilter; 35 import org.archive.crawler.framework.Filter; 36 import org.archive.crawler.settings.SimpleType; 37 import org.archive.net.UURI; 38 39 48 public class PathDepthFilter extends Filter { 49 50 private static final long serialVersionUID = 1626115117327154205L; 51 52 private static final Logger logger = 53 Logger.getLogger(PathDepthFilter.class.getName()); 54 public static final String ATTR_MATCH_RETURN_VALUE = 55 "path-less-or-equal-return"; 56 public static final String ATTR_MAX_PATH_DEPTH = "max-path-depth"; 57 Integer maxPathDepth = new Integer (Integer.MAX_VALUE); 58 final static char slash = '/'; 59 60 63 public PathDepthFilter(String name) { 64 super(name, "Path depth less or equal filter *Deprecated* Use" + 65 "DecidingFilter and equivalent DecideRule instead."); 66 addElementToDefinition(new SimpleType(ATTR_MAX_PATH_DEPTH, "Max path" + 67 " depth for which this filter will match", maxPathDepth)); 68 addElementToDefinition(new SimpleType(ATTR_MATCH_RETURN_VALUE, 69 "What to return when path depth is less or equal to max path" + 70 " depth. \n", new Boolean (true))); 71 } 72 73 protected boolean innerAccepts(Object o) { 74 String path = null; 75 if (o == null) { 76 return false; 77 } 78 79 if (o instanceof CandidateURI) { 80 try { 81 if (((CandidateURI)o).getUURI() != null) { 82 path = ((CandidateURI)o).getUURI().getPath(); 83 } 84 } 85 catch (URIException e) { 86 logger.severe("Failed getpath for " + 87 ((CandidateURI)o).getUURI()); 88 } 89 } else if (o instanceof UURI) { 90 try { 91 path = ((UURI)o).getPath(); 92 } 93 catch (URIException e) { 94 logger.severe("Failed getpath for " + o); 95 } 96 } 97 98 if (path == null) { 99 return true; 100 } 101 102 int count = 0; 103 for (int i = path.indexOf(slash); i != -1; 104 i = path.indexOf(slash, i + 1)) { 105 count++; 106 } 107 108 if (o instanceof CrawlURI) { 109 try { 110 this.maxPathDepth = (Integer ) getAttribute( 111 ATTR_MAX_PATH_DEPTH, (CrawlURI) o); 112 } catch (AttributeNotFoundException e) { 113 logger.severe(e.getMessage()); 114 } 115 } 116 117 return (this.maxPathDepth != null) ? 118 count <= this.maxPathDepth.intValue(): 119 false; 120 } 121 122 protected boolean returnTrueIfMatches(CrawlURI curi) { 123 try { 124 return ((Boolean ) getAttribute(ATTR_MATCH_RETURN_VALUE, curi)). 125 booleanValue(); 126 } catch (AttributeNotFoundException e) { 127 logger.severe(e.getMessage()); 128 return true; 129 } 130 } 131 132 protected boolean getFilterOffPosition(CrawlURI curi) { 133 return returnTrueIfMatches(curi); 134 } 135 } | Popular Tags |