1 25 package org.archive.crawler.deciderules; 26 27 import org.archive.crawler.datamodel.CandidateURI; 28 import org.archive.crawler.settings.SimpleType; 29 import org.archive.crawler.settings.Type; 30 31 32 33 40 public class TooManyHopsDecideRule extends PredicatedDecideRule { 41 42 private static final long serialVersionUID = -5429536193865916670L; 43 44 private static final String ATTR_MAX_HOPS = "max-hops"; 45 46 49 static final Integer DEFAULT_MAX_HOPS = new Integer (20); 50 51 55 public TooManyHopsDecideRule(String name) { 56 super(name); 57 setDescription("TooManyHopsDecideRule. REJECTs URIs discovered " + 58 "after too many hops (followed links of any type) from seed."); 59 addElementToDefinition(new SimpleType(ATTR_MAX_HOPS, "Max path" + 60 " depth for which this filter will match", DEFAULT_MAX_HOPS)); 61 Type type = addElementToDefinition(new SimpleType(ATTR_DECISION, 63 "Decision to be applied", REJECT, ALLOWED_TYPES)); 64 type.setTransient(true); 65 } 66 67 74 protected boolean evaluate(Object object) { 75 try { 76 CandidateURI curi = (CandidateURI)object; 77 return curi.getPathFromSeed() != null && 78 curi.getPathFromSeed().length() > getThresholdHops(object); 79 } catch (ClassCastException e) { 80 return false; 82 } 83 } 84 85 89 private int getThresholdHops(Object obj) { 90 return ((Integer )getUncheckedAttribute(obj,ATTR_MAX_HOPS)).intValue(); 91 } 92 } 93 | Popular Tags |