1 25 package org.archive.crawler.deciderules; 26 27 import org.archive.crawler.datamodel.CandidateURI; 28 import org.archive.crawler.extractor.Link; 29 import org.archive.crawler.settings.SimpleType; 30 import org.archive.crawler.settings.Type; 31 32 33 34 53 public class TransclusionDecideRule extends PredicatedDecideRule { 54 55 private static final long serialVersionUID = -3975688876990558918L; 56 57 private static final String ATTR_MAX_TRANS_HOPS = "max-trans-hops"; 58 59 63 static final Integer DEFAULT_MAX_TRANS_HOPS = new Integer (3); 64 65 69 public TransclusionDecideRule(String name) { 70 super(name); 71 setDescription("TransclusionDecideRule. ACCEPTs URIs whose path " + 72 "from the seed ends with up to (but not more than) the " + 73 "configured '" + ATTR_MAX_TRANS_HOPS + 74 "' number of non-navlink ('L') hops."); 75 Type type = getElementFromDefinition(ATTR_DECISION); 77 type.setTransient(true); 78 addElementToDefinition(new SimpleType(ATTR_MAX_TRANS_HOPS, 79 "Maximum number of non-navlink ('L') hops.", 80 DEFAULT_MAX_TRANS_HOPS)); 81 } 84 85 92 protected boolean evaluate(Object object) { 93 CandidateURI curi = null; 94 try { 95 curi = (CandidateURI)object; 96 } catch (ClassCastException e) { 97 return false; 99 } 100 String hopsPath = curi.getPathFromSeed(); 101 if (hopsPath == null || hopsPath.length() == 0) { 102 return false; 103 } 104 int count = 0; 105 for (int i = hopsPath.length() - 1; i >= 0; i--) { 106 if (hopsPath.charAt(i) != Link.NAVLINK_HOP) { 107 count++; 111 } else { 112 break; 113 } 114 } 115 return count > 0 && count <= getThresholdHops(object); 116 } 117 118 122 private int getThresholdHops(Object obj) { 123 return ((Integer )getUncheckedAttribute(obj,ATTR_MAX_TRANS_HOPS)). 124 intValue(); 125 } 126 } 127 | Popular Tags |