1 23 package org.archive.crawler.deciderules; 24 25 import org.archive.crawler.settings.SimpleType; 26 27 public class ExceedsDocumentLengthTresholdDecideRule extends 28 NotExceedsDocumentLengthTresholdDecideRule { 29 30 private static final long serialVersionUID = -3008503096295212224L; 31 32 36 public ExceedsDocumentLengthTresholdDecideRule(String name) { 37 super(name); 38 setDescription("ExceedsDocumentLengthTresholdDecideRule. ACCEPTs URIs "+ 39 "with content length exceeding a given treshold. "+ 40 "Either examines HTTP header content length or " + 41 "actual downloaded content length and returns false " + 42 "for documents exceeding a given length treshold."); 43 44 addElementToDefinition(new SimpleType(ATTR_CONTENT_LENGTH_TRESHOLD, 45 "Min " + 46 "content-length this filter will allow to pass through. If -1, " + 47 "then no limit.", DEFAULT_CONTENT_LENGTH_TRESHOLD)); } 48 49 54 protected Boolean makeDecision(int contentLength, Object obj) { 55 return contentLength > getContentLengthTreshold(obj); 56 } 57 58 62 protected int getContentLengthTreshold(Object obj) { 63 int len = ((Integer )getUncheckedAttribute(obj, 64 ATTR_CONTENT_LENGTH_TRESHOLD)).intValue(); 65 return len == -1? 0: len; 66 } 67 } | Popular Tags |