1 25 package org.archive.crawler.deciderules; 26 27 import java.util.logging.Level ; 28 import java.util.logging.Logger ; 29 30 import org.archive.crawler.datamodel.CandidateURI; 31 import org.archive.util.TextUtils; 32 33 34 35 41 public class ClassKeyMatchesRegExpDecideRule extends MatchesRegExpDecideRule { 42 43 private static final long serialVersionUID = 1178873944436973294L; 44 45 private static final Logger logger = 46 Logger.getLogger(ClassKeyMatchesRegExpDecideRule.class.getName()); 47 48 52 public ClassKeyMatchesRegExpDecideRule(String name) { 53 super(name); 54 setDescription("ClassKeyMatchesRegExpDecideRule. " + 55 "Applies the configured " + 56 "decision to class keys matching the supplied " + 57 "regular expression. Class keys are values set into " + 58 "an URL by the Frontier. They are usually the names " + 59 "of queues used by the Frontier. Class keys can " + 60 "look like hostname + port or be plain IPs (It will " + 61 "depend on the Frontier implementation/configuration)."); 62 } 63 64 72 protected boolean evaluate(Object object) { 73 try { 74 CandidateURI cauri = (CandidateURI)object; 75 String classKey = cauri.getClassKey(); 76 if (classKey == null || classKey.length() <= 0) { 77 classKey = getSettingsHandler().getOrder().getController(). 78 getFrontier().getClassKey(cauri); 79 cauri.setClassKey(classKey); 80 } 81 String regexp = getRegexp(cauri); 82 boolean result = (regexp == null)? 83 false: TextUtils.matches(regexp, cauri.getClassKey()); 84 if (logger.isLoggable(Level.FINE)) { 85 logger.fine("Tested '" + cauri.getClassKey() + 86 "' match with regex '" + regexp + " and result was " + 87 result); 88 } 89 return result; 90 } catch (ClassCastException e) { 91 return false; 93 } 94 } 95 } | Popular Tags |