| 1 25 package org.archive.crawler.deciderules; 26 27 import java.util.Iterator ; 28 import java.util.List ; 29 import java.util.logging.Level ; 30 import java.util.logging.Logger ; 31 32 import javax.management.AttributeNotFoundException ; 33 34 import org.archive.crawler.settings.SimpleType; 35 import org.archive.crawler.settings.StringList; 36 import org.archive.util.TextUtils; 37 38 39 49 public class MatchesListRegExpDecideRule extends PredicatedDecideRule { 50 51 private static final long serialVersionUID = 3011579758573454930L; 52 53 private static final Logger logger = 54 Logger.getLogger(MatchesListRegExpDecideRule.class.getName()); 55 56 public static final String ATTR_REGEXP_LIST = "regexp-list"; 57 public static final String ATTR_LIST_LOGIC= "list-logic"; 58 59 public static final String DEFAULT_LIST_LOGIC = "OR"; 60 public static final String [] LEGAL_LIST_LOGIC = {"OR","AND"}; 61 62 66 public MatchesListRegExpDecideRule(String name) { 67 super(name); 68 setDescription("MatchesListRegExpDecideRule. Applies the configured " + 69 "decision to URIs matching the supplied regular expressions.\n" + 70 "The list of regular expressions can be considered logically AND " + 71 "or OR."); 72 addElementToDefinition( 73 new SimpleType(ATTR_LIST_LOGIC, "Should the list of regular " + 74 "expressions be considered as logically AND or OR when " + 75 "matching.", 76 DEFAULT_LIST_LOGIC, LEGAL_LIST_LOGIC)); 77 addElementToDefinition(new StringList(ATTR_REGEXP_LIST,"The list of " + 78 "regular expressions to evalute against the URI.")); 79 } 80 81 88 protected boolean evaluate(Object o) { 89 try { 90 List regexps = getRegexp(o); 91 if(regexps.size()==0){ 92 return false; 93 } 94 String str = o.toString(); 95 Iterator it = regexps.iterator(); 96 97 boolean listLogicOR = isListLogicOR(o); 98 boolean result = listLogicOR == false; 102 103 while(it.hasNext()){ 104 String regexp = (String )it.next(); 105 boolean matches = TextUtils.matches(regexp, str); 106 107 if (logger.isLoggable(Level.FINER)) { 108 logger.finer("Tested '" + str + "' match with regex '" + 109 regexp + " and result was " + matches); 110 } 111 112 if(matches){ 113 if(listLogicOR){ 114 result = true; 116 break; 117 } 118 } else { 119 if(listLogicOR == false){ 120 result = false; 122 break; 123 } 124 } 125 } 126 127 if (logger.isLoggable(Level.FINE) && result){ 128 logger.fine("Matched: " + str); 129 } 130 131 return result; 132 } catch (ClassCastException e) { 133 return false; 135 } 136 } 137 138 145 protected List getRegexp(Object o) { 146 try { 147 return (StringList) getAttribute(o, ATTR_REGEXP_LIST); 148 } catch (AttributeNotFoundException e) { 149 logger.severe(e.getMessage()); 150 return null; 153 } 154 } 155 156 protected boolean isListLogicOR(Object o){ 157 String logic = DEFAULT_LIST_LOGIC; 158 try { 159 logic = (String ) getAttribute(o, ATTR_LIST_LOGIC); 160 } catch (AttributeNotFoundException e) { 161 logger.severe(e.getMessage()); 162 } 163 return logic.equals("OR") ? true : false; 164 } 165 } | Popular Tags |