package org.archive.crawler.filter;

import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.AttributeNotFoundException;

import org.archive.crawler.deciderules.DecideRule;
import org.archive.crawler.deciderules.DecidingFilter;
import org.archive.crawler.framework.Filter;
import org.archive.crawler.settings.SimpleType;
import org.archive.crawler.settings.StringList;
import org.archive.util.TextUtils;


/**
 * A filter that tests the string form of an object against a configurable
 * list of regular expressions.
 *
 * <p>The list is combined with either OR logic (one matching expression is
 * enough) or AND logic (every expression must match), selected through the
 * {@link #ATTR_LIST_LOGIC} attribute. The {@link #ATTR_MATCH_RETURN_VALUE}
 * attribute decides whether a match is reported as <code>true</code> or as
 * <code>false</code>.
 *
 * <p>As stated in this filter's own description text, it is deprecated in
 * favour of {@link DecidingFilter} and an equivalent {@link DecideRule}.
 */
public class URIListRegExpFilter
extends Filter {

    private static final long serialVersionUID = -2587977969340783677L;

    private static final Logger logger =
        Logger.getLogger(URIListRegExpFilter.class.getName());

    /** Name of the attribute holding the list of regular expressions. */
    public static final String ATTR_REGEXP_LIST = "regexp-list";
    /** Name of the attribute selecting OR or AND evaluation of the list. */
    public static final String ATTR_LIST_LOGIC = "list-logic";
    /** Name of the attribute holding the value to return on a match. */
    public static final String ATTR_MATCH_RETURN_VALUE = "if-match-return";

    /** List logic used when none is configured. */
    public static final String DEFAULT_LIST_LOGIC = "OR";
    /** The legal values of the {@link #ATTR_LIST_LOGIC} attribute. */
    public static final String[] LEGAL_LIST_LOGIC = {"OR", "AND"};
    /** Match return value used when none is configured. */
    public static final Boolean DEFAULT_MATCH_RETURN_VALUE = Boolean.TRUE;

    /**
     * Creates the filter and registers its three settings: the match return
     * value, the list logic and the regular expression list.
     *
     * @param name the name of this filter, passed on to the superclass.
     */
    public URIListRegExpFilter(String name) {
        super(name, "A filter that uses a list of regular expressions " +
                "*Deprecated* Use DecidingFilter and equivalent DecideRule " +
                "instead. Can be " +
                "optionally either OR or AND based in its evaluation.");
        addElementToDefinition(
                new SimpleType(ATTR_MATCH_RETURN_VALUE, "What to return when" +
                    " regular expression matches. \n",
                    DEFAULT_MATCH_RETURN_VALUE));
        addElementToDefinition(
                new SimpleType(ATTR_LIST_LOGIC, "Should the list of regular " +
                    "expressions be considered as logically AND or OR when " +
                    "matching.",
                    DEFAULT_LIST_LOGIC, LEGAL_LIST_LOGIC));
        addElementToDefinition(new StringList(ATTR_REGEXP_LIST, "The list of " +
                "regular expressions to evalute against the URI."));
    }

    /**
     * Evaluates <code>o.toString()</code> against the configured list of
     * regular expressions.
     *
     * <p>With OR logic the list matches as soon as one expression matches;
     * with AND logic it matches only if every expression matches. The list
     * outcome is returned as is when the match return value is
     * <code>true</code>, and inverted otherwise.
     *
     * @param o the object to test; also used as the context for the
     *          attribute lookups.
     * @return the (possibly inverted) outcome of the list evaluation;
     *         always <code>false</code> when the expression list is
     *         unavailable or empty.
     */
    protected boolean innerAccepts(Object o) {
        List regexps = getRegexp(o);
        // getRegexp() returns null when the attribute cannot be resolved;
        // treat that like an empty list so the filter is simply inactive
        // instead of failing with a NullPointerException.
        if (regexps == null || regexps.isEmpty()) {
            return false;
        }
        String str = o.toString();
        Iterator it = regexps.iterator();

        boolean listLogicOR = isListLogicOR(o);
        // Starting value: OR is false until some expression matches,
        // AND is true until some expression fails to match.
        boolean result = !listLogicOR;

        while (it.hasNext()) {
            String regexp = (String) it.next();
            boolean matches = TextUtils.matches(regexp, str);

            if (logger.isLoggable(Level.FINER)) {
                logger.finer("Tested '" + str + "' match with regex '" +
                    regexp + "' and result was " + matches);
            }

            if (matches) {
                if (listLogicOR) {
                    // OR: the first match settles the outcome.
                    result = true;
                    break;
                }
            } else {
                if (!listLogicOR) {
                    // AND: the first non-match settles the outcome.
                    result = false;
                    break;
                }
            }
        }

        // Invert the outcome when the filter is configured to return
        // false on a match.
        result = getMatchReturnValue(o) ? result : !result;

        if (logger.isLoggable(Level.FINE) && result) {
            logger.fine("Matched: " + str);
        }

        return result;
    }

    /**
     * Looks up the list of regular expressions for the given context.
     *
     * @param o the context object passed to the attribute lookup.
     * @return the configured list, or <code>null</code> if the attribute
     *         could not be found (the failure is logged). Callers must
     *         handle the <code>null</code> case.
     */
    protected List getRegexp(Object o) {
        try {
            return (StringList) getAttribute(o, ATTR_REGEXP_LIST);
        } catch (AttributeNotFoundException e) {
            logger.log(Level.SEVERE, e.getMessage(), e);
            return null;
        }
    }

    /**
     * Looks up the value to return when the expression list matches.
     *
     * @param o the context object passed to the attribute lookup.
     * @return the configured value, or {@link #DEFAULT_MATCH_RETURN_VALUE}
     *         if the attribute could not be found (logged) or has no value.
     */
    protected boolean getMatchReturnValue(Object o) {
        Boolean configured = null;
        try {
            configured = (Boolean) getAttribute(o, ATTR_MATCH_RETURN_VALUE);
        } catch (AttributeNotFoundException e) {
            logger.log(Level.SEVERE, e.getMessage(), e);
        }
        if (configured == null) {
            configured = DEFAULT_MATCH_RETURN_VALUE;
        }
        return configured.booleanValue();
    }

    /**
     * Tells whether the expression list is evaluated with OR logic.
     *
     * @param o the context object passed to the attribute lookup.
     * @return <code>true</code> if the effective list logic equals
     *         <code>"OR"</code>; {@link #DEFAULT_LIST_LOGIC} is used when the
     *         attribute could not be found (logged) or has no value.
     */
    protected boolean isListLogicOR(Object o) {
        String logic = null;
        try {
            logic = (String) getAttribute(o, ATTR_LIST_LOGIC);
        } catch (AttributeNotFoundException e) {
            logger.log(Level.SEVERE, e.getMessage(), e);
        }
        if (logic == null) {
            logic = DEFAULT_LIST_LOGIC;
        }
        return "OR".equals(logic);
    }

}