1 24 package org.archive.crawler.filter; 25 26 import java.util.logging.Level ; 27 import java.util.logging.Logger ; 28 29 import javax.management.AttributeNotFoundException ; 30 31 import org.archive.crawler.datamodel.CrawlURI; 32 import org.archive.crawler.deciderules.DecideRule; 33 import org.archive.crawler.deciderules.DecidingFilter; 34 import org.archive.crawler.framework.Filter; 35 import org.archive.crawler.settings.SimpleType; 36 import org.archive.util.TextUtils; 37 38 39 47 public class URIRegExpFilter 48 extends Filter { 49 50 private static final long serialVersionUID = 1878356276332865537L; 51 52 private static final Logger logger = 53 Logger.getLogger(URIRegExpFilter.class.getName()); 54 public static final String ATTR_REGEXP = "regexp"; 55 public static final String ATTR_MATCH_RETURN_VALUE = "if-match-return"; 56 57 60 public URIRegExpFilter(String name) { 61 this(name, "URI regexp filter *Deprecated* Use DecidingFilter and " + 62 "equivalent DecideRule instead. ", ""); 63 addElementToDefinition( 64 new SimpleType(ATTR_MATCH_RETURN_VALUE, "What to return when" + 65 " regular expression matches. \n", new Boolean (true))); 66 addElementToDefinition( 67 new SimpleType(ATTR_REGEXP, "Java regular expression.", "")); 68 } 69 70 public URIRegExpFilter(String name, String regexp) { 71 this(name, "URI regexp filter.", regexp); 72 } 73 74 protected URIRegExpFilter(String name, String description, String regexp) { 75 super(name, description); 76 addElementToDefinition(new SimpleType(ATTR_MATCH_RETURN_VALUE, 77 "What to return when" + " regular expression matches. \n", 78 new Boolean (true))); 79 addElementToDefinition(new SimpleType(ATTR_REGEXP, 80 "Java regular expression.", regexp)); 81 } 82 83 protected boolean innerAccepts(Object o) { 84 String regexp = getRegexp(o); 85 String str = o.toString(); 86 boolean result = (regexp == null)? 87 false: TextUtils.matches(regexp, str); 88 if (logger.isLoggable(Level.FINE)) { 89 logger.fine("Tested '" + str + "' match with regex '" + 90 getRegexp(o) + " and result was " + result); 91 } 92 return result; 93 } 94 95 102 protected String getRegexp(Object o) { 103 try { 104 return (String ) getAttribute(o, ATTR_REGEXP); 105 } catch (AttributeNotFoundException e) { 106 logger.severe(e.getMessage()); 107 return null; 110 } 111 } 112 113 protected boolean returnTrueIfMatches(CrawlURI curi) { 114 try { 115 return ((Boolean )getAttribute(ATTR_MATCH_RETURN_VALUE, curi)). 116 booleanValue(); 117 } catch (AttributeNotFoundException e) { 118 logger.severe(e.getMessage()); 119 return true; 120 } 121 } 122 } 123 | Popular Tags |