1 23 package org.archive.crawler.deciderules; 24 25 import java.net.InetAddress ; 26 import java.net.UnknownHostException ; 27 import java.util.logging.Level ; 28 import java.util.logging.Logger ; 29 30 31 import org.apache.commons.httpclient.URIException; 32 import org.archive.crawler.datamodel.CandidateURI; 33 import org.archive.crawler.datamodel.CrawlHost; 34 import org.archive.crawler.settings.SimpleType; 35 import org.xbill.DNS.Address; 36 37 52 public class ExternalGeoLocationDecideRule 53 extends PredicatedDecideRule { 54 55 private static final long serialVersionUID = -32974116429860725L; 56 57 private static final Logger LOGGER = 58 Logger.getLogger(ExternalGeoLocationDecideRule.class.getName()); 59 static final String ATTR_IMPLEMENTATION = "implementation-class"; 60 static final String ATTR_COUNTRY_CODE = "country-code"; 61 static final String DEFAULT_COUNTRY_CODE = "--"; 62 private String countryCode; 63 private ExternalGeoLookupInterface implementation = null; 64 65 68 public ExternalGeoLocationDecideRule(String name) { 69 super(name); 70 setDescription("ExternalGeoLocationDecideRule. Rule that " + 71 "instantiates implementations of the ExternalGeoLookupInterface. " + 72 "The implementation needs to be present on the classpath. " + 73 "On initialization, the implementation is instantiated (" + 74 "assumption is that there is public constructor that takes +" + 75 "country code)."); 76 addElementToDefinition(new SimpleType(ATTR_IMPLEMENTATION, 77 "Name of implementation of ExternalGeoLookupInterface class to " + 78 "instantiate.", "")); 79 addElementToDefinition(new SimpleType(ATTR_COUNTRY_CODE, 80 "Country code name.", "")); 81 82 } 83 84 boolean evaluate(Object obj) { 85 ExternalGeoLookupInterface impl = getConfiguredImplementation(obj); 86 if (impl == null) { 87 return false; 88 } 89 CrawlHost crawlHost = null; 90 String host; 91 InetAddress address; 92 try { 93 if (obj instanceof CandidateURI) { 94 host = ((CandidateURI) obj).getUURI().getHost(); 95 crawlHost = getSettingsHandler().getOrder() 96 .getController().getServerCache().getHostFor(host); 97 if (crawlHost.getCountryCode() != null){ 98 return (crawlHost.getCountryCode().equals(countryCode)) 99 ? true : false; 100 } 101 address = crawlHost.getIP(); 102 if (address == null) { 103 address = Address.getByName(host); 104 } 105 crawlHost.setCountryCode((String )impl.lookup(address)); 106 if (crawlHost.getCountryCode().equals(countryCode)){ 107 LOGGER.fine("Country Code Lookup: " + " " + host + 108 crawlHost.getCountryCode()); 109 return true; 110 } 111 } 112 } catch (UnknownHostException e) { 113 LOGGER.log(Level.FINE, "Failed dns lookup " + obj, e); 114 if (crawlHost != null){ 115 crawlHost.setCountryCode(DEFAULT_COUNTRY_CODE); 116 } 117 } catch (URIException e) { 118 LOGGER.log(Level.FINE, "Failed to parse hostname " + obj, e); 119 } 120 121 return false; 122 } 123 124 131 protected synchronized ExternalGeoLookupInterface 132 getConfiguredImplementation(Object o) { 133 if (this.implementation != null) { 134 return this.implementation; 135 } 136 ExternalGeoLookupInterface result = null; 137 try { 138 String className = 139 (String )getAttribute(o, ATTR_IMPLEMENTATION); 140 countryCode = (String )getAttribute(o, ATTR_COUNTRY_CODE); 141 if (className != null && className.length() != 0) { 142 Object obj = Class.forName(className).getConstructor(new Class [] 143 {String .class}).newInstance(new Object [] {countryCode}); 144 if (!(obj instanceof ExternalGeoLookupInterface)) { 145 LOGGER.severe("Implementation " + className + 146 " does not implement ExternalGeoLookupInterface"); 147 } 148 result = (ExternalGeoLookupInterface)obj; 149 this.implementation = result; 150 } 151 } catch (Exception e) { 152 LOGGER.severe(e.getMessage()); 153 } 154 return result; 155 } 156 } | Popular Tags |