KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > ExternalGeoLocationDecideRule


1 /* ExternalGeoLocationDecideRule
2  *
3  * Created on May 25, 2005
4  *
5  * Copyright (C) 2005 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.deciderules;
24
25 import java.net.InetAddress JavaDoc;
26 import java.net.UnknownHostException JavaDoc;
27 import java.util.logging.Level JavaDoc;
28 import java.util.logging.Logger JavaDoc;
29
30
31 import org.apache.commons.httpclient.URIException;
32 import org.archive.crawler.datamodel.CandidateURI;
33 import org.archive.crawler.datamodel.CrawlHost;
34 import org.archive.crawler.settings.SimpleType;
35 import org.xbill.DNS.Address;
36
/**
 * A rule that can be configured to take alternate implementations
 * of the <code>ExternalGeoLookupInterface</code>.
 * If no implementation is specified, or none is found, returns the
 * configured decision.
 * If the host in the URI has been resolved, checks <code>CrawlHost</code>
 * for the country code determination.
 * If the country code is not present, does a country lookup and saves the
 * country code to <code>CrawlHost</code> for future consultation.
 * If the country code is present in <code>CrawlHost</code>, compares it
 * against the configured code.
 * Note that if a host's IP address changes during the crawl, we still consider
 * the associated hostname to be in the country of its original IP address.
 *
 * @author Igor Ranitovic
 */

52 public class ExternalGeoLocationDecideRule
53 extends PredicatedDecideRule {
54
55     private static final long serialVersionUID = -32974116429860725L;
56
57     private static final Logger JavaDoc LOGGER =
58         Logger.getLogger(ExternalGeoLocationDecideRule.class.getName());
59     static final String JavaDoc ATTR_IMPLEMENTATION = "implementation-class";
60     static final String JavaDoc ATTR_COUNTRY_CODE = "country-code";
61     static final String JavaDoc DEFAULT_COUNTRY_CODE = "--";
62     private String JavaDoc countryCode;
63     private ExternalGeoLookupInterface implementation = null;
64
65     /**
66      * @param name Name of this rule.
67      */

68     public ExternalGeoLocationDecideRule(String JavaDoc name) {
69         super(name);
70         setDescription("ExternalGeoLocationDecideRule. Rule that " +
71             "instantiates implementations of the ExternalGeoLookupInterface. " +
72             "The implementation needs to be present on the classpath. " +
73             "On initialization, the implementation is instantiated (" +
74             "assumption is that there is public constructor that takes +" +
75             "country code).");
76         addElementToDefinition(new SimpleType(ATTR_IMPLEMENTATION,
77             "Name of implementation of ExternalGeoLookupInterface class to " +
78             "instantiate.", ""));
79         addElementToDefinition(new SimpleType(ATTR_COUNTRY_CODE,
80                 "Country code name.", ""));
81
82     }
83     
84     boolean evaluate(Object JavaDoc obj) {
85         ExternalGeoLookupInterface impl = getConfiguredImplementation(obj);
86         if (impl == null) {
87             return false;
88         }
89         CrawlHost crawlHost = null;
90         String JavaDoc host;
91         InetAddress JavaDoc address;
92         try {
93             if (obj instanceof CandidateURI) {
94                 host = ((CandidateURI) obj).getUURI().getHost();
95                 crawlHost = getSettingsHandler().getOrder()
96                    .getController().getServerCache().getHostFor(host);
97                 if (crawlHost.getCountryCode() != null){
98                    return (crawlHost.getCountryCode().equals(countryCode))
99                                 ? true : false;
100                 }
101                 address = crawlHost.getIP();
102                 if (address == null) {
103                     address = Address.getByName(host);
104                 }
105                 crawlHost.setCountryCode((String JavaDoc)impl.lookup(address));
106                 if (crawlHost.getCountryCode().equals(countryCode)){
107                     LOGGER.fine("Country Code Lookup: " + " " + host +
108                             crawlHost.getCountryCode());
109                     return true;
110                 }
111             }
112         } catch (UnknownHostException JavaDoc e) {
113             LOGGER.log(Level.FINE, "Failed dns lookup " + obj, e);
114             if (crawlHost != null){
115                 crawlHost.setCountryCode(DEFAULT_COUNTRY_CODE);
116             }
117         } catch (URIException e) {
118             LOGGER.log(Level.FINE, "Failed to parse hostname " + obj, e);
119         }
120         
121         return false;
122     }
123     
124     /**
125      * Get implementation, if one specified. If none specified, will keep trying
126      * to find one. Will be messy if the provided class is not-instantiable
127      *
128      * @param o A context object.
129      * @return Instance of <code>ExternalGeoLookupInterface</code> or null.
130      */

131     protected synchronized ExternalGeoLookupInterface
132             getConfiguredImplementation(Object JavaDoc o) {
133         if (this.implementation != null) {
134             return this.implementation;
135         }
136         ExternalGeoLookupInterface result = null;
137         try {
138             String JavaDoc className =
139                 (String JavaDoc)getAttribute(o, ATTR_IMPLEMENTATION);
140             countryCode = (String JavaDoc)getAttribute(o, ATTR_COUNTRY_CODE);
141             if (className != null && className.length() != 0) {
142                 Object JavaDoc obj = Class.forName(className).getConstructor(new Class JavaDoc[]
143                       {String JavaDoc.class}).newInstance(new Object JavaDoc[] {countryCode});
144                 if (!(obj instanceof ExternalGeoLookupInterface)) {
145                     LOGGER.severe("Implementation " + className +
146                         " does not implement ExternalGeoLookupInterface");
147                 }
148                 result = (ExternalGeoLookupInterface)obj;
149                 this.implementation = result;
150             }
151         } catch (Exception JavaDoc e) {
152             LOGGER.severe(e.getMessage());
153         }
154         return result;
155     }
156 }
Popular Tags