KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > datamodel > CrawlHost


1 /* Copyright (C) 2003 Internet Archive.
2  *
3  * This file is part of the Heritrix web crawler (crawler.archive.org).
4  *
5  * Heritrix is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or
8  * any later version.
9  *
10  * Heritrix is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser Public License
16  * along with Heritrix; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  *
19  * CrawlHost.java
20  * Created on Aug 5, 2003
21  *
22  * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/crawler/datamodel/CrawlHost.java,v 1.20.12.1 2007/01/13 01:31:08 stack-sf Exp $
23  */

24 package org.archive.crawler.datamodel;
25
26 import java.io.Serializable JavaDoc;
27 import java.net.InetAddress JavaDoc;
28 import java.util.logging.Level JavaDoc;
29 import java.util.logging.Logger JavaDoc;
30
31 import org.archive.util.InetAddressUtil;
32
33 /**
34  * Represents a single remote "host".
35  *
36  * An host is a name for which there is a dns record or an IP-address. This
37  * might be a machine or a virtual host.
38  *
39  * @author gojomo
40  */

41 public class CrawlHost implements Serializable JavaDoc, CrawlSubstats.HasCrawlSubstats {
42
43     private static final long serialVersionUID = -5494573967890942895L;
44
45     private static final Logger JavaDoc logger = Logger.getLogger(CrawlHost.class.getName());
46     /** Flag value indicating always-valid IP */
47     public static final long IP_NEVER_EXPIRES = -1;
48     /** Flag value indicating an IP has not yet been looked up */
49     public static final long IP_NEVER_LOOKED_UP = -2;
50     private String JavaDoc hostname;
51     private String JavaDoc countryCode;
52     private InetAddress JavaDoc ip;
53     private long ipFetched = IP_NEVER_LOOKED_UP;
54     protected CrawlSubstats substats = new CrawlSubstats();
55     /**
56      * TTL gotten from dns record.
57      *
58      * From rfc2035:
59      * <pre>
60      * TTL a 32 bit unsigned integer that specifies the time
61      * interval (in seconds) that the resource record may be
62      * cached before it should be discarded. Zero values are
63      * interpreted to mean that the RR can only be used for the
64      * transaction in progress, and should not be cached.
65      * </pre>
66      */

67     private long ipTTL = IP_NEVER_LOOKED_UP;
68
69     // Used when bandwith constraint are used
70
private long earliestNextURIEmitTime = 0;
71     
72     /**
73      * Create a new CrawlHost object.
74      *
75      * @param hostname the host name for this host.
76      */

77     public CrawlHost(String JavaDoc hostname) {
78             this(hostname, null);
79     }
80
81     /**
82      * Create a new CrawlHost object.
83      *
84      * @param hostname the host name for this host.
85      * @param countryCode the country code for this host.
86      */

87     public CrawlHost(String JavaDoc hostname, String JavaDoc countryCode) {
88         this.hostname = hostname;
89         this.countryCode = countryCode;
90         InetAddress JavaDoc tmp = InetAddressUtil.getIPHostAddress(hostname);
91         if (tmp != null) {
92             setIP(tmp, IP_NEVER_EXPIRES);
93         }
94     }
95
96     /** Return true if the IP for this host has been looked up.
97      *
98      * Returns true even if the lookup failed.
99      *
100      * @return true if the IP for this host has been looked up.
101      */

102     public boolean hasBeenLookedUp() {
103         return ipFetched != IP_NEVER_LOOKED_UP;
104     }
105
106     /**
107      * Set the IP address for this host.
108      *
109      * @param address
110      * @param ttl the TTL from the dns record in seconds or -1 if it should live
111      * forever (is a numeric IP).
112      */

113     public void setIP(InetAddress JavaDoc address, long ttl) {
114         this.ip = address;
115         // Assume that a lookup as occurred by the time
116
// a caller decides to set this (even to null)
117
this.ipFetched = System.currentTimeMillis();
118         this.ipTTL = ttl;
119         if (logger.isLoggable(Level.FINE)) {
120             logger.fine(hostname + ": " +
121                 ((address != null)? address.toString(): "null"));
122         }
123     }
124
125     /** Get the IP address for this host.
126      *
127      * @return the IP address for this host.
128      */

129     public InetAddress JavaDoc getIP() {
130         return ip;
131     }
132
133     /** Get the time when the IP address for this host was last looked up.
134      *
135      * @return the time when the IP address for this host was last looked up.
136      */

137     public long getIpFetched() {
138         return ipFetched;
139     }
140
141     /**
142      * Get the TTL value from the dns record for this host.
143      *
144      * @return the TTL value from the dns record for this host -- in seconds --
145      * or -1 if this lookup should be valid forever (numeric ip).
146      */

147     public long getIpTTL() {
148         return this.ipTTL;
149     }
150
151     public String JavaDoc toString() {
152         return "CrawlHost<" + hostname + "(ip:" + ip + ")>";
153     }
154
155     /**
156      * Get the host name.
157      * @return Returns the host name.
158      */

159     public String JavaDoc getHostName() {
160         return hostname;
161     }
162
163     /**
164      * Get the earliest time a URI for this host could be emitted.
165      * This only has effect if constraints on bandwidth per host is set.
166      *
167      * @return Returns the earliestNextURIEmitTime.
168      */

169     public long getEarliestNextURIEmitTime() {
170         return earliestNextURIEmitTime;
171     }
172
173     /**
174      * Set the earliest time a URI for this host could be emitted.
175      * This only has effect if constraints on bandwidth per host is set.
176      *
177      * @param earliestNextURIEmitTime The earliestNextURIEmitTime to set.
178      */

179     public void setEarliestNextURIEmitTime(long earliestNextURIEmitTime) {
180         this.earliestNextURIEmitTime = earliestNextURIEmitTime;
181     }
182
183     /**
184      * Get country code of this host
185      *
186      * @return Retruns country code or null if not availabe
187      */

188     public String JavaDoc getCountryCode() {
189         return countryCode;
190     }
191
192     /**
193      * Set country code for this hos
194      *
195      * @param countryCode The country code of this host
196      */

197     public void setCountryCode(String JavaDoc countryCode) {
198         this.countryCode = countryCode;
199     }
200     
201     /* (non-Javadoc)
202      * @see org.archive.crawler.datamodel.CrawlSubstats.HasCrawlSubstats#getSubstats()
203      */

204     public CrawlSubstats getSubstats() {
205         return substats;
206     }
207 }
208
Popular Tags