KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > fetcher > HeritrixProtocolSocketFactory


1 /* HeritrixProtocolSocketFactory
2  *
3  * Created on Oct 8, 2004
4  *
5  * Copyright (C) 2004 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.fetcher;
24
25 import java.io.IOException JavaDoc;
26 import java.net.InetAddress JavaDoc;
27 import java.net.InetSocketAddress JavaDoc;
28 import java.net.Socket JavaDoc;
29 import java.net.SocketTimeoutException JavaDoc;
30 import java.net.UnknownHostException JavaDoc;
31
32 import org.apache.commons.httpclient.ConnectTimeoutException;
33 import org.apache.commons.httpclient.params.HttpConnectionParams;
34 import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
35 import org.archive.crawler.datamodel.CrawlHost;
36 import org.archive.crawler.datamodel.ServerCache;
37
38
39 /**
40  * Version of protocol socket factory that tries to get IP from heritrix IP
41  * cache -- if its been set into the HttpConnectionParameters.
42  *
43  * Copied the guts of DefaultProtocolSocketFactory. This factory gets
44  * setup by {@link FetchHTTP}.
45  *
46  * @author stack
47  * @version $Date: 2006/08/29 22:47:03 $, $Revision: 1.10 $
48  */

49 public class HeritrixProtocolSocketFactory
50 implements ProtocolSocketFactory {
51     /**
52      * Constructor.
53      */

54     public HeritrixProtocolSocketFactory() {
55         super();
56     }
57
58     /**
59      * @see #createSocket(java.lang.String,int,java.net.InetAddress,int)
60      */

61     public Socket JavaDoc createSocket(
62         String JavaDoc host,
63         int port,
64         InetAddress JavaDoc localAddress,
65         int localPort
66     ) throws IOException JavaDoc, UnknownHostException JavaDoc {
67         return new Socket JavaDoc(host, port, localAddress, localPort);
68     }
69
70     /**
71      * Attempts to get a new socket connection to the given host within the
72      * given time limit.
73      * <p>
74      * This method employs several techniques to circumvent the limitations
75      * of older JREs that do not support connect timeout. When running in
76      * JRE 1.4 or above reflection is used to call
77      * Socket#connect(SocketAddress endpoint, int timeout) method. When
78      * executing in older JREs a controller thread is executed. The
79      * controller thread attempts to create a new socket within the given
80      * limit of time. If socket constructor does not return until the
81      * timeout expires, the controller terminates and throws an
82      * {@link ConnectTimeoutException}
83      * </p>
84      *
85      * @param host the host name/IP
86      * @param port the port on the host
87      * @param localAddress the local host name/IP to bind the socket to
88      * @param localPort the port on the local machine
89      * @param params {@link HttpConnectionParams Http connection parameters}
90      *
91      * @return Socket a new socket
92      *
93      * @throws IOException if an I/O error occurs while creating the socket
94      * @throws UnknownHostException if the IP address of the host cannot be
95      * @throws IOException if an I/O error occurs while creating the socket
96      * @throws UnknownHostException if the IP address of the host cannot be
97      * determined
98      * @throws ConnectTimeoutException if socket cannot be connected within the
99      * given time limit
100      *
101      * @since 3.0
102      */

103     public Socket JavaDoc createSocket(
104         final String JavaDoc host,
105         final int port,
106         final InetAddress JavaDoc localAddress,
107         final int localPort,
108         final HttpConnectionParams params)
109     throws IOException JavaDoc, UnknownHostException JavaDoc, ConnectTimeoutException {
110         // Below code is from the DefaultSSLProtocolSocketFactory#createSocket
111
// method only it has workarounds to deal with pre-1.4 JVMs. I've
112
// cut these out.
113
if (params == null) {
114             throw new IllegalArgumentException JavaDoc("Parameters may not be null");
115         }
116         Socket JavaDoc socket = null;
117         int timeout = params.getConnectionTimeout();
118         if (timeout == 0) {
119             socket = createSocket(host, port, localAddress, localPort);
120         } else {
121             socket = new Socket JavaDoc();
122             ServerCache cache = (ServerCache)params.
123                 getParameter(FetchHTTP.SERVER_CACHE_KEY);
124             InetAddress JavaDoc hostAddress =
125                 (cache != null)? getHostAddress(cache, host): null;
126             InetSocketAddress JavaDoc address = (hostAddress != null)?
127                     new InetSocketAddress JavaDoc(hostAddress, port):
128                     new InetSocketAddress JavaDoc(host, port);
129             socket.bind(new InetSocketAddress JavaDoc(localAddress, localPort));
130             try {
131                 socket.connect(address, timeout);
132             } catch (SocketTimeoutException JavaDoc e) {
133                 // Add timeout info. to the exception.
134
throw new SocketTimeoutException JavaDoc(e.getMessage() +
135                     ": timeout set at " + Integer.toString(timeout) + "ms.");
136             }
137             assert socket.isConnected(): "Socket not connected " + host;
138         }
139         return socket;
140     }
141     
142     /**
143      * Get host address using first the heritrix cache of addresses, then,
144      * failing that, go to the dnsjava cache.
145      *
146      * Default access and static so can be used by other classes in this
147      * package.
148      *
149      * @param host Host whose address we're to fetch.
150      * @return an IP address for this host or null if one can't be found
151      * in caches.
152      * @exception IOException If we fail to get host IP from ServerCache.
153      */

154     static InetAddress JavaDoc getHostAddress(final ServerCache cache,
155             final String JavaDoc host) throws IOException JavaDoc {
156         InetAddress JavaDoc result = null;
157         if (cache != null) {
158             CrawlHost ch = cache.getHostFor(host);
159             if (ch != null) {
160                 result = ch.getIP();
161             }
162         }
163         if (result == null) {
164             throw new IOException JavaDoc("Failed to get host " + host +
165                 " address from ServerCache");
166         }
167         return result;
168     }
169
170     /**
171      * @see ProtocolSocketFactory#createSocket(java.lang.String,int)
172      */

173     public Socket JavaDoc createSocket(String JavaDoc host, int port)
174             throws IOException JavaDoc, UnknownHostException JavaDoc {
175         return new Socket JavaDoc(host, port);
176     }
177
178     /**
179      * All instances of DefaultProtocolSocketFactory are the same.
180      * @param obj Object to compare.
181      * @return True if equal
182      */

183     public boolean equals(Object JavaDoc obj) {
184         return ((obj != null) &&
185             obj.getClass().equals(HeritrixProtocolSocketFactory.class));
186     }
187
188     /**
189      * All instances of DefaultProtocolSocketFactory have the same hash code.
190      * @return Hash code for this object.
191      */

192     public int hashCode() {
193         return HeritrixProtocolSocketFactory.class.hashCode();
194     }
195 }
196
Popular Tags