package org.archive.crawler.fetcher;

import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;

import org.apache.commons.httpclient.ConnectTimeoutException;
import org.apache.commons.httpclient.params.HttpConnectionParams;
import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
import org.archive.crawler.datamodel.CrawlHost;
import org.archive.crawler.datamodel.ServerCache;


/**
 * {@link ProtocolSocketFactory} implementation that creates plain
 * {@link Socket}s.
 *
 * <p>When a connection timeout is configured and a {@link ServerCache} has
 * been placed in the connection parameters under
 * <code>FetchHTTP.SERVER_CACHE_KEY</code>, the remote address is taken from
 * the cache's {@link CrawlHost} entry for the host rather than being resolved
 * from the host name by {@link InetSocketAddress}.
 *
 * <p>The class holds no instance state; all instances compare equal.
 */
public class HeritrixProtocolSocketFactory
implements ProtocolSocketFactory {

    /**
     * Creates a new factory. The factory has no configuration of its own.
     */
    public HeritrixProtocolSocketFactory() {
        super();
    }

    /**
     * Creates a socket connected to <code>host:port</code> and bound to the
     * given local address and port.
     *
     * @param host Remote host name.
     * @param port Remote port.
     * @param localAddress Local address to bind to.
     * @param localPort Local port to bind to.
     * @return A socket created by
     * {@link Socket#Socket(String, int, InetAddress, int)}.
     * @throws IOException If the socket cannot be created or connected.
     * @throws UnknownHostException If the host cannot be resolved.
     */
    public Socket createSocket(
            String host,
            int port,
            InetAddress localAddress,
            int localPort)
    throws IOException, UnknownHostException {
        return new Socket(host, port, localAddress, localPort);
    }

    /**
     * Creates a socket connected to <code>host:port</code>, honouring the
     * connection timeout found in <code>params</code>.
     *
     * <p>If the timeout is <code>0</code> this delegates to
     * {@link #createSocket(String, int, InetAddress, int)}. Otherwise an
     * unconnected socket is created, bound to the local address/port and
     * connected with the timeout. In that case, if <code>params</code>
     * carries a {@link ServerCache} under
     * <code>FetchHTTP.SERVER_CACHE_KEY</code>, the remote IP is taken from
     * the cache (see {@link #getHostAddress(ServerCache, String)});
     * without a cache the host name is handed to {@link InetSocketAddress}.
     *
     * <p>If binding or connecting fails, the partially set-up socket is
     * closed before the exception propagates, so no descriptor is leaked.
     *
     * @param host Remote host name.
     * @param port Remote port.
     * @param localAddress Local address to bind to.
     * @param localPort Local port to bind to.
     * @param params Connection parameters; must not be <code>null</code>.
     * @return A connected socket.
     * @throws IllegalArgumentException If <code>params</code> is
     * <code>null</code>.
     * @throws SocketTimeoutException If the connect did not complete within
     * the timeout; the message includes the timeout in milliseconds and the
     * original exception is attached as the cause.
     * @throws IOException On any other failure to look up the host, bind or
     * connect.
     * @throws UnknownHostException If the host cannot be resolved.
     * @throws ConnectTimeoutException Declared in the signature; this
     * implementation itself reports connect timeouts as
     * {@link SocketTimeoutException}.
     */
    public Socket createSocket(
            final String host,
            final int port,
            final InetAddress localAddress,
            final int localPort,
            final HttpConnectionParams params)
    throws IOException, UnknownHostException, ConnectTimeoutException {
        if (params == null) {
            throw new IllegalArgumentException("Parameters may not be null");
        }
        final int timeout = params.getConnectionTimeout();
        if (timeout == 0) {
            // No timeout requested: the plain connecting constructor will do.
            return createSocket(host, port, localAddress, localPort);
        }

        // Work out the remote address BEFORE allocating the socket so that a
        // failed cache lookup cannot leave an unclosed socket behind.
        ServerCache cache = (ServerCache)params.
            getParameter(FetchHTTP.SERVER_CACHE_KEY);
        InetAddress hostAddress =
            (cache != null)? getHostAddress(cache, host): null;
        InetSocketAddress address = (hostAddress != null)?
            new InetSocketAddress(hostAddress, port):
            new InetSocketAddress(host, port);

        Socket socket = new Socket();
        boolean connected = false;
        try {
            socket.bind(new InetSocketAddress(localAddress, localPort));
            try {
                socket.connect(address, timeout);
            } catch (SocketTimeoutException e) {
                // Add timeout info to the message, keeping the original
                // exception as the cause so its stack trace is not lost.
                SocketTimeoutException detailed = new SocketTimeoutException(
                    e.getMessage() + ": timeout set at " +
                    Integer.toString(timeout) + "ms.");
                detailed.initCause(e);
                throw detailed;
            }
            assert socket.isConnected(): "Socket not connected " + host;
            connected = true;
        } finally {
            if (!connected) {
                closeQuietly(socket);
            }
        }
        return socket;
    }

    /**
     * Closes a socket that failed to bind or connect, ignoring any error
     * raised by the close itself.
     *
     * @param socket Socket to close.
     */
    private static void closeQuietly(final Socket socket) {
        try {
            socket.close();
        } catch (IOException ignored) {
            // The bind/connect failure already propagating is the error the
            // caller needs to see; a failure to close must not mask it.
        }
    }

    /**
     * Looks up the IP address recorded for <code>host</code> in the given
     * cache.
     *
     * @param cache Cache to consult; a <code>null</code> cache is treated as
     * a failed lookup.
     * @param host Host name to look up.
     * @return The IP held by the cache's {@link CrawlHost} for
     * <code>host</code>; never <code>null</code>.
     * @throws IOException If the cache is <code>null</code>, has no entry
     * for the host, or the entry has no IP.
     */
    static InetAddress getHostAddress(final ServerCache cache,
            final String host) throws IOException {
        InetAddress result = null;
        if (cache != null) {
            CrawlHost ch = cache.getHostFor(host);
            if (ch != null) {
                result = ch.getIP();
            }
        }
        if (result == null) {
            throw new IOException("Failed to get host " + host +
                " address from ServerCache");
        }
        return result;
    }

    /**
     * Creates a socket connected to <code>host:port</code>.
     *
     * @param host Remote host name.
     * @param port Remote port.
     * @return A socket created by {@link Socket#Socket(String, int)}.
     * @throws IOException If the socket cannot be created or connected.
     * @throws UnknownHostException If the host cannot be resolved.
     */
    public Socket createSocket(String host, int port)
    throws IOException, UnknownHostException {
        return new Socket(host, port);
    }

    /**
     * All instances of exactly this class are considered equal, since the
     * factory carries no state.
     *
     * @param obj Object to compare against.
     * @return <code>true</code> if <code>obj</code> is non-null and its
     * runtime class is exactly <code>HeritrixProtocolSocketFactory</code>.
     */
    public boolean equals(Object obj) {
        return ((obj != null) &&
            obj.getClass().equals(HeritrixProtocolSocketFactory.class));
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: the same value for
     * every instance, derived from the class object.
     *
     * @return Hash code of <code>HeritrixProtocolSocketFactory.class</code>.
     */
    public int hashCode() {
        return HeritrixProtocolSocketFactory.class.hashCode();
    }
}