package net.nutch.protocol.ftp;

import javax.activation.MimetypesFileTypeMap;

import org.apache.commons.net.ftp.FTPFileEntryParser;

import net.nutch.net.protocols.HttpDateFormat;

import net.nutch.util.LogFormatter;
import net.nutch.util.NutchConf;

import net.nutch.protocol.Content;
import net.nutch.protocol.Protocol;

import java.util.logging.Level;
import java.util.logging.Logger;

import java.net.URL;

import java.io.InputStream;
import java.io.IOException;

/**
 * {@link Protocol} implementation for FTP URLs.
 *
 * <p>{@link #getContent(String)} builds an {@link FtpResponse} for the
 * requested URL, returns its content when the response code is 200, and
 * follows up to {@link #MAX_REDIRECTS} redirects (codes 300-399) using the
 * response's <code>Location</code> header.
 *
 * <p>Most settings are read from {@link NutchConf}; a few can be overridden
 * through the setters. Note that {@link #timeout} and
 * {@link #maxContentLength} are static, so overriding them affects every
 * instance in the JVM.
 */
public class Ftp implements Protocol {

  public static final Logger LOG =
    LogFormatter.getLogger("net.nutch.protocol.ftp.Ftp");

  /** Not referenced in this class; presumably an I/O buffer size used by collaborators. */
  static final int BUFFER_SIZE = 16384;

  /** Maximum number of redirects {@link #getContent(String)} follows before giving up. */
  static final int MAX_REDIRECTS = 5;

  /**
   * Value of <code>ftp.timeout</code> (default 10000). Presumably in
   * milliseconds: {@link #main(String[])} multiplies its <code>-timeout</code>
   * argument by 1000 before storing it here.
   */
  static int timeout = NutchConf.getInt("ftp.timeout", 10000);

  /** Value of <code>ftp.content.limit</code> (default 64*1024); presumably a size in bytes. */
  static int maxContentLength = NutchConf.getInt("ftp.content.limit",64*1024);

  /** Value of <code>ftp.username</code> (default "anonymous"). */
  String userName = NutchConf.get("ftp.username", "anonymous");

  /** Value of <code>ftp.password</code> (default "anonymous@example.com"). */
  String passWord = NutchConf.get("ftp.password", "anonymous@example.com");

  /** Value of <code>ftp.server.timeout</code> (default 60*1000). */
  int serverTimeout = NutchConf.getInt("ftp.server.timeout", 60*1000);

  /** Not used in this class; NOTE(review): semantics are defined by whoever updates it - confirm. */
  long renewalTime = -1;

  /** Value of <code>ftp.keep.connection</code> (default false). */
  boolean keepConnection = NutchConf.getBoolean("ftp.keep.connection", false);

  /** Value of <code>ftp.follow.talk</code> (default false). */
  boolean followTalk = NutchConf.getBoolean("ftp.follow.talk", false);

  /** FTP client; logged out and disconnected by {@link #finalize()} if still connected. */
  Client client = null;

  /** Not assigned in this class; presumably set by the code that lists remote directories. */
  FTPFileEntryParser parser = null;

  /** Date formatter created once per instance in the constructor. */
  HttpDateFormat httpDateFormat = null;

  /** Mime-type map loaded from <code>mime.types.file</code>, or null if that resource is missing. */
  static MimetypesFileTypeMap TYPE_MAP = null;

  static {
    try {
      InputStream is =
        NutchConf.getConfResourceAsInputStream
        (NutchConf.get("mime.types.file"));
      if (is == null) {
        LOG.warning
          ("no mime.types.file: won't use url extension for content-type.");
        TYPE_MAP = null;
      } else {
        // Close the stream even if building the map fails.
        try {
          TYPE_MAP = new MimetypesFileTypeMap(is);
        } finally {
          is.close();
        }
      }
    } catch (IOException e) {
      LOG.log(Level.SEVERE, "Unexpected error", e);
    }
  }

  /** Creates a protocol instance with its own {@link HttpDateFormat}. */
  public Ftp() {
    this.httpDateFormat = new HttpDateFormat();
  }

  /**
   * Overrides the configured timeout.
   *
   * @param timeout new timeout value; stored in a static field, so it
   *        applies to all instances
   */
  public void setTimeout(int timeout) {
    Ftp.timeout = timeout;
  }

  /**
   * Overrides the configured content limit.
   *
   * @param length new limit; stored in a static field, so it applies to all
   *        instances
   */
  public void setMaxContentLength(int length) {
    Ftp.maxContentLength = length;
  }

  /**
   * Overrides the <code>ftp.follow.talk</code> setting for this instance.
   *
   * @param followTalk new value of the flag
   */
  public void setFollowTalk(boolean followTalk) {
    this.followTalk = followTalk;
  }

  /**
   * Overrides the <code>ftp.keep.connection</code> setting for this instance.
   *
   * @param keepConnection new value of the flag
   */
  public void setKeepConnection(boolean keepConnection) {
    this.keepConnection = keepConnection;
  }

  /**
   * Fetches the content for the given URL.
   *
   * @param urlString URL to fetch
   * @return the content of the first response whose code is 200
   * @throws FtpException if the URL is malformed, an I/O error occurs, or
   *         more than {@link #MAX_REDIRECTS} redirects are encountered
   * @throws FtpError (via the declared exception type) if the response code
   *         is neither 200 nor in the 300-399 range
   */
  public Content getContent(String urlString) throws FtpException {
    try {
      URL url = new URL(urlString);

      int redirects = 0;

      while (true) {
        // The original url string is always passed along with the
        // (possibly redirected) url that is actually fetched.
        FtpResponse response = new FtpResponse(urlString, url, this);

        int code = response.getCode();

        if (code == 200) {                          // got a good response
          return response.toContent();

        } else if (code >= 300 && code < 400) {     // handle redirect
          if (redirects == MAX_REDIRECTS)
            throw new FtpException("Too many redirects: " + url);
          url = new URL(response.getHeader("Location"));
          redirects++;
          if (LOG.isLoggable(Level.FINE))
            LOG.fine("redirect to " + url);

        } else {                                    // convert to exception
          throw new FtpError(code);
        }
      }
    } catch (IOException e) {
      throw new FtpException(e);
    }
  }

  /**
   * Best-effort cleanup: logs out and disconnects the client if it is still
   * connected. The disconnect is attempted even when the logout fails, and
   * failures are only logged because nothing can be done about them here.
   */
  protected void finalize () {
    try {
      if (this.client != null && this.client.isConnected()) {
        try {
          this.client.logout();
        } finally {
          this.client.disconnect();
        }
      }
    } catch (IOException e) {
      LOG.log(Level.FINE, "error while closing ftp connection", e);
    }
  }

  /**
   * Returns the value following an option on the command line, or prints the
   * usage message and exits if the option is the last argument.
   */
  private static String optionValue(String[] args, int index, String usage) {
    if (index >= args.length) {
      System.err.println(usage);
      System.exit(-1);
    }
    return args[index];
  }

  /**
   * Command-line entry point: fetches one URL and prints its Content-Type,
   * Content-Length and Last-Modified values to stderr, optionally dumping the
   * content to stdout.
   *
   * @param args see the usage string printed when arguments are invalid
   * @throws Exception if option values are not valid or the fetch fails
   */
  public static void main(String [] args) throws Exception {
    int timeout = Integer.MIN_VALUE;
    int maxContentLength = Integer.MIN_VALUE;
    String logLevel = "info";
    boolean followTalk = false;
    boolean keepConnection = false;
    boolean dumpContent = false;
    String urlString = null;

    String usage = "Usage: Ftp [-logLevel level] [-followTalk] [-keepConnection] [-timeout N] [-maxContentLength L] [-dumpContent] url";

    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }

    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("-logLevel")) {
        logLevel = optionValue(args, ++i, usage);
      } else if (args[i].equals("-followTalk")) {
        followTalk = true;
      } else if (args[i].equals("-keepConnection")) {
        keepConnection = true;
      } else if (args[i].equals("-timeout")) {
        // The command-line value is scaled by 1000 before being applied.
        timeout = Integer.parseInt(optionValue(args, ++i, usage)) * 1000;
      } else if (args[i].equals("-maxContentLength")) {
        maxContentLength = Integer.parseInt(optionValue(args, ++i, usage));
      } else if (args[i].equals("-dumpContent")) {
        dumpContent = true;
      } else if (i != args.length-1) {
        System.err.println(usage);
        System.exit(-1);
      } else {
        urlString = args[i];
      }
    }

    // The last argument was an option, so no url was given.
    if (urlString == null) {
      System.err.println(usage);
      System.exit(-1);
    }

    Ftp ftp = new Ftp();

    ftp.setFollowTalk(followTalk);
    ftp.setKeepConnection(keepConnection);

    // MIN_VALUE marks "not given on the command line": keep configured values.
    if (timeout != Integer.MIN_VALUE)
      ftp.setTimeout(timeout);

    if (maxContentLength != Integer.MIN_VALUE)
      ftp.setMaxContentLength(maxContentLength);

    // Level names are upper-case ASCII; use a fixed locale so the conversion
    // does not depend on the platform default (e.g. the Turkish dotted I).
    LOG.setLevel(Level.parse(logLevel.toUpperCase(java.util.Locale.ENGLISH)));

    Content content = ftp.getContent(urlString);

    System.err.println("Content-Type: " + content.getContentType());
    System.err.println("Content-Length: " + content.get("Content-Length"));
    System.err.println("Last-Modified: " + content.get("Last-Modified"));
    if (dumpContent) {
      // NOTE(review): decodes with the platform default charset - confirm
      // whether the content's own encoding should be used instead.
      System.out.print(new String(content.getContent()));
    }
  }

}