KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > protocol > ftp > FtpResponse


1 /* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.protocol.ftp;
5
6 import javax.activation.MimetypesFileTypeMap JavaDoc;
7 // 20040528, xing, disabled for now
8
//import xing.net.nutch.util.magicfile.*;
9

10 import org.apache.commons.net.ftp.FTP;
11 import org.apache.commons.net.ftp.FTPFile;
12 import org.apache.commons.net.ftp.FTPReply;
13
14 import org.apache.commons.net.ftp.parser.DefaultFTPFileEntryParserFactory;
15 import org.apache.commons.net.ftp.parser.ParserInitializationException;
16
17 import net.nutch.protocol.Content;
18
19 import java.net.InetAddress JavaDoc;
20 import java.net.URL JavaDoc;
21
22 import java.lang.Exception JavaDoc;
23 import java.lang.StackTraceElement JavaDoc;
24
25 import java.util.List JavaDoc;
26 import java.util.LinkedList JavaDoc;
27 import java.util.Map JavaDoc;
28 import java.util.TreeMap JavaDoc;
29 import java.util.Properties JavaDoc;
30
31 import java.util.logging.Level JavaDoc;
32
33 import java.io.ByteArrayOutputStream JavaDoc;
34 //import java.io.InputStream;
35
import java.io.IOException JavaDoc;
36
37 /************************************
38  * FtpResponse.java mimics ftp replies as http response.
39  * It tries its best to follow http's way for headers, response codes
40  * as well as exceptions.
41  *
42  * Comments:
43  * In this class, all FtpException*.java thrown by Client.java
44  * and some important commons-net exceptions passed by Client.java
45  * must have been properly dealt with. They'd better not be leaked
46  * to the caller of this class.
47  *
48  * @author John Xing
49  ***********************************/

50 public class FtpResponse {
51   private String JavaDoc orig;
52   private String JavaDoc base;
53   private byte[] content;
54   private int code;
55   private Properties JavaDoc headers = new Properties JavaDoc();
56
57   private final Ftp ftp;
58
59   /** Returns the response code. */
60   public int getCode() { return code; }
61
62   /** Returns the value of a named header. */
63   public String JavaDoc getHeader(String JavaDoc name) {
64     return (String JavaDoc)headers.get(name);
65   }
66
67   public byte[] getContent() { return content; }
68
69   public Content toContent() {
70     return new Content(orig, base, content,
71                        getHeader("Content-Type"),
72                        headers);
73   }
74
75   public FtpResponse(URL JavaDoc url, Ftp ftp)
76     throws FtpException, IOException JavaDoc {
77     this(url.toString(), url, ftp);
78   }
79
80   public FtpResponse(String JavaDoc orig, URL JavaDoc url, Ftp ftp)
81     throws FtpException, IOException JavaDoc {
82
83     this.orig = orig;
84     this.base = url.toString();
85     this.ftp = ftp;
86
87     if (!"ftp".equals(url.getProtocol()))
88       throw new FtpException("Not a ftp url:" + url);
89
90     if (url.getPath() != url.getFile())
91       Ftp.LOG.warning("url.getPath() != url.getFile(): " + url);
92
93     String JavaDoc path = "".equals(url.getPath()) ? "/" : url.getPath();
94
95     try {
96
97       if (ftp.followTalk) {
98         Ftp.LOG.info("fetching "+url);
99       } else {
100         if (Ftp.LOG.isLoggable(Level.FINE))
101           Ftp.LOG.fine("fetching "+url);
102       }
103
104       InetAddress JavaDoc addr = InetAddress.getByName(url.getHost());
105
106       // idled too long, remote server or ourselves may have timed out,
107
// should start anew.
108
if (ftp.client != null && ftp.keepConnection
109           && ftp.renewalTime < System.currentTimeMillis()) {
110         Ftp.LOG.info("delete client because idled too long");
111         ftp.client = null;
112       }
113
114       // start anew if needed
115
if (ftp.client == null) {
116         if (ftp.followTalk)
117           Ftp.LOG.info("start client");
118         // the real client
119
ftp.client = new Client();
120         // when to renew, take the lesser
121
//ftp.renewalTime = System.currentTimeMillis()
122
// + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
123

124         // timeout for control connection
125
ftp.client.setDefaultTimeout(ftp.timeout);
126         // timeout for data connection
127
ftp.client.setDataTimeout(ftp.timeout);
128
129         // follow ftp talk?
130
if (ftp.followTalk)
131           ftp.client.addProtocolCommandListener(
132             new PrintCommandListener(ftp.LOG));
133       }
134
135       // quit from previous site if at a different site now
136
if (ftp.client.isConnected()) {
137         InetAddress JavaDoc remoteAddress = ftp.client.getRemoteAddress();
138         if (!addr.equals(remoteAddress)) {
139           if (ftp.followTalk)
140             Ftp.LOG.info("disconnect from "+remoteAddress
141             +" before connect to "+addr);
142           // quit from current site
143
ftp.client.logout();
144           ftp.client.disconnect();
145         }
146       }
147
148       // connect to current site if needed
149
if (!ftp.client.isConnected()) {
150
151         if (ftp.followTalk)
152           Ftp.LOG.info("connect to "+addr);
153
154         ftp.client.connect(addr);
155         if (!FTPReply.isPositiveCompletion(ftp.client.getReplyCode())) {
156           ftp.client.disconnect();
157           Ftp.LOG.warning("ftp.client.connect() failed: "
158             + addr + " " + ftp.client.getReplyString());
159           this.code = 500; // http Internal Server Error
160
return;
161         }
162
163         if (ftp.followTalk)
164           Ftp.LOG.info("log into "+addr);
165
166         if (!ftp.client.login(ftp.userName, ftp.passWord)) {
167           // login failed.
168
// please note that some server may return 421 immediately
169
// after USER anonymous, thus ftp.client.login() won't return false,
170
// but throw exception, which then will be handled by caller
171
// (not dealt with here at all) .
172
ftp.client.disconnect();
173           Ftp.LOG.warning("ftp.client.login() failed: "+addr);
174           this.code = 401; // http Unauthorized
175
return;
176         }
177
178         // insist on binary file type
179
if (!ftp.client.setFileType(FTP.BINARY_FILE_TYPE)) {
180           ftp.client.logout();
181           ftp.client.disconnect();
182           Ftp.LOG.warning("ftp.client.setFileType() failed: "+addr);
183           this.code = 500; // http Internal Server Error
184
return;
185         }
186
187         if (ftp.followTalk)
188           Ftp.LOG.info("set parser for "+addr);
189
190         // SYST is valid only after login
191
try {
192           ftp.parser = null;
193           String JavaDoc parserKey = ftp.client.getSystemName();
194           // some server reports as UNKNOWN Type: L8, but in fact UNIX Type: L8
195
if (parserKey.startsWith("UNKNOWN Type: L8"))
196             parserKey = "UNIX Type: L8";
197           ftp.parser = (new DefaultFTPFileEntryParserFactory())
198             .createFileEntryParser(parserKey);
199         } catch (FtpExceptionBadSystResponse e) {
200           Ftp.LOG.warning("ftp.client.getSystemName() failed: "+addr+" "+e);
201           ftp.parser = null;
202         } catch (ParserInitializationException e) {
203           // ParserInitializationException is RuntimeException defined in
204
// org.apache.commons.net.ftp.parser.ParserInitializationException
205
Ftp.LOG.warning("createFileEntryParser() failed. "+addr+" "+e);
206           ftp.parser = null;
207         } finally {
208           if (ftp.parser == null) {
209             // do not log as severe, otherwise
210
// FetcherThread/RequestScheduler will abort
211
Ftp.LOG.warning("ftp.parser is null: "+addr);
212             ftp.client.logout();
213             ftp.client.disconnect();
214             this.code = 500; // http Internal Server Error
215
return;
216           }
217         }
218
219       } else {
220         if (ftp.followTalk)
221           Ftp.LOG.info("use existing connection");
222       }
223
224       this.content = null;
225
226       if (path.endsWith("/")) {
227         getDirAsHttpResponse(path);
228       } else {
229         getFileAsHttpResponse(path);
230       }
231
232       // reset next renewalTime, take the lesser
233
if (ftp.client != null && ftp.keepConnection) {
234         ftp.renewalTime = System.currentTimeMillis()
235           + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
236         if (ftp.followTalk)
237           Ftp.LOG.info("reset renewalTime to "
238             +ftp.httpDateFormat.toString(ftp.renewalTime));
239       }
240
241       // getDirAsHttpResponse() or getFileAsHttpResponse() above
242
// may have deleted ftp.client
243
if (ftp.client != null && !ftp.keepConnection) {
244         if (ftp.followTalk)
245           Ftp.LOG.info("disconnect from "+addr);
246         ftp.client.logout();
247         ftp.client.disconnect();
248       }
249       
250     } catch (Exception JavaDoc e) {
251       ftp.LOG.warning(""+e);
252       StackTraceElement JavaDoc stes[] = e.getStackTrace();
253       for (int i=0; i<stes.length; i++) {
254         ftp.LOG.warning(" "+stes[i].toString());
255       }
256       // for any un-foreseen exception (run time exception or not),
257
// do ultimate clean and leave ftp.client for garbage collection
258
if (ftp.followTalk)
259         Ftp.LOG.info("delete client due to exception");
260       ftp.client = null;
261       // or do explicit garbage collection?
262
// System.gc();
263
// can we be less dramatic, using the following instead?
264
// probably unnecessary for our practical purpose here
265
// try {
266
// ftp.client.logout();
267
// ftp.client.disconnect();
268
// }
269
throw new FtpException(e);
270       //throw e;
271
}
272
273   }
274
275   // get ftp file as http response
276
private void getFileAsHttpResponse(String JavaDoc path)
277     throws IOException JavaDoc {
278
279     ByteArrayOutputStream JavaDoc os = null;
280     List JavaDoc list = null;
281
282     try {
283       // first get its possible attributes
284
list = new LinkedList JavaDoc();
285       ftp.client.retrieveList(path, list, ftp.maxContentLength, ftp.parser);
286
287       os = new ByteArrayOutputStream JavaDoc(ftp.BUFFER_SIZE);
288       ftp.client.retrieveFile(path, os, ftp.maxContentLength);
289
290       FTPFile ftpFile = (FTPFile) list.get(0);
291       this.headers.put("Content-Length",
292         new Long JavaDoc(ftpFile.getSize()).toString());
293       //this.headers.put("content-type", "text/html");
294
this.headers.put("Last-Modified",
295         ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
296       this.content = os.toByteArray();
297
298       String JavaDoc contentType = null;
299       // 20040427, xing, disabled for now
300
//if (contentType == null && ftp.magic != null)
301
// contentType = ftp.magic.getMimeType(this.content);
302
if (contentType == null && ftp.TYPE_MAP != null)
303         contentType = ftp.TYPE_MAP.getContentType(path);
304       if (contentType != null)
305         this.headers.put("Content-Type", contentType);
306
307 // // approximate bytes sent and read
308
// if (this.httpAccounting != null) {
309
// this.httpAccounting.incrementBytesSent(path.length());
310
// this.httpAccounting.incrementBytesRead(this.content.length);
311
// }
312

313       this.code = 200; // http OK
314

315     } catch (FtpExceptionControlClosedByForcedDataClose e) {
316
317       // control connection is off, clean up
318
// ftp.client.disconnect();
319
if (ftp.followTalk)
320         Ftp.LOG.info("delete client because server cut off control channel: "+e);
321       ftp.client = null;
322
323       // in case this FtpExceptionControlClosedByForcedDataClose is
324
// thrown by retrieveList() (not retrieveFile()) above,
325
if (os == null) { // indicating throwing by retrieveList()
326
//throw new FtpException("fail to get attibutes: "+path);
327
Ftp.LOG.warning(
328             "Please try larger maxContentLength for ftp.client.retrieveList(). "
329           + e);
330         // in a way, this is our request fault
331
this.code = 400; // http Bad request
332
return;
333       }
334
335       FTPFile ftpFile = (FTPFile) list.get(0);
336       this.headers.put("Content-Length",
337         new Long JavaDoc(ftpFile.getSize()).toString());
338       //this.headers.put("content-type", "text/html");
339
this.headers.put("Last-Modified",
340         ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
341       this.content = os.toByteArray();
342
343       String JavaDoc contentType = null;
344       // 20040427, xing, disabled for now
345
//if (contentType == null && ftp.magic != null)
346
// contentType = ftp.magic.getMimeType(this.content);
347
if (contentType == null && ftp.TYPE_MAP != null)
348         contentType = ftp.TYPE_MAP.getContentType(path);
349       if (contentType != null)
350         this.headers.put("Content-Type", contentType);
351
352 // // approximate bytes sent and read
353
// if (this.httpAccounting != null) {
354
// this.httpAccounting.incrementBytesSent(path.length());
355
// this.httpAccounting.incrementBytesRead(this.content.length);
356
// }
357

358       this.code = 200; // http OK
359

360     } catch (FtpExceptionCanNotHaveDataConnection e) {
361
362       if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
363       // it is not a file, but dir, so redirect as a dir
364
this.headers.put("Location", path + "/");
365         this.code = 300; // http redirect
366
// fixme, should we do ftp.client.cwd("/"), back to top dir?
367
} else {
368       // it is not a dir either
369
this.code = 404; // http Not Found
370
}
371
372     } catch (FtpExceptionUnknownForcedDataClose e) {
373       // Please note control channel is still live.
374
// in a way, this is our request fault
375
Ftp.LOG.warning(
376           "Unrecognized reply after forced close of data channel. "
377         + "If this is acceptable, please modify Client.java accordingly. "
378         + e);
379       this.code = 400; // http Bad Request
380
}
381
382   }
383
384   // get ftp dir list as http response
385
private void getDirAsHttpResponse(String JavaDoc path)
386     throws IOException JavaDoc {
387     List JavaDoc list = new LinkedList JavaDoc();
388
389     try {
390
391       // change to that dir first
392
if (!FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
393         this.code = 404; // http Not Found
394
return;
395       }
396
397       // fixme, should we do ftp.client.cwd("/"), back to top dir?
398

399       ftp.client.retrieveList(null, list, ftp.maxContentLength, ftp.parser);
400       this.content = list2html(list, path, "/".equals(path) ? false : true);
401       this.headers.put("Content-Length",
402         new Integer JavaDoc(this.content.length).toString());
403       this.headers.put("Content-Type", "text/html");
404       // this.headers.put("Last-Modified", null);
405

406 // // approximate bytes sent and read
407
// if (this.httpAccounting != null) {
408
// this.httpAccounting.incrementBytesSent(path.length());
409
// this.httpAccounting.incrementBytesRead(this.content.length);
410
// }
411

412       this.code = 200; // http OK
413

414     } catch (FtpExceptionControlClosedByForcedDataClose e) {
415
416       // control connection is off, clean up
417
// ftp.client.disconnect();
418
if (ftp.followTalk)
419         Ftp.LOG.info("delete client because server cut off control channel: "+e);
420       ftp.client = null;
421
422       this.content = list2html(list, path, "/".equals(path) ? false : true);
423       this.headers.put("Content-Length",
424         new Integer JavaDoc(this.content.length).toString());
425       this.headers.put("Content-Type", "text/html");
426       // this.headers.put("Last-Modified", null);
427

428 // // approximate bytes sent and read
429
// if (this.httpAccounting != null) {
430
// this.httpAccounting.incrementBytesSent(path.length());
431
// this.httpAccounting.incrementBytesRead(this.content.length);
432
// }
433

434       this.code = 200; // http OK
435

436     } catch (FtpExceptionUnknownForcedDataClose e) {
437       // Please note control channel is still live.
438
// in a way, this is our request fault
439
Ftp.LOG.warning(
440           "Unrecognized reply after forced close of data channel. "
441         + "If this is acceptable, please modify Client.java accordingly. "
442         + e);
443       this.code = 400; // http Bad Request
444
} catch (FtpExceptionCanNotHaveDataConnection e) {
445       Ftp.LOG.warning(""+ e);
446       this.code = 500; // http Iternal Server Error
447
}
448
449   }
450
451   // generate html page from ftp dir list
452
private byte[] list2html(List JavaDoc list, String JavaDoc path, boolean includeDotDot) {
453
454     //StringBuffer x = new StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
455
StringBuffer JavaDoc x = new StringBuffer JavaDoc("<html><head>");
456     x.append("<title>Index of "+path+"</title></head>\n");
457     x.append("<body><h1>Index of "+path+"</h1><pre>\n");
458
459     if (includeDotDot) {
460       x.append("<a HREF='../'>../</a>\t-\t-\t-\n");
461     }
462
463     for (int i=0; i<list.size(); i++) {
464       FTPFile f = (FTPFile) list.get(i);
465       String JavaDoc name = f.getName();
466       String JavaDoc time = ftp.httpDateFormat.toString(f.getTimestamp());
467       if (f.isDirectory()) {
468         // some ftp server LIST "." and "..", we skip them here
469
if (name.equals(".") || name.equals(".."))
470           continue;
471         x.append("<a HREF='"+name+"/"+"'>"+name+"/</a>\t");
472         x.append(time+"\t-\n");
473       } else if (f.isFile()) {
474         x.append("<a HREF='"+name+ "'>"+name+"</a>\t");
475         x.append(time+"\t"+f.getSize()+"\n");
476       } else {
477         // ignore isSymbolicLink()
478
// ignore isUnknown()
479
}
480     }
481
482     x.append("</pre></body></html>\n");
483
484     return new String JavaDoc(x).getBytes();
485   }
486
487 }
488
Popular Tags