KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > protocol > ftp > Client


1 package net.nutch.protocol.ftp;
2
3 import java.io.BufferedReader JavaDoc;
4 import java.io.IOException JavaDoc;
5 import java.io.InputStream JavaDoc;
6 import java.io.InputStreamReader JavaDoc;
7 import java.io.OutputStream JavaDoc;
8
9 import java.net.InetAddress JavaDoc;
10 import java.net.Socket JavaDoc;
11
12 import java.util.List JavaDoc;
13 //import java.util.LinkedList;
14

15 import org.apache.commons.net.MalformedServerReplyException;
16
17 import org.apache.commons.net.ftp.FTP;
18 import org.apache.commons.net.ftp.FTPCommand;
19 import org.apache.commons.net.ftp.FTPFile;
20 import org.apache.commons.net.ftp.FTPFileEntryParser;
21 import org.apache.commons.net.ftp.FTPReply;
22
23 import org.apache.commons.net.ftp.FTPConnectionClosedException;
24
25 /***********************************************
26  * Client.java encapsulates functionalities necessary for nutch to
27  * get dir list and retrieve file from an FTP server.
28  * This class takes care of all low level details of interacting
29  * with an FTP server and provides a convenient higher level interface.
30  *
31  * Modified from FtpClient.java in apache commons-net.
32  *
33  * Notes by John Xing:
34  * ftp server implementations are hardly uniform and none seems to follow
35  * RFCs whole-heartedly. We have no choice, but assume common denominator
36  * as following:
37  * (1) Use stream mode for data transfer. Block mode will be better for
38  * multiple file downloading and partial file downloading. However
39  * not every ftpd has block mode support.
40  * (2) Use passive mode for data connection.
41  * So nutch will work if we run behind firewall.
42  * (3) Data connection is opened/closed per ftp command for the reasons
43  * listed in (1). There are ftp servers out there,
44  * when partial downloading is enforced by closing data channel
45  * socket on our client side, the server side immediately closes
46  * control channel (socket). Our codes deal with such a bad behavior.
47  * (4) LIST is used to obtain remote file attributes if possible.
48  * MDTM & SIZE would be nice, but not as ubiquitously implemented as LIST.
49  * (5) Avoid using ABOR in single thread? Do not use it at all.
50  *
51  * About exceptions:
52  * Some specific exceptions are re-thrown as one of FtpException*.java
53  * In fact, each function throws FtpException*.java or pass IOException.
54  *
55  * @author John Xing
56  ***********************************************/

57
58 public class Client extends FTP
59 {
60     private int __dataTimeout;
61     private int __passivePort;
62     private String JavaDoc __passiveHost;
63     private int __fileType, __fileFormat;
64     private boolean __remoteVerificationEnabled;
65     private FTPFileEntryParser __entryParser;
66     private String JavaDoc __systemName;
67
/***
 * Creates a client with per-connection state at its defaults, no data
 * timeout (-1) and remote verification of data connections switched on.
 ***/
public Client()
{
    // These two settings are not touched by __initDefaults(), so the
    // order relative to that call does not matter.
    this.__remoteVerificationEnabled = true;
    this.__dataTimeout = -1;
    this.__initDefaults();
}
75
76     // defaults when initialize
77
private void __initDefaults()
78     {
79         __passiveHost = null;
80         __passivePort = -1;
81         __fileType = FTP.ASCII_FILE_TYPE;
82         __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
83         __systemName = null;
84         __entryParser = null;
85     }
86
87     // parse reply for pass()
88
private void __parsePassiveModeReply(String JavaDoc reply)
89     throws MalformedServerReplyException
90     {
91         int i, index, lastIndex;
92         String JavaDoc octet1, octet2;
93         StringBuffer JavaDoc host;
94
95         reply = reply.substring(reply.indexOf('(') + 1,
96                                 reply.indexOf(')')).trim();
97
98         host = new StringBuffer JavaDoc(24);
99         lastIndex = 0;
100         index = reply.indexOf(',');
101         host.append(reply.substring(lastIndex, index));
102
103         for (i = 0; i < 3; i++)
104         {
105             host.append('.');
106             lastIndex = index + 1;
107             index = reply.indexOf(',', lastIndex);
108             host.append(reply.substring(lastIndex, index));
109         }
110
111         lastIndex = index + 1;
112         index = reply.indexOf(',', lastIndex);
113
114         octet1 = reply.substring(lastIndex, index);
115         octet2 = reply.substring(index + 1);
116
117         // index and lastIndex now used as temporaries
118
try
119         {
120             index = Integer.parseInt(octet1);
121             lastIndex = Integer.parseInt(octet2);
122         }
123         catch (NumberFormatException JavaDoc e)
124         {
125             throw new MalformedServerReplyException(
126                 "Could not parse passive host information.\nServer Reply: " + reply);
127         }
128
129         index <<= 8;
130         index |= lastIndex;
131
132         __passiveHost = host.toString();
133         __passivePort = index;
134     }
135
136     // open passive data connection socket
137
protected Socket JavaDoc __openPassiveDataConnection(int command, String JavaDoc arg)
138       throws IOException JavaDoc, FtpExceptionCanNotHaveDataConnection {
139         Socket JavaDoc socket;
140
141 // // 20040317, xing, accommodate ill-behaved servers, see below
142
// int port_previous = __passivePort;
143

144         if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
145           throw new FtpExceptionCanNotHaveDataConnection(
146             "pasv() failed. " + getReplyString());
147
148         try {
149           __parsePassiveModeReply(getReplyStrings()[0]);
150         } catch (MalformedServerReplyException e) {
151           throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
152         }
153
154 // // 20040317, xing, accommodate ill-behaved servers, see above
155
// int count = 0;
156
// System.err.println("__passivePort "+__passivePort);
157
// System.err.println("port_previous "+port_previous);
158
// while (__passivePort == port_previous) {
159
// // just quit if too many tries. make it an exception here?
160
// if (count++ > 10)
161
// return null;
162
// // slow down further for each new try
163
// Thread.sleep(500*count);
164
// if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
165
// throw new FtpExceptionCanNotHaveDataConnection(
166
// "pasv() failed. " + getReplyString());
167
// //return null;
168
// try {
169
// __parsePassiveModeReply(getReplyStrings()[0]);
170
// } catch (MalformedServerReplyException e) {
171
// throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
172
// }
173
// }
174

175         socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
176
177         if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
178           socket.close();
179           return null;
180         }
181
182         if (__remoteVerificationEnabled && !verifyRemote(socket))
183         {
184             InetAddress JavaDoc host1, host2;
185
186             host1 = socket.getInetAddress();
187             host2 = getRemoteAddress();
188
189             socket.close();
190
191             // our precaution
192
throw new FtpExceptionCanNotHaveDataConnection(
193                 "Host attempting data connection " + host1.getHostAddress() +
194                 " is not same as server " + host2.getHostAddress() +
195                 " So we intentionally close it for security precaution."
196                 );
197         }
198
199         if (__dataTimeout >= 0)
200             socket.setSoTimeout(__dataTimeout);
201
202         return socket;
203     }
204
205     /***
206      * Sets the timeout in milliseconds to use for data connection.
207      * set immediately after opening the data connection.
208      ***/

209     public void setDataTimeout(int timeout)
210     {
211         __dataTimeout = timeout;
212     }
213
214     /***
215      * Closes the connection to the FTP server and restores
216      * connection parameters to the default values.
217      * <p>
218      * @exception IOException If an error occurs while disconnecting.
219      ***/

220     public void disconnect() throws IOException JavaDoc
221     {
222         __initDefaults();
223         super.disconnect();
224         // no worry for data connection, since we always close it
225
// in every ftp command that invloves data connection
226
}
227
228     /***
229      * Enable or disable verification that the remote host taking part
230      * of a data connection is the same as the host to which the control
231      * connection is attached. The default is for verification to be
232      * enabled. You may set this value at any time, whether the
233      * FTPClient is currently connected or not.
234      * <p>
235      * @param enable True to enable verification, false to disable verification.
236      ***/

237     public void setRemoteVerificationEnabled(boolean enable)
238     {
239         __remoteVerificationEnabled = enable;
240     }
241
242     /***
243      * Return whether or not verification of the remote host participating
244      * in data connections is enabled. The default behavior is for
245      * verification to be enabled.
246      * <p>
247      * @return True if verification is enabled, false if not.
248      ***/

249     public boolean isRemoteVerificationEnabled()
250     {
251         return __remoteVerificationEnabled;
252     }
253
254     /***
255      * Login to the FTP server using the provided username and password.
256      * <p>
257      * @param username The username to login under.
258      * @param password The password to use.
259      * @return True if successfully completed, false if not.
260      * @exception FTPConnectionClosedException
261      * If the FTP server prematurely closes the connection as a result
262      * of the client being idle or some other reason causing the server
263      * to send FTP reply code 421. This exception may be caught either
264      * as an IOException or independently as itself.
265      * @exception IOException If an I/O error occurs while either sending a
266      * command to the server or receiving a reply from the server.
267      ***/

268     public boolean login(String JavaDoc username, String JavaDoc password) throws IOException JavaDoc
269     {
270         user(username);
271
272         if (FTPReply.isPositiveCompletion(getReplyCode()))
273             return true;
274
275         // If we get here, we either have an error code, or an intermmediate
276
// reply requesting password.
277
if (!FTPReply.isPositiveIntermediate(getReplyCode()))
278             return false;
279
280         return FTPReply.isPositiveCompletion(pass(password));
281     }
282
283     /***
284      * Logout of the FTP server by sending the QUIT command.
285      * <p>
286      * @return True if successfully completed, false if not.
287      * @exception FTPConnectionClosedException
288      * If the FTP server prematurely closes the connection as a result
289      * of the client being idle or some other reason causing the server
290      * to send FTP reply code 421. This exception may be caught either
291      * as an IOException or independently as itself.
292      * @exception IOException If an I/O error occurs while either sending a
293      * command to the server or receiving a reply from the server.
294      ***/

295     public boolean logout() throws IOException JavaDoc
296     {
297         return FTPReply.isPositiveCompletion(quit());
298     }
299
300     // retrieve list reply for path
301
public void retrieveList(String JavaDoc path, List JavaDoc entries, int limit,
302       FTPFileEntryParser parser)
303       throws IOException JavaDoc,
304         FtpExceptionCanNotHaveDataConnection,
305         FtpExceptionUnknownForcedDataClose,
306         FtpExceptionControlClosedByForcedDataClose {
307       Socket JavaDoc socket = __openPassiveDataConnection(FTPCommand.LIST, path);
308
309       if (socket == null)
310         throw new FtpExceptionCanNotHaveDataConnection("LIST "
311           + ((path == null) ? "" : path));
312
313       BufferedReader JavaDoc reader =
314           new BufferedReader JavaDoc(new InputStreamReader JavaDoc(socket.getInputStream()));
315
316       // force-close data channel socket, when download limit is reached
317
boolean mandatory_close = false;
318
319       //List entries = new LinkedList();
320
int count = 0;
321       String JavaDoc line = parser.readNextEntry(reader);
322       while (line != null) {
323         FTPFile ftpFile = parser.parseFTPEntry(line);
324         // skip non-formatted lines
325
if (ftpFile == null) {
326           line = parser.readNextEntry(reader);
327           continue;
328         }
329         entries.add(ftpFile);
330         count += line.length();
331         // impose download limit if limit > 0, otherwise no limit
332
// here, cut off is up to the line when total bytes is just over limit
333
if (limit > 0 && count > limit) {
334           mandatory_close = true;
335           break;
336         }
337         line = parser.readNextEntry(reader);
338       }
339
340       //if (mandatory_close)
341
// you always close here, no matter mandatory_close or not.
342
// however different ftp servers respond differently, see below.
343
socket.close();
344
345       // scenarios:
346
// (1) mandatory_close is false, download limit not reached
347
// no special care here
348
// (2) mandatory_close is true, download limit is reached
349
// different servers have different reply codes:
350

351       try {
352         int reply = getReply();
353         if (!_notBadReply(reply))
354           throw new FtpExceptionUnknownForcedDataClose(getReplyString());
355       } catch (FTPConnectionClosedException e) {
356         // some ftp servers will close control channel if data channel socket
357
// is closed by our end before all data has been read out. Check:
358
// tux414.q-tam.hp.com FTP server (hp.com version whp02)
359
// so must catch FTPConnectionClosedException thrown by getReply() above
360
//disconnect();
361
throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
362       }
363
364     }
365
366     // retrieve file for path
367
public void retrieveFile(String JavaDoc path, OutputStream JavaDoc os, int limit)
368       throws IOException JavaDoc,
369         FtpExceptionCanNotHaveDataConnection,
370         FtpExceptionUnknownForcedDataClose,
371         FtpExceptionControlClosedByForcedDataClose {
372
373       Socket JavaDoc socket = __openPassiveDataConnection(FTPCommand.RETR, path);
374
375       if (socket == null)
376         throw new FtpExceptionCanNotHaveDataConnection("RETR "
377           + ((path == null) ? "" : path));
378
379       InputStream JavaDoc input = socket.getInputStream();
380
381       // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
382
// do we ever need ASCII_FILE_TYPE?
383
//if (__fileType == ASCII_FILE_TYPE)
384
// input = new FromNetASCIIInputStream(input);
385

386       // fixme, should we instruct server here for binary file type?
387

388       // force-close data channel socket
389
boolean mandatory_close = false;
390
391       int len; int count = 0;
392       byte[] buf =
393         new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
394       while((len=input.read(buf,0,buf.length)) != -1){
395         count += len;
396         // impose download limit if limit > 0, otherwise no limit
397
// here, cut off is exactly of limit bytes
398
if (limit > 0 && count > limit) {
399           os.write(buf,0,len-(count-limit));
400           mandatory_close = true;
401           break;
402         }
403         os.write(buf,0,len);
404         os.flush();
405       }
406
407       //if (mandatory_close)
408
// you always close here, no matter mandatory_close or not.
409
// however different ftp servers respond differently, see below.
410
socket.close();
411
412       // scenarios:
413
// (1) mandatory_close is false, download limit not reached
414
// no special care here
415
// (2) mandatory_close is true, download limit is reached
416
// different servers have different reply codes:
417

418       // do not need this
419
//sendCommand("ABOR");
420

421       try {
422         int reply = getReply();
423         if (!_notBadReply(reply))
424           throw new FtpExceptionUnknownForcedDataClose(getReplyString());
425       } catch (FTPConnectionClosedException e) {
426         // some ftp servers will close control channel if data channel socket
427
// is closed by our end before all data has been read out. Check:
428
// tux414.q-tam.hp.com FTP server (hp.com version whp02)
429
// so must catch FTPConnectionClosedException thrown by getReply() above
430
//disconnect();
431
throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
432       }
433
434     }
435
436     // reply check after closing data connection
437
private boolean _notBadReply(int reply) {
438
439       if (FTPReply.isPositiveCompletion(reply)) {
440         // do nothing
441
} else if (reply == 426) { // FTPReply.TRANSFER_ABORTED
442
// some ftp servers reply 426, e.g.,
443
// foggy FTP server (Version wu-2.6.2(2)
444
// there is second reply witing? no!
445
//getReply();
446
} else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN
447
// some ftp servers reply 450, e.g.,
448
// ProFTPD [ftp.kernel.org]
449
// there is second reply witing? no!
450
//getReply();
451
} else if (reply == 451) { // FTPReply.ACTION_ABORTED
452
// some ftp servers reply 451, e.g.,
453
// ProFTPD [ftp.kernel.org]
454
// there is second reply witing? no!
455
//getReply();
456
} else if (reply == 451) { // FTPReply.ACTION_ABORTED
457
} else {
458       // what other kind of ftp server out there?
459
return false;
460       }
461
462       return true;
463     }
464
465     /***
466      * Sets the file type to be transferred. This should be one of
467      * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>,
468      * etc. The file type only needs to be set when you want to change the
469      * type. After changing it, the new type stays in effect until you change
470      * it again. The default file type is <code> FTP.ASCII_FILE_TYPE </code>
471      * if this method is never called.
472      * <p>
473      * @param fileType The <code> _FILE_TYPE </code> constant indcating the
474      * type of file.
475      * @return True if successfully completed, false if not.
476      * @exception FTPConnectionClosedException
477      * If the FTP server prematurely closes the connection as a result
478      * of the client being idle or some other reason causing the server
479      * to send FTP reply code 421. This exception may be caught either
480      * as an IOException or independently as itself.
481      * @exception IOException If an I/O error occurs while either sending a
482      * command to the server or receiving a reply from the server.
483      ***/

484     public boolean setFileType(int fileType) throws IOException JavaDoc
485     {
486         if (FTPReply.isPositiveCompletion(type(fileType)))
487         {
488             __fileType = fileType;
489             __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
490             return true;
491         }
492         return false;
493     }
494
495     /***
496      * Fetches the system type name from the server and returns the string.
497      * This value is cached for the duration of the connection after the
498      * first call to this method. In other words, only the first time
499      * that you invoke this method will it issue a SYST command to the
500      * FTP server. FTPClient will remember the value and return the
501      * cached value until a call to disconnect.
502      * <p>
503      * @return The system type name obtained from the server. null if the
504      * information could not be obtained.
505      * @exception FTPConnectionClosedException
506      * If the FTP server prematurely closes the connection as a result
507      * of the client being idle or some other reason causing the server
508      * to send FTP reply code 421. This exception may be caught either
509      * as an IOException or independently as itself.
510      * @exception IOException If an I/O error occurs while either sending a
511      * command to the server or receiving a reply from the server.
512      ***/

513     public String JavaDoc getSystemName()
514       throws IOException JavaDoc, FtpExceptionBadSystResponse
515     {
516       //if (syst() == FTPReply.NAME_SYSTEM_TYPE)
517
// Technically, we should expect a NAME_SYSTEM_TYPE response, but
518
// in practice FTP servers deviate, so we soften the condition to
519
// a positive completion.
520
if (__systemName == null && FTPReply.isPositiveCompletion(syst())) {
521             __systemName = (getReplyStrings()[0]).substring(4);
522         } else {
523             throw new FtpExceptionBadSystResponse(
524               "Bad response of SYST: " + getReplyString());
525         }
526
527         return __systemName;
528     }
529
530     /***
531      * Sends a NOOP command to the FTP server. This is useful for preventing
532      * server timeouts.
533      * <p>
534      * @return True if successfully completed, false if not.
535      * @exception FTPConnectionClosedException
536      * If the FTP server prematurely closes the connection as a result
537      * of the client being idle or some other reason causing the server
538      * to send FTP reply code 421. This exception may be caught either
539      * as an IOException or independently as itself.
540      * @exception IOException If an I/O error occurs while either sending a
541      * command to the server or receiving a reply from the server.
542      ***/

543     public boolean sendNoOp() throws IOException JavaDoc
544     {
545         return FTPReply.isPositiveCompletion(noop());
546     }
547
548 // client.stat(path);
549
// client.sendCommand("STAT");
550
// client.sendCommand("STAT",path);
551
// client.sendCommand("MDTM",path);
552
// client.sendCommand("SIZE",path);
553
// client.sendCommand("HELP","SITE");
554
// client.sendCommand("SYST");
555
// client.setRestartOffset(120);
556

557 }
558
Popular Tags