KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > jftp > tools > HttpSpider


/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

16 package net.sf.jftp.tools;
17
18 import net.sf.jftp.*;
19 import net.sf.jftp.config.*;
20 import net.sf.jftp.gui.framework.*;
21 import net.sf.jftp.net.*;
22 import net.sf.jftp.system.LocalIO;
23 import net.sf.jftp.system.logging.Log;
24 import net.sf.jftp.util.*;
25
26 import java.awt.*;
27 import java.awt.event.*;
28
29 import java.io.*;
30
31 import java.net.*;
32
33 import java.util.*;
34
35 import javax.swing.*;
36 import javax.swing.event.*;
37
38
39 public class HttpSpider extends HPanel implements Runnable JavaDoc, ActionListener
40 {
41     private HTextField host = new HTextField("Full URL:",
42                                              "http://j-ftp.sourceforge.net/index.html",
43                                              30);
44     private HTextField type = new HTextField("Types (use * for all):",
45                                              "html-htm-css-gif-jpg-zip-gz-avi-mpg",
46                                              25);
47     private HTextField depth = new HTextField("Search up to this many levels deeper:",
48                                               "1", 10);
49     private HTextField dir = new HTextField("Store files in:", "", 25);
50     private JPanel p1 = new JPanel();
51     private JPanel okP = new JPanel();
52     private JButton ok = new JButton("Start");
53     private int currentDepth = 0;
54     private int MAX = 1;
55     private String JavaDoc[] typeArray = { "mpg", "avi", "mpeg", "mov", "rm", "wmv" };
56     private String JavaDoc localDir = ".";
57     private String JavaDoc[] argv;
58     private Thread JavaDoc runner;
59     private boolean stopflag = false;
60     private JButton stop = new JButton("Stop download (ASAP)");
61
62     public HttpSpider(String JavaDoc localDir)
63     {
64         this.localDir = localDir;
65
66         //setSize(440,220);
67
//setLocation(200,250);
68
//setTitle("Http spider...");
69
//getContentPane().
70
setLayout(new BorderLayout());
71
72         //setBackground(Color.lightGray);
73
p1.setLayout(new GridLayout(4, 1, 5, 5));
74         p1.add(host);
75         p1.add(type);
76         p1.add(depth);
77         dir.setText(localDir);
78         p1.add(dir);
79
80         //getContentPane().
81
add("Center", p1);
82
83         //getContentPane().
84
add("South", okP);
85         okP.add(ok);
86         ok.addActionListener(this);
87
88         setVisible(true);
89     }
90
91     public void actionPerformed(ActionEvent e)
92     {
93         if(e.getSource() == ok)
94         {
95             //this.dispose();
96
localDir = dir.getText();
97
98             if(!localDir.endsWith("/"))
99             {
100                 localDir = localDir + "/";
101             }
102
103             String JavaDoc[] argv2 =
104                              {
105                                  host.getText().trim(), type.getText().trim(),
106                                  depth.getText().trim()
107                              };
108             argv = argv2;
109
110             removeAll();
111             add("North",
112                 new JLabel("Starting download, please watch the log window for details"));
113             add("Center", stop);
114             stop.addActionListener(this);
115             JFtp.statusP.jftp.setClosable(this.hashCode(), false);
116             validate();
117
118             runner = new Thread JavaDoc(this);
119             runner.start();
120         }
121         else if(e.getSource() == stop)
122         {
123             stopflag = true;
124         }
125     }
126
127     public void run()
128     {
129         spider(argv);
130
131         if(!stopflag)
132         {
133             Log.debug("\nRecursive download finished.\nOuptut dir: " +
134                       localDir);
135         }
136         else
137         {
138             Log.debug("\nRecursive download aborted.");
139         }
140
141         JFtp.statusP.jftp.ensureLogging();
142         JFtp.statusP.jftp.removeFromDesktop(this.hashCode());
143     }
144
145     private void spider(String JavaDoc[] argv)
146     {
147         try
148         {
149             String JavaDoc url = "http://j-ftp.sourceforge.net/index.html";
150
151             if(argv.length >= 2)
152             {
153                 url = clear(argv[0]);
154
155                 if(url.indexOf("/") < 0)
156                 {
157                     url = url + "/";
158                 }
159
160                 typeArray = check(argv[1]);
161
162                 Log.debugRaw(">>> Scanning for ");
163
164                 for(int i = 0; i < typeArray.length; i++)
165                 {
166                     Log.debugRaw(typeArray[i] + " ");
167                 }
168
169                 Log.debug("");
170             }
171
172             if(argv.length > 2)
173             {
174                 MAX = Integer.parseInt(argv[2]);
175             }
176
177             //for(int i=0; i<typeArray.length; i++) Log.debug("+ "+typeArray[i]);
178
if(stopflag)
179             {
180                 return;
181             }
182
183             Log.debug("Fetching initial HTML file...");
184
185             Holer sammy = new Holer(localDir);
186             sammy.bringAnStart(url, true);
187
188             if(stopflag)
189             {
190                 return;
191             }
192
193             Log.debug("Searching for links...");
194             JFtp.statusP.jftp.ensureLogging();
195             LocalIO.pause(500);
196
197             if(stopflag)
198             {
199                 return;
200             }
201
202             smoke(url);
203         }
204         catch(Exception JavaDoc ex)
205         {
206             ex.printStackTrace();
207         }
208     }
209
210     private String JavaDoc clear(String JavaDoc url)
211     {
212         int idx = url.indexOf("http://");
213
214         if(idx >= 0)
215         {
216             url = url.substring(7);
217         }
218
219         return url;
220     }
221
222     private Vector addVector(Vector v, Vector x)
223     {
224         Enumeration e = x.elements();
225
226         while(e.hasMoreElements())
227         {
228             String JavaDoc next = (String JavaDoc) e.nextElement();
229             v.add(next);
230         }
231
232         return v;
233     }
234
235     private void smoke(String JavaDoc url) throws Exception JavaDoc
236     {
237         if(stopflag)
238         {
239             return;
240         }
241
242         url = clear(url);
243
244         Holer sammy = new Holer(localDir);
245         String JavaDoc zeug = sammy.holZeug(url);
246
247         Vector m = sortiermal(zeug, url.substring(0, url.lastIndexOf("/")),
248                               "href=\"");
249         m = addVector(m,
250                       sortiermal(zeug, url.substring(0, url.lastIndexOf("/")),
251                                  "src=\""));
252         m = addVector(m,
253                       sortiermal(zeug, url.substring(0, url.lastIndexOf("/")),
254                                  "HREF=\""));
255         m = addVector(m,
256                       sortiermal(zeug, url.substring(0, url.lastIndexOf("/")),
257                                  "SRC=\""));
258
259         Enumeration mischen = m.elements();
260
261         while(mischen.hasMoreElements())
262         {
263             if(stopflag)
264             {
265                 return;
266             }
267
268             String JavaDoc next = (String JavaDoc) mischen.nextElement();
269
270             Log.out("Processing: " + next);
271
272             for(int i = 0; i < typeArray.length; i++)
273             {
274                 if(next.endsWith(typeArray[i]) ||
275                        typeArray[i].trim().equals("*"))
276                 {
277                     int x = next.indexOf("/");
278
279                     if((x > 0) && (next.substring(0, x).indexOf(".") > 0))
280                     {
281                         Holer nochnsammy = new Holer(localDir);
282                         nochnsammy.bringAnStart(next, false);
283
284                         if(stopflag)
285                         {
286                             return;
287                         }
288
289                         continue;
290                     }
291                 }
292             }
293
294             if(currentDepth < MAX)
295             {
296                 if(stopflag)
297                 {
298                     return;
299                 }
300
301                 int x = next.indexOf("/");
302
303                 if((x > 0) && (next.substring(0, x).indexOf(".") > 0))
304                 {
305                     currentDepth++;
306                     smoke(next);
307                     currentDepth--;
308                 }
309             }
310         }
311     }
312
313     private Vector sortiermal(String JavaDoc zeug, String JavaDoc url, String JavaDoc index)
314     {
315         Vector mischen = new Vector();
316         int wo = 0;
317
318         while(true)
319         {
320             wo = zeug.indexOf(index);
321
322             if(wo < 0)
323             {
324                 return mischen;
325             }
326
327             zeug = zeug.substring(wo + index.length());
328
329             String JavaDoc was = zeug.substring(0, zeug.indexOf("\""));
330
331             was = checker(was, url);
332             mischen.add(was);
333             Log.out("Added: " + was);
334         }
335     }
336
337     private String JavaDoc[] check(String JavaDoc auswahl)
338     {
339         StringTokenizer flyer = new StringTokenizer(auswahl, "-", false);
340         String JavaDoc[] einkauf = new String JavaDoc[flyer.countTokens()];
341         int tmp = 0;
342
343         while(flyer.hasMoreElements())
344         {
345             einkauf[tmp] = (String JavaDoc) flyer.nextElement();
346             tmp++;
347         }
348
349         return einkauf;
350     }
351
352     private String JavaDoc checker(String JavaDoc was, String JavaDoc url)
353     {
354         was = clear(was);
355
356         if(was.startsWith(url))
357         {
358             return was;
359         }
360
361         if(was.startsWith("/") && (url.indexOf("/") > 0))
362         {
363             was = url.substring(0, url.indexOf("/")) + was;
364         }
365         else if(was.startsWith("/") && (url.indexOf("/") < 0))
366         {
367             was = url + was;
368         }
369         else if((was.indexOf(".") > 0))
370         {
371             int idx = was.indexOf("/");
372             String JavaDoc tmp = "";
373
374             if(idx >= 0)
375             {
376                 tmp = was.substring(0, idx);
377             }
378
379             if((tmp.indexOf(".") > 0))
380             {
381                 return clear(was);
382             }
383
384             if(url.endsWith("/"))
385             {
386                 was = url + was;
387             }
388             else
389             {
390                 was = url + "/" + was;
391             }
392         }
393
394         Log.out("-> " + was);
395
396         return was;
397     }
398
399     public Insets getInsets()
400     {
401         return new Insets(5, 5, 5, 5);
402     }
403 }
404
405
406 class Holer
407 {
408     private String JavaDoc localDir = null;
409
410     public Holer(String JavaDoc localDir)
411     {
412         this.localDir = localDir;
413     }
414
415     public String JavaDoc holZeug(String JavaDoc wat)
416     {
417         try
418         {
419             String JavaDoc dealer = wat.substring(0, wat.indexOf("/"));
420             String JavaDoc wo = wat.substring(wat.indexOf("/"));
421             String JavaDoc zeug = "";
422
423             Log.out(">> " + dealer + wo);
424
425             Socket deal = new Socket(dealer, 80);
426             deal.setSoTimeout(5000);
427
428             BufferedWriter order = new BufferedWriter(new OutputStreamWriter(deal.getOutputStream()));
429             BufferedReader checkung = new BufferedReader(new InputStreamReader(deal.getInputStream()));
430
431             order.write("GET http://" + wat + " HTTP/1.0\n\n");
432             order.flush();
433
434             int len = 0;
435
436             while(!checkung.ready() && (len < 5000))
437             {
438                 chill(100);
439                 len += 100;
440             }
441
442             while(checkung.ready())
443             {
444                 zeug = zeug + checkung.readLine();
445             }
446
447             order.close();
448             checkung.close();
449
450             return zeug;
451         }
452         catch(Exception JavaDoc ex)
453         {
454             ex.printStackTrace();
455         }
456
457         return "";
458     }
459
460     public void bringAnStart(String JavaDoc wat, boolean force)
461     {
462         try
463         {
464             String JavaDoc dealer = wat.substring(0, wat.indexOf("/"));
465             String JavaDoc wo = wat.substring(wat.indexOf("/"));
466             String JavaDoc zeug = "";
467
468             Log.debug(">>> " + dealer + wo);
469
470             //JFtp.statusP.jftp.ensureLogging();
471
File d = new File(localDir);
472             d.mkdir();
473
474             File f = new File(localDir + wo.substring(wo.lastIndexOf("/") + 1));
475
476             if(f.exists() && !force)
477             {
478                 Log.debug(">>> file already exists...");
479
480                 return;
481             }
482             else
483             {
484                 f.delete();
485             }
486
487             Socket deal = new Socket(dealer, 80);
488             BufferedWriter order = new BufferedWriter(new OutputStreamWriter(deal.getOutputStream()));
489             DataInputStream checkung = new DataInputStream(new BufferedInputStream(deal.getInputStream()));
490
491             BufferedOutputStream vorrat = new BufferedOutputStream(new FileOutputStream(localDir +
492                                                                                         wo.substring(wo.lastIndexOf("/") +
493                                                                                                      1)));
494
495             byte[] alu = new byte[2048];
496
497             order.write("GET http://" + wat + " HTTP/1.0\n\n");
498             order.flush();
499
500             boolean line = true;
501             boolean bin = false;
502
503             while(true)
504             {
505                 chill(10);
506
507                 String JavaDoc tmp = "";
508
509                 while(line)
510                 {
511                     String JavaDoc x = checkung.readLine();
512
513                     if(x == null)
514                     {
515                         break;
516                     }
517
518                     tmp += (x + "\n");
519
520                     if(x.equals(""))
521                     {
522                         line = false;
523                     }
524                 }
525
526                 int x = checkung.read(alu);
527
528                 if(x == -1)
529                 {
530                     if(line)
531                     {
532                         vorrat.write(tmp.getBytes(), 0, tmp.length());
533                     }
534
535                     order.close();
536                     checkung.close();
537                     vorrat.flush();
538                     vorrat.close();
539
540                     return;
541                 }
542                 else
543                 {
544                     vorrat.write(alu, 0, x);
545                 }
546             }
547         }
548         catch(Exception JavaDoc ex)
549         {
550             ex.printStackTrace();
551         }
552     }
553
554     private static void chill(int time)
555     {
556         try
557         {
558             Thread.sleep(time);
559         }
560         catch(Exception JavaDoc ex)
561         {
562         }
563     }
564 }
565
Popular Tags