KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > GetGraphics


/** GetGraphics - walks through a Web site, finds
 * all the graphics and saves them in a local directory tree.
 *
 * Sample program for the Arachnid web spider framework.
 */

6
7 import java.io.*;
8 import java.net.*;
9 import java.util.*;
10 import bplatt.spider.*;
11
12 public class GetGraphics {
13     
14     public static void main(String JavaDoc[] args) {
15         if (args.length != 2) {
16             System.out.println("java GetGraphics <url> <output directory>");
17             System.exit(-1);
18         }
19         File outdir = new File(args[1]);
20         if (outdir.isDirectory() == false || outdir.canWrite() == false) {
21             System.out.println("Cannot access directory "+args[1]);
22             System.exit(-1);
23         }
24         ImageSpider spider = null;
25         try { spider = new ImageSpider(args[0],outdir); }
26         catch(MalformedURLException e) {
27             System.out.println(e);
28             System.out.println("Invalid URL: "+args[0]);
29             System.exit(-1);
30         }
31         System.out.println("Get Graphics:");
32         spider.traverse();
33         System.out.println("Finished");
34     }
35 }
36     
37 class ImageSpider extends Arachnid {
38     private HashSet images;
39     private File outdir;
40     
41     public ImageSpider(String JavaDoc base, File outdir) throws MalformedURLException
42     {
43         super(base);
44         super.setDelay(5);
45         images = new HashSet();
46         this.outdir = outdir;
47     }
48     protected void handleBadLink(URL url,URL parent, PageInfo p) { }
49     protected void handleBadIO(URL url, URL parent) { }
50         
51     protected void handleLink(PageInfo p) {
52         URL[] list = p.getImages();
53         if (list != null) {
54             for (int i=0; i<list.length; ++i) {
55                 if (images.contains(list[i]) == false) {
56                     images.add(list[i]);
57                     if (saveImage(list[i],outdir))
58                         System.out.println("Saved image from: " + list[i].toString());
59                     else System.out.println("Could not save image: " + list[i].toString());
60                 }
61             }
62         }
63     }
64     protected void handleNonHTMLlink(URL url, URL parent,PageInfo p) { }
65         
66     protected void handleExternalLink(URL url, URL parent) { }
67     
68     private boolean saveImage(URL url, File dir)
69     {
70         String JavaDoc outdir = dir.toString();
71         String JavaDoc file = url.getFile();
72         File outfile;
73         if (outdir == null || file == null || outdir.length() == 0 ||
74             file.length() == 0) return(false);
75         if (File.separatorChar == '\\') {
76             StringBuffer JavaDoc b = new StringBuffer JavaDoc(file);
77             for (int i=0; i<b.length(); ++i)
78                 if (b.charAt(i) == '/') b.setCharAt(i,'\\');
79             file = b.toString();
80         }
81         if (outdir.charAt(outdir.length()-1) == File.separatorChar ||
82             file.charAt(0) == File.separatorChar) outfile = new File(outdir+file);
83         else outfile = new File(outdir+File.separatorChar+file);
84         
85         // Create any needed intermediate directories
86
if (outfile.getParent() != null) {
87             File parentDir = outfile.getParentFile();
88             if (parentDir.exists() == false) parentDir.mkdirs();
89         }
90         
91         byte[] theImage = this.getContent(url);
92         if (theImage != null) {
93             int size = theImage.length;
94             try {
95                 BufferedOutputStream fstream =
96                     new BufferedOutputStream(new FileOutputStream(outfile));
97                 fstream.write(theImage,0,size);
98                 fstream.flush();
99                 fstream.close();
100             }
101             catch(IOException e) { return(false); }
102             return(true);
103         }
104         else return(false);
105     }
106 }
107
108
Popular Tags