KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > mybatchfwk > test > websitedownloader > DownloadTask


1 /*
2  * Created on 4 oct. 2006
3  */

4 package net.sf.mybatchfwk.test.websitedownloader;
5
6 import java.io.BufferedReader JavaDoc;
7 import java.io.BufferedWriter JavaDoc;
8 import java.io.File JavaDoc;
9 import java.io.FileReader JavaDoc;
10 import java.io.FileWriter JavaDoc;
11 import java.io.IOException JavaDoc;
12 import java.io.InputStreamReader JavaDoc;
13 import java.io.Reader JavaDoc;
14 import java.net.URL JavaDoc;
15 import java.util.LinkedList JavaDoc;
16 import java.util.List JavaDoc;
17
18 import javax.swing.text.BadLocationException JavaDoc;
19 import javax.swing.text.Document JavaDoc;
20 import javax.swing.text.EditorKit JavaDoc;
21 import javax.swing.text.ElementIterator JavaDoc;
22 import javax.swing.text.SimpleAttributeSet JavaDoc;
23 import javax.swing.text.html.HTML JavaDoc;
24 import javax.swing.text.html.HTMLEditorKit JavaDoc;
25
26 import net.sf.mybatchfwk.ITask;
27 import net.sf.mybatchfwk.TaskExecutionException;
28
29 public class DownloadTask implements ITask {
30     
31     private URL JavaDoc url;
32     private File JavaDoc downloadFolder;
33     private String JavaDoc[] links;
34     
35     public DownloadTask(URL JavaDoc url, File JavaDoc downloadFolder) {
36         this.url = url;
37         this.downloadFolder = downloadFolder;
38     }
39
40     public String JavaDoc getId() {
41         return "[D]" + url.toString();
42     }
43
44     public void run() {
45         String JavaDoc fileUrl = url.getFile();
46         
47         File JavaDoc parent = downloadFolder;
48         if (fileUrl.startsWith("/") && (fileUrl.length() > 1) && (fileUrl.indexOf('/', 1) != -1)) {
49             parent = new File JavaDoc(downloadFolder, fileUrl.substring(0, fileUrl.lastIndexOf('/')));
50             parent.mkdirs();
51         }
52         
53         if ((fileUrl.lastIndexOf('/')) < (fileUrl.length()-1)) {
54             File JavaDoc destination = new File JavaDoc(parent, fileUrl.substring(fileUrl.lastIndexOf('/')+1, fileUrl.length()));
55             try {
56                 downloadFile(url, destination);
57                 if (isHTML(destination)) {
58                     this.links = getRelativeLinks(destination);
59                 }
60             } catch (IOException JavaDoc e) {
61                 throw new TaskExecutionException(e);
62             } catch (BadLocationException JavaDoc e) {
63                 throw new TaskExecutionException(e);
64             }
65         }
66     }
67     
68     protected boolean isHTML(File JavaDoc file) {
69         String JavaDoc[] parts = file.getName().split("\\.");
70         if (parts.length > 1) {
71             String JavaDoc extension = parts[parts.length-1];
72             if ("HTM".equalsIgnoreCase(extension) || "HTML".equalsIgnoreCase(extension)) {
73                 return true;
74             }
75         }
76         return false;
77     }
78     
79     protected void downloadFile(URL JavaDoc source, File JavaDoc destination) throws IOException JavaDoc {
80         BufferedReader JavaDoc in = null;
81         BufferedWriter JavaDoc out = null;
82         
83         try {
84             in = new BufferedReader JavaDoc(new InputStreamReader JavaDoc(source.openStream()));
85             out = new BufferedWriter JavaDoc(new FileWriter JavaDoc(destination));
86             
87             String JavaDoc line = null;
88             while ((line = in.readLine()) != null) {
89                 out.write(line + "\n");
90             }
91             in.close();
92             out.close();
93         } finally {
94             if (out != null) {
95                 try {
96                     out.close();
97                 } catch (IOException JavaDoc e) {}
98             }
99             if (in != null) {
100                 try {
101                     in.close();
102                 } catch (IOException JavaDoc e) {}
103             }
104         }
105     }
106     
107     protected String JavaDoc[] getRelativeLinks(File JavaDoc file) throws IOException JavaDoc, BadLocationException JavaDoc {
108         List JavaDoc<String JavaDoc> links = new LinkedList JavaDoc<String JavaDoc>();
109         
110         EditorKit JavaDoc kit = new HTMLEditorKit JavaDoc();
111         Document JavaDoc doc = kit.createDefaultDocument();
112
113         // The Document class does not yet
114
// handle charset's properly.
115
doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
116         // Create a reader on the HTML content.
117
Reader JavaDoc rd = new FileReader JavaDoc(file);
118         
119         // Parse the HTML.
120
kit.read(rd, doc, 0);
121
122         // Iterate through the elements
123
// of the HTML document.
124
ElementIterator JavaDoc it = new ElementIterator JavaDoc(doc);
125         javax.swing.text.Element JavaDoc elem;
126         while ((elem = it.next()) != null) {
127             SimpleAttributeSet JavaDoc s = (SimpleAttributeSet JavaDoc) elem.getAttributes().getAttribute(HTML.Tag.A);
128             if (s != null) {
129                 String JavaDoc href = (String JavaDoc) s.getAttribute(HTML.Attribute.HREF);
130                 if ((href != null) && (!href.startsWith("http"))) {
131                     links.add(href.toString());
132                 }
133             }
134         }
135         
136         return links.toArray(new String JavaDoc[links.size()]);
137     }
138
139     /**
140      * @return Returns the links.
141      */

142     public String JavaDoc[] getLinks() {
143         return links;
144     }
145 }
146
Popular Tags