KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lenya > net > WGet


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */

17
18 /* $Id: WGet.java 42598 2004-03-01 16:18:28Z gregor $ */
19
20 package org.apache.lenya.net;
21
22 import java.io.ByteArrayOutputStream JavaDoc;
23 import java.io.File JavaDoc;
24 import java.io.FileNotFoundException JavaDoc;
25 import java.io.FileOutputStream JavaDoc;
26 import java.io.IOException JavaDoc;
27 import java.io.InputStream JavaDoc;
28 import java.net.HttpURLConnection JavaDoc;
29 import java.net.MalformedURLException JavaDoc;
30 import java.net.URL JavaDoc;
31 import java.util.Iterator JavaDoc;
32 import java.util.List JavaDoc;
33
34 import org.apache.log4j.Category;
35
36
37 /**
38  * Similar to the UNIX wget
39  */

40 public class WGet {
41     static Category log = Category.getInstance(WGet.class);
42     String JavaDoc directory_prefix = null;
43
44     /**
45      * Creates a new WGet object.
46      */

47     public WGet() {
48         directory_prefix = System.getProperty("user.dir");
49     }
50
51     /**
52      * DOCUMENT ME!
53      *
54      * @param args DOCUMENT ME!
55      */

56     public static void main(String JavaDoc[] args) {
57         if (args.length == 0) {
58             System.out.println("Usage: org.apache.lenya.net.WGet [URL] -P/home/lenya/download");
59
60             return;
61         }
62
63         try {
64             WGet wget = new WGet();
65
66             for (int i = 0; i < args.length; i++) {
67                 if (args[i].indexOf("-P") == 0) {
68                     wget.setDirectoryPrefix(args[i].substring(2)); // -P/home/lenya/download, 2: remove "-P"
69
}
70             }
71
72             byte[] response = wget.download(new URL JavaDoc(args[0]), "s/\\/lenya\\/oscom//g", "");
73         } catch (MalformedURLException JavaDoc e) {
74             System.err.println(e);
75         } catch (Exception JavaDoc e) {
76             System.err.println(e);
77         }
78     }
79
80     /**
81      * -P
82      *
83      * @param directory_prefix DOCUMENT ME!
84      */

85     public void setDirectoryPrefix(String JavaDoc directory_prefix) {
86         this.directory_prefix = directory_prefix;
87     }
88
89     /**
90      * @param url The url of the resource to download
91      * @param prefixSubstitute Regexp which shall be replaced
92      * @param substituteReplacement Replacement of the regexp
93      *
94      * @return bytes of downloaded resource
95      *
96      * @throws IOException URL might not exist
97      */

98     public byte[] download(URL JavaDoc url, String JavaDoc prefixSubstitute, String JavaDoc substituteReplacement)
99         throws IOException JavaDoc {
100         log.debug(".download(): " + url + " " + prefixSubstitute + " " + substituteReplacement);
101
102         return downloadUsingHttpClient(url, prefixSubstitute, substituteReplacement);
103     }
104
105     /**
106      * DOCUMENT ME!
107      *
108      * @param url DOCUMENT ME!
109      * @param prefixSubstitute DOCUMENT ME!
110      *
111      * @return DOCUMENT ME!
112      */

113     public byte[] downloadUsingHttpClient(URL JavaDoc url, String JavaDoc prefixSubstitute,
114         String JavaDoc substituteReplacement) {
115         log.debug(".downloadUsingHttpClient(): " + url);
116
117         byte[] sresponse = null;
118
119         try {
120             sresponse = getResource(url);
121
122             File JavaDoc file = new File JavaDoc(createFileName(url, prefixSubstitute, substituteReplacement));
123
124             saveToFile(file.getAbsolutePath(), sresponse);
125
126             substitutePrefix(file.getAbsolutePath(), prefixSubstitute, substituteReplacement);
127         } catch (MalformedURLException JavaDoc e) {
128             log.error(".downloadUsingHttpClient(): ", e);
129         } catch (FileNotFoundException JavaDoc e) {
130             log.error(".downloadUsingHttpClient(): ", e);
131         } catch (IOException JavaDoc e) {
132             log.error(".downloadUsingHttpClient(): ", e);
133         }
134
135         List JavaDoc links = null;
136
137         try {
138             links = getLinks(url);
139         } catch (IOException JavaDoc ioe) {
140             log.error(".downloadUsingHttpClient(): ", ioe);
141         }
142
143         if (links != null) {
144             Iterator JavaDoc iterator = links.iterator();
145
146             while (iterator.hasNext()) {
147                 String JavaDoc link = (String JavaDoc) iterator.next();
148
149                 try {
150                     URL JavaDoc child_url = new URL JavaDoc(org.apache.lenya.util.URLUtil.complete(url.toString(),
151                                 link));
152
153                     byte[] child_sresponse = getResource(child_url);
154                     saveToFile(createFileName(child_url, prefixSubstitute, substituteReplacement),
155                         child_sresponse);
156                 } catch (Exception JavaDoc e) {
157                     log.error(".downloadUsingHttpClient(): ", e);
158                 }
159             }
160         }
161
162         return sresponse;
163     }
164
165     /**
166      *
167      */

168     public byte[] getResource(URL JavaDoc url) throws IOException JavaDoc {
169         log.debug(".getResource(): " + url);
170
171         HttpURLConnection JavaDoc httpConnection = (HttpURLConnection JavaDoc) url.openConnection();
172         InputStream JavaDoc in = httpConnection.getInputStream();
173         byte[] buffer = new byte[1024];
174         int bytes_read;
175         ByteArrayOutputStream JavaDoc bufferOut = new ByteArrayOutputStream JavaDoc();
176
177         while ((bytes_read = in.read(buffer)) != -1) {
178             bufferOut.write(buffer, 0, bytes_read);
179         }
180
181         byte[] sresponse = bufferOut.toByteArray();
182         httpConnection.disconnect();
183
184         return sresponse;
185     }
186
187     /**
188      *
189      */

190     public List JavaDoc getLinks(URL JavaDoc url) throws IOException JavaDoc {
191         log.debug(".getLinks(): Get links from " + url);
192
193         List JavaDoc links = null;
194
195         try {
196             org.apache.lenya.util.HTML html = new org.apache.lenya.util.HTML(url.toString());
197             links = html.getImageSrcs(false);
198             links.addAll(html.getLinkHRefs(false));
199         } catch (Exception JavaDoc e) {
200             log.error(".getLinks() Exception 423432: ", e);
201         }
202
203         if (links != null) {
204             log.debug(".getLinks(): Number of links found: " + links.size());
205         }
206
207         return links;
208     }
209
210     /**
211      * Substitute prefix, e.g. "/lenya/blog/live/" by "/"
212      *
213      * @param filename Filename
214      * @param prefixSubstitute Prefix which shall be replaced
215      * @param substituteReplacement Prefix which is going to replace the original
216      *
217      * @throws IOException DOCUMENT ME!
218      */

219     public void substitutePrefix(String JavaDoc filename, String JavaDoc prefixSubstitute, String JavaDoc substituteReplacement) throws IOException JavaDoc {
220         log.debug("Replace " + prefixSubstitute + " by " + substituteReplacement);
221
222     org.apache.lenya.util.SED.replaceAll(new File JavaDoc(filename), escapeSlashes(prefixSubstitute), escapeSlashes(substituteReplacement));
223     }
224
225     /**
226      * Escape slashes
227      *
228      * @return String with escaped slashes
229      */

230     public String JavaDoc escapeSlashes(String JavaDoc string) {
231         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc("");
232
233         for (int i = 0; i < string.length(); i++) {
234             if (string.charAt(i) == '/') {
235                 buffer.append("\\/");
236             } else {
237                 buffer.append(string.charAt(i));
238             }
239         }
240
241         return buffer.toString();
242     }
243
244     /**
245      * DOCUMENT ME!
246      *
247      * @return DOCUMENT ME!
248      */

249     public String JavaDoc toString() {
250         return "-P: " + directory_prefix;
251     }
252
253     /**
254      *
255      */

256     public void saveToFile(String JavaDoc filename, byte[] bytes)
257         throws FileNotFoundException JavaDoc, IOException JavaDoc {
258         File JavaDoc file = new File JavaDoc(filename);
259         File JavaDoc parent = new File JavaDoc(file.getParent());
260
261         if (!parent.exists()) {
262             log.warn(".saveToFile(): Directory will be created: " + parent.getAbsolutePath());
263             parent.mkdirs();
264         }
265
266         FileOutputStream JavaDoc out = new FileOutputStream JavaDoc(file.getAbsolutePath());
267         out.write(bytes);
268         out.close();
269     }
270
271     /**
272      * @param url URL of resource, which has been downloaded and shall be saved
273      * @return Absolute substituted filename
274      */

275     public String JavaDoc createFileName(URL JavaDoc url, String JavaDoc prefixSubstitute, String JavaDoc substituteReplacement) {
276         File JavaDoc file = new File JavaDoc(directory_prefix + File.separator + url.getFile());
277
278         return file.getAbsolutePath().replaceAll(prefixSubstitute, substituteReplacement);
279     }
280
281     /**
282      *
283      */

284     public byte[] runProcess(String JavaDoc command) throws Exception JavaDoc {
285         Process JavaDoc process = Runtime.getRuntime().exec(command);
286
287         java.io.InputStream JavaDoc in = process.getInputStream();
288         byte[] buffer = new byte[1024];
289         int bytes_read = 0;
290         java.io.ByteArrayOutputStream JavaDoc baout = new java.io.ByteArrayOutputStream JavaDoc();
291
292         while ((bytes_read = in.read(buffer)) != -1) {
293             baout.write(buffer, 0, bytes_read);
294         }
295
296         if (baout.toString().length() > 0) {
297             log.debug(".runProcess(): %%%InputStream:START" + baout.toString() +
298                 "END:InputStream%%%");
299         }
300
301         java.io.InputStream JavaDoc in_e = process.getErrorStream();
302         java.io.ByteArrayOutputStream JavaDoc baout_e = new java.io.ByteArrayOutputStream JavaDoc();
303
304         while ((bytes_read = in_e.read(buffer)) != -1) {
305             baout_e.write(buffer, 0, bytes_read);
306         }
307
308         if (baout_e.toString().length() > 0) {
309             log.error(".runProcess(): ###ErrorStream:START" + baout_e.toString() +
310                 "END:ErrorStream###");
311         }
312
313         return baout.toByteArray();
314     }
315 }
316
Popular Tags