KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > quadcap > http > client > HttpFetcher


1 package com.quadcap.http.client;
2
3 /* Copyright 1998 - 2003 Quadcap Software. All rights reserved.
4  *
5  * This software is distributed under the Quadcap Free Software License.
6  * This software may be used or modified for any purpose, personal or
7  * commercial. Open Source redistributions are permitted. Commercial
8  * redistribution of larger works derived from, or works which bundle
9  * this software requires a "Commercial Redistribution License"; see
10  * http://www.quadcap.com/purchase.
11  *
12  * Redistributions qualify as "Open Source" under one of the following terms:
13  *
14  * Redistributions are made at no charge beyond the reasonable cost of
15  * materials and delivery.
16  *
17  * Redistributions are accompanied by a copy of the Source Code or by an
18  * irrevocable offer to provide a copy of the Source Code for up to three
19  * years at the cost of materials and delivery. Such redistributions
20  * must allow further use, modification, and redistribution of the Source
21  * Code under substantially the same terms as this license.
22  *
23  * Redistributions of source code must retain the copyright notices as they
24  * appear in each source code file, these license terms, and the
25  * disclaimer/limitation of liability set forth as paragraph 6 below.
26  *
27  * Redistributions in binary form must reproduce this Copyright Notice,
28  * these license terms, and the disclaimer/limitation of liability set
29  * forth as paragraph 6 below, in the documentation and/or other materials
30  * provided with the distribution.
31  *
32  * The Software is provided on an "AS IS" basis. No warranty is
33  * provided that the Software is free of defects, or fit for a
34  * particular purpose.
35  *
36  * Limitation of Liability. Quadcap Software shall not be liable
37  * for any damages suffered by the Licensee or any third party resulting
38  * from use of the Software.
39  */

40
41 import java.io.*;
42 import java.util.*;
43
44 import java.net.Socket JavaDoc;
45 import java.net.URL JavaDoc;
46 import java.net.URLEncoder JavaDoc;
47 import java.net.URLConnection JavaDoc;
48
49 import org.xml.sax.InputSource JavaDoc;
50
51 import com.quadcap.http.util.HeaderParser;
52
53 import com.quadcap.util.collections.ArrayQueue;
54
55 import com.quadcap.util.text.OctetMap;
56 import com.quadcap.util.text.Scanner;
57
58 import com.quadcap.util.Debug;
59 import com.quadcap.util.Util;
60
61 import com.quadcap.io.IO;
62 import com.quadcap.io.LimitedInputStream;
63 import com.quadcap.io.NullOutputStream;
64
65 public class HttpFetcher {
66     static boolean checkLinks = false;
67     static boolean showResponseHeaders = false;
68
69     static byte[] delims = { 0x0d, 0x0a, 0x0d, 0x0a };
70
71     public static byte[] fetch(String JavaDoc url) throws Exception JavaDoc {
72         return fetch(url, new ArrayList());
73     }
74     
75     public static byte[] fetch(String JavaDoc url, List headers) throws Exception JavaDoc {
76     InputStream is = fetchStream(url, headers);
77     byte[] doc = readStream(is);
78     is.close();
79     return doc;
80     }
81
82     public static byte[] post(String JavaDoc url, String JavaDoc fileName,
83                               List headers) throws Exception JavaDoc {
84     InputStream is = postStream(url, fileName, headers);
85     byte[] doc = readStream(is);
86     is.close();
87     return doc;
88         
89     }
90
91     public static InputStream postStream(String JavaDoc url, String JavaDoc fileName,
92                                          List headers) throws Exception JavaDoc {
93     if (url.indexOf("http://") != 0) {
94         System.err.println("Bad url (protocol): " + url);
95         return null;
96     }
97     url = url.substring(7);
98     int idx = url.indexOf('/');
99     if (idx <= 0) {
100             url = url + "/";
101             idx = url.indexOf('/');
102     }
103     String JavaDoc host = url.substring(0, idx);
104     String JavaDoc name = url.substring(idx);
105     int port = 80;
106     idx = host.indexOf(':');
107     if (idx >= 0) {
108         port = Integer.parseInt(host.substring(idx+1));
109         host = host.substring(0, idx);
110     }
111     Socket JavaDoc s = new Socket JavaDoc(host, port);
112
113         headers.add("Content-Length: " +
114                     String.valueOf(new File(fileName).length()));
115
116     OutputStream sos = s.getOutputStream();
117         BufferedOutputStream os = new BufferedOutputStream(sos);
118     os.write(("POST " + name + " HTTP/1.0\r\n").getBytes());
119         Iterator iter = headers.iterator();
120         while (iter.hasNext()) {
121             IO.write(os, iter.next().toString());
122             os.write("\r\n".getBytes());
123         }
124         os.write("\r\n".getBytes());
125                      
126         FileInputStream fis = new FileInputStream(fileName);
127         IO.copyStream(fis, os);
128         os.flush();
129
130         InputStream is = s.getInputStream();
131     return new BufferedInputStream(is);
132     }
133     
134     public static InputStream fetchStream(String JavaDoc url) throws Exception JavaDoc {
135         return fetchStream(url, new ArrayList());
136     }
137
138     public static void ripPlayList(InputStream is, List headers)
139         throws Exception JavaDoc
140     {
141         BufferedReader br = new BufferedReader(
142             new InputStreamReader(is));
143         String JavaDoc line;
144         while ((line = br.readLine()) != null) {
145             String JavaDoc[] v = line.split("=");
146             if (v.length == 2 && v[0].equals("File1")) {
147                 is.close();
148                 ripStream(v[1], headers);
149                 return;
150             }
151         }
152         is.close();
153     }
154     
155     public static void ripStream(String JavaDoc url, List headers)
156         throws Exception JavaDoc
157     {
158         InputStream is = fetchStream(url, headers);
159         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
160         for (int c = is.read(); c != '\n'; c = is.read()) {
161             sb.append((char)c);
162         }
163         Debug.println("ripStream(" + url + "), Response: " + sb);
164         Map hdrs = HeaderParser.parseHeaders(is);
165         Debug.println("Headers = " + hdrs);
166         if (sb.toString().indexOf("302") > 0) {
167             is.close();
168             Debug.println("redirecting to: " + hdrs.get("location"));
169             ripStream(hdrs.get("location").toString(), headers);
170             return;
171         }
172         String JavaDoc contentType = String.valueOf(hdrs.get("content-type"));
173         if (contentType.equalsIgnoreCase("audio/x-scpls")) {
174             ripPlayList(is, headers);
175             return;
176         }
177         int metaInt = 0;
178         try {
179             metaInt = Integer.parseInt(String.valueOf(hdrs.get("icy-metaint")));
180         } catch (Throwable JavaDoc t) {
181         }
182         byte[] buf = new byte[metaInt];
183         String JavaDoc title = null;
184         String JavaDoc lastTitle = "__INVALID_lastTitle__";
185         Mp3FrameStream out = new Mp3FrameStream();
186         FileOutputStream fout = null;
187         while (true) {
188             int cnt = is.read(buf);
189             while (cnt < buf.length) {
190                 if (cnt < 0) {
191                     if (out != null) {
192                         out.close();
193                     }
194                     Debug.println("partial buffer, returning... (" + cnt + ")");
195                     return;
196                 }
197                 cnt += is.read(buf, cnt, buf.length - cnt);
198             }
199             title = getTitle(is).replace('/', ' ');
200             if (title.length() > 0) {
201                 if (!title.equals(lastTitle)) {
202                     Debug.println("Title: " + title);
203                     if (fout == null) {
204                         // Start a new file
205
fout = new FileOutputStream(title);
206                         out.init(fout, new NullOutputStream());
207                         out.write(buf);
208                     } else {
209                         // split the difference
210
out.write(buf, 0, metaInt/2);
211                         out.close();
212                         fout = new FileOutputStream(title);
213                         out.init(fout, new NullOutputStream());
214                         out.write(buf, metaInt/2, metaInt/2);
215                     }
216                     lastTitle = title;
217                 }
218             } else {
219                 if (fout != null) {
220                     out.write(buf);
221                 }
222             }
223         }
224     }
225
226     public static String JavaDoc getTitle(InputStream in) throws IOException {
227         byte[] buf = new byte[in.read() * 16];
228         in.read(buf);
229         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
230         for (int i = 0; i < buf.length && buf[i] != 0; i++) {
231             sb.append((char)(buf[i]));
232         }
233         String JavaDoc[] p = sb.toString().split(";");
234         for (int i = 0; i < p.length; i++) {
235             String JavaDoc[] v = p[i].trim().split("=");
236             if (v.length == 2 && v[0].equalsIgnoreCase("StreamTitle")) {
237                 String JavaDoc s = v[1].substring(1, v[1].length()-1);
238                 while (s.toLowerCase().endsWith(".mp3")) {
239                     s = s.substring(0, s.length() - 4);
240                 }
241                 s += ".mp3";
242                 return s;
243             }
244         }
245         return "";
246     }
247     
248     public static InputStream fetchStream(String JavaDoc url, List headers)
249         throws IOException
250     {
251     //Debug.println(0, "Fetch: " + url);
252
if (url.indexOf("http://") != 0) {
253         System.err.println("Bad url (protocol): " + url);
254         return null;
255     }
256     url = url.substring(7);
257     int idx = url.indexOf('/');
258     if (idx <= 0) {
259             url = url + "/";
260             idx = url.length() - 1;
261     }
262     String JavaDoc host = url.substring(0, idx);
263     String JavaDoc name = url.substring(idx);
264     int port = 80;
265     idx = host.indexOf(':');
266     if (idx >= 0) {
267         port = Integer.parseInt(host.substring(idx+1));
268         host = host.substring(0, idx);
269     }
270     Socket JavaDoc s = new Socket JavaDoc(host, port);
271
272     ByteArrayOutputStream bos = new ByteArrayOutputStream();
273         Debug.println("GET " + name);
274     bos.write(("GET " + name + " HTTP/1.0\r\n").getBytes());
275         Iterator iter = headers.iterator();
276         while (iter.hasNext()) {
277             String JavaDoc hdr = iter.next().toString();
278             IO.write(bos, hdr);
279             Debug.println(" " + hdr);
280             bos.write('\r');
281             bos.write('\n');
282         }
283         bos.write("\r\n".getBytes());
284
285         s.getOutputStream().write(bos.toByteArray());
286
287     InputStream is = s.getInputStream();
288     return new BufferedInputStream(is);
289     }
290
291     public static InputStream fetch2(String JavaDoc url) throws Exception JavaDoc {
292     System.out.println("Fetch: " + url);
293     URLConnection JavaDoc c = new URL JavaDoc(url).openConnection();
294     c.connect();
295     return c.getInputStream();
296     }
297
298     public static byte[] readStream(InputStream is) throws IOException {
299     ByteArrayOutputStream bos = new ByteArrayOutputStream();
300
301     int state = 0;
302     int cnt = 0;
303         if (showResponseHeaders) state = 5;
304     while (state < 4) {
305         int c = is.read();
306         if (c < 0) {
307         throw new IOException("unexpected eof in message headers");
308         }
309         if (delims[state] == c) state++;
310         else if (delims[0] == c) state = 1;
311         else state = 0;
312     }
313
314     byte[] buf = new byte[1024];
315     while ((cnt = is.read(buf)) > 0) {
316         bos.write(buf, 0, cnt);
317     }
318     return bos.toByteArray();
319     }
320
321     public static Hashtable buildTable(String JavaDoc fname) throws Exception JavaDoc {
322     BufferedReader r = new BufferedReader(new FileReader(fname));
323     String JavaDoc turl;
324     Hashtable t = new Hashtable();
325     while ((turl = r.readLine()) != null) {
326             url = turl;
327         byte[] doc = fetch(url);
328         System.err.println(url + ": " + Util.strBytes(doc));
329         t.put(url, doc);
330     }
331     return t;
332     }
333     
334     public static void checkTable(String JavaDoc fname, Hashtable t) throws Exception JavaDoc {
335     BufferedReader r = new BufferedReader(new FileReader(fname));
336     String JavaDoc url;
337     while ((url = r.readLine()) != null) {
338         try {
339         byte[] doc = fetch(url);
340         byte[] exp = (byte[])t.get(url);
341         if (Util.compareBytes(doc, exp) != 0) {
342             System.err.println("Failed: " + url);
343             System.err.println("Doc: " + Util.strBytes(doc));
344         }
345         } catch (Exception JavaDoc e) {
346         Debug.print(e);
347         }
348     }
349     }
350
351     public static void addAV(String JavaDoc fname) throws Exception JavaDoc {
352     BufferedReader r = new BufferedReader(new FileReader(fname));
353     String JavaDoc url;
354     Hashtable t = new Hashtable();
355     while ((url = r.readLine()) != null) {
356         System.out.println("url: " + url);
357         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(
358         "http://add-url.altavista.com/cgi-bin/newurl?ad=1&q=");
359         sb.append(URLEncoder.encode(url));
360         byte[] doc = fetch(sb.toString());
361         String JavaDoc s = new String JavaDoc(doc);
362         if (s.indexOf("The page was fetched") < 0) {
363         System.out.println(s);
364         break;
365         }
366         try { Thread.sleep(2000); } catch (Throwable JavaDoc dt) {}
367     }
368     }
369     
370     public static void check(String JavaDoc name) throws Exception JavaDoc {
371         LinkChecker lc = new LinkChecker(name);
372         lc.run();
373         lc.printBadLinks();
374     }
375     
376     public static void main(String JavaDoc args[]) {
377     Debug.debugMode = Debug.debugAll;
378     Debug.debugStream = System.out;
379     try {
380         doit(args);
381     } catch (Exception JavaDoc e) {
382         System.out.println("Exception: " + e.toString());
383         Debug.print(e);
384     }
385     }
386
387     static String JavaDoc fname = null;
388     static int repeat = 2;
389     static int delay = 0;
390     static Hashtable t;
391     static boolean times = false;
392     static boolean rip = false;
393     static String JavaDoc url = null;
394     static int limit = 0;
395
396     public static void doit() throws Exception JavaDoc {
397         if (times) {
398             for (int i = 0; i < repeat; i++) {
399                 fetch(url);
400             }
401         } else {
402             for (int i = 0; i < repeat; i++) {
403                 checkTable(fname, t);
404                 if (delay > 0) Thread.sleep(delay);
405             }
406         }
407     }
408     
409     public static void doit (String JavaDoc args[]) throws Exception JavaDoc {
410     int numThreads = 1;
411     boolean av = false;
412         String JavaDoc post = null;
413         List headers = new ArrayList();
414
415     int ac = 0;
416         while (ac < args.length) {
417         String JavaDoc arg = args[ac].trim();
418             if (arg.charAt(0) != '-') break;
419             ac++;
420         if (arg.equals("-urls")) {
421         fname = args[ac++];
422         } else if (arg.equals("-count")) {
423         repeat = Integer.parseInt(args[ac++]);
424         } else if (arg.equals("-delay")) {
425         delay = Integer.parseInt(args[ac++]);
426         } else if (arg.equals("-threads")) {
427         numThreads = Integer.parseInt(args[ac++]);
428         } else if (arg.equals("-checklinks")) {
429         checkLinks = true;
430         fname = args[ac++];
431             } else if (arg.equals("-headers")) {
432         showResponseHeaders = true;
433             } else if (arg.equals("-post")) {
434                 post = args[ac++];
435             } else if (arg.equals("-header")) {
436                 String JavaDoc hName = args[ac++];
437                 String JavaDoc hVal = args[ac++];
438                 headers.add(hName + ": " + hVal);
439         } else if (arg.equals("-altavista")) {
440         av = true;
441         fname = args[ac++];
442             } else if (arg.equals("-times")) {
443                 times = true;
444             } else if (arg.equals("-limit")) {
445                 limit = Integer.parseInt(args[ac++]);
446             } else if (arg.equals("-rip")) {
447                 if (!rip) {
448                     headers.add("Host: 192.168.1.8");
449                     headers.add("User-Agent: WinampMPEG/2.8");
450                     headers.add("Accept: */*");
451                     headers.add("Icy-Metadata:1");
452                     headers.add("Connection: close");
453                 }
454                 rip = true;
455         } else {
456         //throw new Exception("??");
457
}
458     }
459
460     if (av) {
461         addAV(fname);
462     } else if (checkLinks) {
463         check(fname);
464         return;
465     } else if (fname == null && !times) {
466         url = args[ac];
467             if (rip) {
468                 ripStream(url, headers);
469                 return;
470             }
471             if (post == null) {
472                 InputStream in = fetchStream(url, headers);
473                 if (limit > 0) {
474                     in = new LimitedInputStream(in, limit);
475                 }
476                 try {
477                     IO.copyStream(in, System.out);
478                 } finally {
479                     in.close();
480                 }
481             } else {
482                 byte[] doc = post(url, post, headers);
483                 System.out.write(doc);
484             }
485         } else {
486             t = buildTable(fname);
487             Thread JavaDoc[] threads = new Thread JavaDoc[numThreads];
488             for (int i = 0; i < numThreads; i++) {
489                 threads[i] = new Thread JavaDoc() {
490                     public void run() {
491                         try {
492                             doit();
493                         } catch (Throwable JavaDoc t) {
494                             Debug.print(t);
495                         }
496                     }
497                 };
498             }
499             long start = System.currentTimeMillis();
500             for (int i = 0; i < numThreads; i++) {
501                 threads[i].start();
502             }
503             for (int i = 0; i < numThreads; i++) {
504                 try {
505                     threads[i].join();
506                 } catch (Throwable JavaDoc t) {
507                     Debug.print(t);
508                 }
509             }
510             long stop = System.currentTimeMillis();
511             long elap = stop - start;
512             int r_s = (int)((repeat * numThreads * 1000) / elap);
513             if (times) {
514                 System.out.println("" + elap + " elapsed");
515                 System.out.println("" + r_s + " requests/second");
516             }
517         }
518     }
519 }
520
Popular Tags