1 package com.quadcap.http.client; 2 3 40 41 import java.io.*; 42 import java.util.*; 43 44 import java.net.Socket ; 45 import java.net.URL ; 46 import java.net.URLEncoder ; 47 import java.net.URLConnection ; 48 49 import org.xml.sax.InputSource ; 50 51 import com.quadcap.http.util.HeaderParser; 52 53 import com.quadcap.util.collections.ArrayQueue; 54 55 import com.quadcap.util.text.OctetMap; 56 import com.quadcap.util.text.Scanner; 57 58 import com.quadcap.util.Debug; 59 import com.quadcap.util.Util; 60 61 import com.quadcap.io.IO; 62 import com.quadcap.io.LimitedInputStream; 63 import com.quadcap.io.NullOutputStream; 64 65 public class HttpFetcher { 66 static boolean checkLinks = false; 67 static boolean showResponseHeaders = false; 68 69 static byte[] delims = { 0x0d, 0x0a, 0x0d, 0x0a }; 70 71 public static byte[] fetch(String url) throws Exception { 72 return fetch(url, new ArrayList()); 73 } 74 75 public static byte[] fetch(String url, List headers) throws Exception { 76 InputStream is = fetchStream(url, headers); 77 byte[] doc = readStream(is); 78 is.close(); 79 return doc; 80 } 81 82 public static byte[] post(String url, String fileName, 83 List headers) throws Exception { 84 InputStream is = postStream(url, fileName, headers); 85 byte[] doc = readStream(is); 86 is.close(); 87 return doc; 88 89 } 90 91 public static InputStream postStream(String url, String fileName, 92 List headers) throws Exception { 93 if (url.indexOf("http://") != 0) { 94 System.err.println("Bad url (protocol): " + url); 95 return null; 96 } 97 url = url.substring(7); 98 int idx = url.indexOf('/'); 99 if (idx <= 0) { 100 url = url + "/"; 101 idx = url.indexOf('/'); 102 } 103 String host = url.substring(0, idx); 104 String name = url.substring(idx); 105 int port = 80; 106 idx = host.indexOf(':'); 107 if (idx >= 0) { 108 port = Integer.parseInt(host.substring(idx+1)); 109 host = host.substring(0, idx); 110 } 111 Socket s = new Socket (host, port); 112 113 headers.add("Content-Length: " + 114 String.valueOf(new File(fileName).length())); 115 116 OutputStream sos = s.getOutputStream(); 117 BufferedOutputStream os = new BufferedOutputStream(sos); 118 os.write(("POST " + name + " HTTP/1.0\r\n").getBytes()); 119 Iterator iter = headers.iterator(); 120 while (iter.hasNext()) { 121 IO.write(os, iter.next().toString()); 122 os.write("\r\n".getBytes()); 123 } 124 os.write("\r\n".getBytes()); 125 126 FileInputStream fis = new FileInputStream(fileName); 127 IO.copyStream(fis, os); 128 os.flush(); 129 130 InputStream is = s.getInputStream(); 131 return new BufferedInputStream(is); 132 } 133 134 public static InputStream fetchStream(String url) throws Exception { 135 return fetchStream(url, new ArrayList()); 136 } 137 138 public static void ripPlayList(InputStream is, List headers) 139 throws Exception 140 { 141 BufferedReader br = new BufferedReader( 142 new InputStreamReader(is)); 143 String line; 144 while ((line = br.readLine()) != null) { 145 String [] v = line.split("="); 146 if (v.length == 2 && v[0].equals("File1")) { 147 is.close(); 148 ripStream(v[1], headers); 149 return; 150 } 151 } 152 is.close(); 153 } 154 155 public static void ripStream(String url, List headers) 156 throws Exception 157 { 158 InputStream is = fetchStream(url, headers); 159 StringBuffer sb = new StringBuffer (); 160 for (int c = is.read(); c != '\n'; c = is.read()) { 161 sb.append((char)c); 162 } 163 Debug.println("ripStream(" + url + "), Response: " + sb); 164 Map hdrs = HeaderParser.parseHeaders(is); 165 Debug.println("Headers = " + hdrs); 166 if (sb.toString().indexOf("302") > 0) { 167 is.close(); 168 Debug.println("redirecting to: " + hdrs.get("location")); 169 ripStream(hdrs.get("location").toString(), headers); 170 return; 171 } 172 String contentType = String.valueOf(hdrs.get("content-type")); 173 if (contentType.equalsIgnoreCase("audio/x-scpls")) { 174 ripPlayList(is, headers); 175 return; 176 } 177 int metaInt = 0; 178 try { 179 metaInt = Integer.parseInt(String.valueOf(hdrs.get("icy-metaint"))); 180 } catch (Throwable t) { 181 } 182 byte[] buf = new byte[metaInt]; 183 String title = null; 184 String lastTitle = "__INVALID_lastTitle__"; 185 Mp3FrameStream out = new Mp3FrameStream(); 186 FileOutputStream fout = null; 187 while (true) { 188 int cnt = is.read(buf); 189 while (cnt < buf.length) { 190 if (cnt < 0) { 191 if (out != null) { 192 out.close(); 193 } 194 Debug.println("partial buffer, returning... (" + cnt + ")"); 195 return; 196 } 197 cnt += is.read(buf, cnt, buf.length - cnt); 198 } 199 title = getTitle(is).replace('/', ' '); 200 if (title.length() > 0) { 201 if (!title.equals(lastTitle)) { 202 Debug.println("Title: " + title); 203 if (fout == null) { 204 fout = new FileOutputStream(title); 206 out.init(fout, new NullOutputStream()); 207 out.write(buf); 208 } else { 209 out.write(buf, 0, metaInt/2); 211 out.close(); 212 fout = new FileOutputStream(title); 213 out.init(fout, new NullOutputStream()); 214 out.write(buf, metaInt/2, metaInt/2); 215 } 216 lastTitle = title; 217 } 218 } else { 219 if (fout != null) { 220 out.write(buf); 221 } 222 } 223 } 224 } 225 226 public static String getTitle(InputStream in) throws IOException { 227 byte[] buf = new byte[in.read() * 16]; 228 in.read(buf); 229 StringBuffer sb = new StringBuffer (); 230 for (int i = 0; i < buf.length && buf[i] != 0; i++) { 231 sb.append((char)(buf[i])); 232 } 233 String [] p = sb.toString().split(";"); 234 for (int i = 0; i < p.length; i++) { 235 String [] v = p[i].trim().split("="); 236 if (v.length == 2 && v[0].equalsIgnoreCase("StreamTitle")) { 237 String s = v[1].substring(1, v[1].length()-1); 238 while (s.toLowerCase().endsWith(".mp3")) { 239 s = s.substring(0, s.length() - 4); 240 } 241 s += ".mp3"; 242 return s; 243 } 244 } 245 return ""; 246 } 247 248 public static InputStream fetchStream(String url, List headers) 249 throws IOException 250 { 251 if (url.indexOf("http://") != 0) { 253 System.err.println("Bad url (protocol): " + url); 254 return null; 255 } 256 url = url.substring(7); 257 int idx = url.indexOf('/'); 258 if (idx <= 0) { 259 url = url + "/"; 260 idx = url.length() - 1; 261 } 262 String host = url.substring(0, idx); 263 String name = url.substring(idx); 264 int port = 80; 265 idx = host.indexOf(':'); 266 if (idx >= 0) { 267 port = Integer.parseInt(host.substring(idx+1)); 268 host = host.substring(0, idx); 269 } 270 Socket s = new Socket (host, port); 271 272 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 273 Debug.println("GET " + name); 274 bos.write(("GET " + name + " HTTP/1.0\r\n").getBytes()); 275 Iterator iter = headers.iterator(); 276 while (iter.hasNext()) { 277 String hdr = iter.next().toString(); 278 IO.write(bos, hdr); 279 Debug.println(" " + hdr); 280 bos.write('\r'); 281 bos.write('\n'); 282 } 283 bos.write("\r\n".getBytes()); 284 285 s.getOutputStream().write(bos.toByteArray()); 286 287 InputStream is = s.getInputStream(); 288 return new BufferedInputStream(is); 289 } 290 291 public static InputStream fetch2(String url) throws Exception { 292 System.out.println("Fetch: " + url); 293 URLConnection c = new URL (url).openConnection(); 294 c.connect(); 295 return c.getInputStream(); 296 } 297 298 public static byte[] readStream(InputStream is) throws IOException { 299 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 300 301 int state = 0; 302 int cnt = 0; 303 if (showResponseHeaders) state = 5; 304 while (state < 4) { 305 int c = is.read(); 306 if (c < 0) { 307 throw new IOException("unexpected eof in message headers"); 308 } 309 if (delims[state] == c) state++; 310 else if (delims[0] == c) state = 1; 311 else state = 0; 312 } 313 314 byte[] buf = new byte[1024]; 315 while ((cnt = is.read(buf)) > 0) { 316 bos.write(buf, 0, cnt); 317 } 318 return bos.toByteArray(); 319 } 320 321 public static Hashtable buildTable(String fname) throws Exception { 322 BufferedReader r = new BufferedReader(new FileReader(fname)); 323 String turl; 324 Hashtable t = new Hashtable(); 325 while ((turl = r.readLine()) != null) { 326 url = turl; 327 byte[] doc = fetch(url); 328 System.err.println(url + ": " + Util.strBytes(doc)); 329 t.put(url, doc); 330 } 331 return t; 332 } 333 334 public static void checkTable(String fname, Hashtable t) throws Exception { 335 BufferedReader r = new BufferedReader(new FileReader(fname)); 336 String url; 337 while ((url = r.readLine()) != null) { 338 try { 339 byte[] doc = fetch(url); 340 byte[] exp = (byte[])t.get(url); 341 if (Util.compareBytes(doc, exp) != 0) { 342 System.err.println("Failed: " + url); 343 System.err.println("Doc: " + Util.strBytes(doc)); 344 } 345 } catch (Exception e) { 346 Debug.print(e); 347 } 348 } 349 } 350 351 public static void addAV(String fname) throws Exception { 352 BufferedReader r = new BufferedReader(new FileReader(fname)); 353 String url; 354 Hashtable t = new Hashtable(); 355 while ((url = r.readLine()) != null) { 356 System.out.println("url: " + url); 357 StringBuffer sb = new StringBuffer ( 358 "http://add-url.altavista.com/cgi-bin/newurl?ad=1&q="); 359 sb.append(URLEncoder.encode(url)); 360 byte[] doc = fetch(sb.toString()); 361 String s = new String (doc); 362 if (s.indexOf("The page was fetched") < 0) { 363 System.out.println(s); 364 break; 365 } 366 try { Thread.sleep(2000); } catch (Throwable dt) {} 367 } 368 } 369 370 public static void check(String name) throws Exception { 371 LinkChecker lc = new LinkChecker(name); 372 lc.run(); 373 lc.printBadLinks(); 374 } 375 376 public static void main(String args[]) { 377 Debug.debugMode = Debug.debugAll; 378 Debug.debugStream = System.out; 379 try { 380 doit(args); 381 } catch (Exception e) { 382 System.out.println("Exception: " + e.toString()); 383 Debug.print(e); 384 } 385 } 386 387 static String fname = null; 388 static int repeat = 2; 389 static int delay = 0; 390 static Hashtable t; 391 static boolean times = false; 392 static boolean rip = false; 393 static String url = null; 394 static int limit = 0; 395 396 public static void doit() throws Exception { 397 if (times) { 398 for (int i = 0; i < repeat; i++) { 399 fetch(url); 400 } 401 } else { 402 for (int i = 0; i < repeat; i++) { 403 checkTable(fname, t); 404 if (delay > 0) Thread.sleep(delay); 405 } 406 } 407 } 408 409 public static void doit (String args[]) throws Exception { 410 int numThreads = 1; 411 boolean av = false; 412 String post = null; 413 List headers = new ArrayList(); 414 415 int ac = 0; 416 while (ac < args.length) { 417 String arg = args[ac].trim(); 418 if (arg.charAt(0) != '-') break; 419 ac++; 420 if (arg.equals("-urls")) { 421 fname = args[ac++]; 422 } else if (arg.equals("-count")) { 423 repeat = Integer.parseInt(args[ac++]); 424 } else if (arg.equals("-delay")) { 425 delay = Integer.parseInt(args[ac++]); 426 } else if (arg.equals("-threads")) { 427 numThreads = Integer.parseInt(args[ac++]); 428 } else if (arg.equals("-checklinks")) { 429 checkLinks = true; 430 fname = args[ac++]; 431 } else if (arg.equals("-headers")) { 432 showResponseHeaders = true; 433 } else if (arg.equals("-post")) { 434 post = args[ac++]; 435 } else if (arg.equals("-header")) { 436 String hName = args[ac++]; 437 String hVal = args[ac++]; 438 headers.add(hName + ": " + hVal); 439 } else if (arg.equals("-altavista")) { 440 av = true; 441 fname = args[ac++]; 442 } else if (arg.equals("-times")) { 443 times = true; 444 } else if (arg.equals("-limit")) { 445 limit = Integer.parseInt(args[ac++]); 446 } else if (arg.equals("-rip")) { 447 if (!rip) { 448 headers.add("Host: 192.168.1.8"); 449 headers.add("User-Agent: WinampMPEG/2.8"); 450 headers.add("Accept: */*"); 451 headers.add("Icy-Metadata:1"); 452 headers.add("Connection: close"); 453 } 454 rip = true; 455 } else { 456 } 458 } 459 460 if (av) { 461 addAV(fname); 462 } else if (checkLinks) { 463 check(fname); 464 return; 465 } else if (fname == null && !times) { 466 url = args[ac]; 467 if (rip) { 468 ripStream(url, headers); 469 return; 470 } 471 if (post == null) { 472 InputStream in = fetchStream(url, headers); 473 if (limit > 0) { 474 in = new LimitedInputStream(in, limit); 475 } 476 try { 477 IO.copyStream(in, System.out); 478 } finally { 479 in.close(); 480 } 481 } else { 482 byte[] doc = post(url, post, headers); 483 System.out.write(doc); 484 } 485 } else { 486 t = buildTable(fname); 487 Thread [] threads = new Thread [numThreads]; 488 for (int i = 0; i < numThreads; i++) { 489 threads[i] = new Thread () { 490 public void run() { 491 try { 492 doit(); 493 } catch (Throwable t) { 494 Debug.print(t); 495 } 496 } 497 }; 498 } 499 long start = System.currentTimeMillis(); 500 for (int i = 0; i < numThreads; i++) { 501 threads[i].start(); 502 } 503 for (int i = 0; i < numThreads; i++) { 504 try { 505 threads[i].join(); 506 } catch (Throwable t) { 507 Debug.print(t); 508 } 509 } 510 long stop = System.currentTimeMillis(); 511 long elap = stop - start; 512 int r_s = (int)((repeat * numThreads * 1000) / elap); 513 if (times) { 514 System.out.println("" + elap + " elapsed"); 515 System.out.println("" + r_s + " requests/second"); 516 } 517 } 518 } 519 } 520 | Popular Tags |