package net.matuschek.http;

import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.net.InetAddress;
import java.net.URL;
import java.net.UnknownHostException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
import java.util.Properties;

import net.matuschek.http.connection.HttpConnection;
import net.matuschek.http.connection.HttpsHelper;
import net.matuschek.http.cookie.Cookie;
import net.matuschek.http.cookie.CookieException;
import net.matuschek.http.cookie.CookieManager;
import net.matuschek.http.cookie.MemoryCookieManager;
import net.matuschek.util.Base64;
import net.matuschek.util.ByteBuffer;
import net.matuschek.util.ChunkedInputStream;
import net.matuschek.util.LimitedBandwidthStream;

import org.apache.log4j.Category;

/**
 * Small HTTP/1.1 client that retrieves a single document via GET or POST.
 *
 * <p>Supported features, as implemented below: plain and HTTPS connections,
 * an optional HTTP proxy, Basic authorization taken from the URL's user-info
 * part, an NTLM handshake (see {@link #retrieveDocument}), cookies through a
 * {@link CookieManager}, conditional requests via {@code If-Modified-Since},
 * chunked transfer decoding, bandwidth limiting and progress callbacks.</p>
 *
 * <p>Instance state (proxy, referer, cookie manager, ...) is read and written
 * without any synchronization.</p>
 */
public class HttpTool {

  /** Carriage return character. */
  final static byte CR = 13;

  /** Line feed character. */
  final static byte LF = 10;

  /** Protocol version written into every request line. */
  final static String HTTP_VERSION = "HTTP/1.1";

  /** Callback status: about to open the connection. */
  public final static int STATUS_CONNECTING = 0;

  /** Callback status: connection is established. */
  public final static int STATUS_CONNECTED = 1;

  /** Callback status: request was sent, response is being read. */
  public final static int STATUS_RETRIEVING = 2;

  /** Callback status: the document was read completely. */
  public final static int STATUS_DONE = 3;

  /** Callback status: download was refused by the download rule set. */
  public final static int STATUS_DENIEDBYRULE = 4;

  /** Port used for http URLs that do not name a port. */
  private final static int DEFAULT_HTTPPORT = 80;

  /** Port used for https URLs that do not name a port. */
  private final static int DEFAULT_HTTPSPORT = 443;

  /** Default value of the {@code User-Agent} header. */
  private final static String AGENTNAME =
    "JoBo/1.4beta "
    + "(http://www.matuschek.net/jobo.html)";

  /** Default number of bytes between two progress callbacks. */
  private final static int DEFAULT_UPDATEINTERVAL = 1024;

  /**
   * Default socket timeout. The value is multiplied by 1000 before it is
   * handed to {@code HttpConnection.createConnection}, so it is presumably
   * expressed in seconds.
   */
  private final static int DEFAULT_SOCKETTIMEOUT = 20;

  /** Guards every use of {@link #df}; SimpleDateFormat is not thread-safe. */
  private static final Object DATE_FORMAT_LOCK = new Object();

  /** Value of the {@code User-Agent} header. */
  private String agentName = AGENTNAME;

  /** Value of the {@code Referer} header, or null to omit the header. */
  private String referer = null;

  /** Value of the {@code From} header, or null to omit the header. */
  private String fromAddress = null;

  /** Date for the {@code If-Modified-Since} header, or null to omit it. */
  private Date modifyDate = null;

  /**
   * Bandwidth limit passed to {@link LimitedBandwidthStream}.
   * NOTE(review): unit and the meaning of 0 are defined by that class.
   */
  private int bandwidth = 0;

  /** Resolved proxy address, or null if no proxy is used. */
  private InetAddress proxyAddr = null;

  /** Proxy port, only meaningful if {@link #proxyAddr} is set. */
  private int proxyPort = 0;

  /** Proxy description as passed to {@link #setProxy(String)}. */
  private String proxyDescr = "";

  /** Socket timeout, see {@link #DEFAULT_SOCKETTIMEOUT} for the unit. */
  private int socketTimeout = DEFAULT_SOCKETTIMEOUT;

  /** Whether cookies are sent and stored. */
  private boolean cookiesEnabled = true;

  /** Logger, named after the runtime class of this instance. */
  private Category log = null;

  /** Remembers the user-info ("user:password") last seen for each host. */
  private Properties userInfos = new Properties();

  /** Number of bytes between two progress callbacks. */
  private int updateInterval = DEFAULT_UPDATEINTERVAL;

  /** Receiver of status/progress notifications, may be null. */
  private HttpToolCallback callback = null;

  /** Rules deciding from the response headers whether to download, may be null. */
  private DownloadRuleSet downloadRules = null;

  /** Cookie storage, may be null. */
  private CookieManager cookieManager = null;

  /** Formatter/parser for HTTP dates in GMT. Access only via the helpers below. */
  static SimpleDateFormat df;

  /** Template for NTLM authorization; cloned for every handshake. May be null. */
  private NTLMAuthorization ntlmAuthorization = null;

  static {
    // Configure the formatter itself for GMT instead of temporarily
    // replacing the JVM-wide default time zone, which was visible to
    // (and could race with) every other thread.
    df = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
    df.setTimeZone(TimeZone.getTimeZone("GMT"));
  }

  /**
   * Creates a tool that stores cookies in a {@link MemoryCookieManager}.
   */
  public HttpTool() {
    this.cookieManager = new MemoryCookieManager();
    log = Category.getInstance(getClass().getName());
  }

  /**
   * Sets the value sent in the {@code Referer} header.
   *
   * @param referer the referer, or null to send no such header
   */
  public void setReferer(String referer) {
    this.referer = referer;
  }

  /**
   * Sets the value sent in the {@code User-Agent} header.
   *
   * @param name the agent name
   */
  public void setAgentName(String name) {
    this.agentName = name;
  }

  /**
   * @return the value sent in the {@code User-Agent} header
   */
  public String getAgentName() {
    return agentName;
  }

  /**
   * Tells the cookie manager (if any) that this tool is no longer used.
   */
  public void finish() {
    if (cookieManager != null) {
      cookieManager.finish();
    }
  }

  /**
   * Sets the rules that are consulted after the response headers were read.
   *
   * @param rules the rule set, or null to accept every document
   */
  public void setDownloadRuleSet(DownloadRuleSet rules) {
    this.downloadRules = rules;
  }

  /**
   * @return the current download rule set, may be null
   */
  public DownloadRuleSet getDownloadRuleSet() {
    return this.downloadRules;
  }

  /**
   * @return the socket timeout (see {@link #setTimeout(int)})
   */
  public int getTimeout() {
    return this.socketTimeout;
  }

  /**
   * Sets the socket timeout. The value is multiplied by 1000 when a
   * connection is created, so it is presumably given in seconds.
   *
   * @param timeout the new timeout
   */
  public void setTimeout(int timeout) {
    this.socketTimeout = timeout;
  }

  /**
   * Enables or disables sending and storing of cookies.
   *
   * @param enable true to use cookies
   */
  public void setEnableCookies(boolean enable) {
    this.cookiesEnabled = enable;
  }

  /**
   * @return true if cookies are enabled
   */
  public boolean getEnableCookies() {
    return this.cookiesEnabled;
  }

  /**
   * Configures the proxy from a {@code host:port} description.
   *
   * <p>A null or empty description disables the proxy. If the description
   * is invalid the proxy is disabled, {@link #getProxy()} returns an empty
   * string and an exception is thrown, so the stored settings never
   * describe a half-applied configuration.</p>
   *
   * @param proxyDescr proxy in the form {@code host:port}, null or ""
   * @throws HttpException if the format is wrong, the port is not numeric
   *         or the host cannot be resolved
   */
  public void setProxy(String proxyDescr)
    throws HttpException
  {
    // start from "no proxy" so that a failure leaves a consistent state
    this.proxyAddr = null;
    this.proxyPort = 0;
    this.proxyDescr = "";

    InetAddress newAddr = null;
    int newPort = 0;

    if ((proxyDescr != null) && (!proxyDescr.equals(""))) {
      int pos = proxyDescr.indexOf(":");
      if (pos <= 0) {
        throw new HttpException("Proxy definition incorrect, "
                                + "format must be host:port: "
                                + proxyDescr);
      }
      String proxyHost = proxyDescr.substring(0, pos);
      String port = proxyDescr.substring(pos + 1);
      try {
        newPort = Integer.parseInt(port);
        newAddr = InetAddress.getByName(proxyHost);
      } catch (NumberFormatException e) {
        throw new HttpException("Proxy definition incorrect, "
                                + "port not numeric: "
                                + proxyDescr);
      } catch (UnknownHostException e) {
        throw new HttpException("Host not found: " + proxyHost);
      }
    }

    this.proxyAddr = newAddr;
    this.proxyPort = newPort;
    this.proxyDescr = proxyDescr;
  }

  /**
   * @return the proxy description last accepted by {@link #setProxy(String)}
   */
  public String getProxy() {
    return proxyDescr;
  }

  /**
   * Sets the date sent in the {@code If-Modified-Since} header.
   *
   * @param modifyDate the date, or null to send no such header
   */
  public void setIfModifiedSince(Date modifyDate) {
    this.modifyDate = modifyDate;
  }

  /**
   * @return the date used for {@code If-Modified-Since}, may be null
   */
  public Date getIfModifiedSince() {
    return this.modifyDate;
  }

  /**
   * Sets the value sent in the {@code From} header.
   *
   * @param fromAddress the address, or null to send no such header
   */
  public void setFromAddress(String fromAddress) {
    this.fromAddress = fromAddress;
  }

  /**
   * @return the registered callback, may be null
   */
  public HttpToolCallback getCallback() {
    return callback;
  }

  /**
   * @return the bandwidth limit handed to the input stream wrapper
   */
  public int getBandwidth() {
    return bandwidth;
  }

  /**
   * Sets the bandwidth limit handed to {@link LimitedBandwidthStream}.
   *
   * @param bandwidth the new limit
   */
  public void setBandwidth(int bandwidth) {
    this.bandwidth = bandwidth;
  }

  /**
   * Registers the object that is informed about status changes and
   * download progress.
   *
   * @param callback the callback, or null for none
   */
  public void setCallback(HttpToolCallback callback) {
    this.callback = callback;
  }

  /**
   * @return number of bytes between two progress callbacks
   */
  public int getUpdateInterval() {
    return updateInterval;
  }

  /**
   * Sets after how many received bytes the callback is informed about the
   * current document size.
   *
   * @param updateInterval interval in bytes, must be greater than 0
   * @throws IllegalArgumentException if the interval is not positive
   */
  public void setUpdateInterval(int updateInterval) {
    if (updateInterval <= 0) {
      throw new IllegalArgumentException("updateInterval must be > 0 (was "
                                         + updateInterval + ")");
    }
    this.updateInterval = updateInterval;
  }

  /**
   * Replaces the cookie manager.
   *
   * @param cm the new manager; with null no cookies are sent or stored
   */
  public void setCookieManager(CookieManager cm) {
    this.cookieManager = cm;
  }

  /**
   * @return the cookie manager, may be null
   */
  public CookieManager getCookieManager() {
    return this.cookieManager;
  }

  /**
   * Removes all cookies from the cookie manager (if any).
   */
  public void clearCookies() {
    if (cookieManager != null) {
      cookieManager.clear();
    }
  }

  /**
   * Retrieves a document and, if the server answers 401 with an NTLM or
   * Negotiate challenge and NTLM credentials were configured, repeats the
   * request as an NTLM handshake on a kept-alive connection.
   *
   * <p>If the 401 response carries no authenticate header, a different
   * scheme, or no {@link NTLMAuthorization} was set, the 401 document is
   * returned unchanged.</p>
   *
   * @param u URL of the document
   * @param method {@code HttpConstants.GET} or {@code HttpConstants.POST}
   * @param parameters query string (GET) or form body (POST), may be null
   * @return the document, or null if the download rules denied it
   * @throws HttpException on connection/protocol errors or if the NTLM
   *         handshake fails
   */
  public HttpDoc retrieveDocument(URL u, int method, String parameters) throws HttpException {
    DocAndConnection docAndConnection = retrieveDocumentInternal(u, method, parameters, null, null);
    HttpDoc doc = docAndConnection != null ? docAndConnection.httpDoc : null;
    if (doc == null || doc.getHttpCode() != 401) {
      return doc;
    }

    String authProtName = NTLMAuthorization.WWW_AUTHENTICATE_HEADER;
    String authProtValue = doc.getHeaderValue(authProtName);
    if (authProtValue == null) {
      authProtName = NTLMAuthorization.PROXY_AUTHENTICATE_HEADER;
      authProtValue = doc.getHeaderValue(authProtName);
    }
    if (authProtValue == null) {
      // no challenge at all - nothing we could answer
      return doc;
    }
    boolean ntlmRequested =
      authProtValue.indexOf(NTLMAuthorization.NTLM_TAG) >= 0
      || authProtValue.indexOf("Negotiate") >= 0;
    if (!ntlmRequested) {
      return doc;
    }
    if (ntlmAuthorization == null) {
      log.debug("NTLM authentication requested but no credentials set for: " + u);
      return doc;
    }

    // connection that is kept open between the two handshake requests;
    // owned by this method and released in the finally block
    HttpConnection handshakeConn = null;
    try {
      NTLMAuthorization authorization = (NTLMAuthorization) ntlmAuthorization.clone();
      authorization.setHost(u.getHost());

      // step 1: send the NTLM request, the server answers with a nonce
      String auth = authorization.getRequest();
      docAndConnection = retrieveDocumentInternal(u, method, parameters, null, auth);
      if (docAndConnection == null) {
        // denied by rule; the connection was already closed
        return null;
      }
      handshakeConn = docAndConnection.httpConnection;
      doc = docAndConnection.httpDoc;
      authProtValue = doc.getHeaderValue(authProtName);
      authorization.extractNonce(authProtValue);

      // step 2: answer the challenge on the same connection
      auth = authorization.getResponse();
      docAndConnection = retrieveDocumentInternal(u, method, parameters, handshakeConn, auth);
      if (docAndConnection != null) {
        doc = docAndConnection.httpDoc;
        handshakeConn = docAndConnection.httpConnection;
      } else {
        // denied by rule; retrieveDocumentInternal closed the connection
        doc = null;
        handshakeConn = null;
      }
    } catch (Exception e) {
      log.error("NTLM-Authentication Error: " + e.getMessage());
      throw new HttpException(e.getMessage());
    } finally {
      closeQuietly(handshakeConn);
    }
    return doc;
  }

  /**
   * Result of {@link #retrieveDocumentInternal}: the document together with
   * the connection it was read from (null if that connection was closed).
   */
  protected class DocAndConnection {
    HttpDoc httpDoc;
    HttpConnection httpConnection;
  }

  /**
   * Sends one request and reads the complete response.
   *
   * <p>If {@code ntlmAuthorizationInfo} is null the connection is closed
   * after the document was read. Otherwise {@code Connection: keep-alive}
   * is requested and the open connection is returned to the caller. A
   * connection opened by this method is closed if the request fails; a
   * connection passed in by the caller is left to the caller in that
   * case.</p>
   *
   * @param u URL of the document
   * @param method {@code HttpConstants.GET} or {@code HttpConstants.POST}
   * @param parameters query string (GET) or form body (POST), may be null
   * @param httpConn an open connection to reuse, or null to open a new one
   * @param ntlmAuthorizationInfo value for an {@code Authorization: NTLM}
   *        header, or null
   * @return document and connection, or null if the download rules (or an
   *         unchanged {@code Last-Modified} date) denied the download; in
   *         that case the connection has been closed
   * @throws HttpException on unsupported protocol/method, unresolvable
   *         host, malformed response or I/O errors
   */
  protected DocAndConnection retrieveDocumentInternal(URL u, int method, String parameters,
                                                      HttpConnection httpConn, String ntlmAuthorizationInfo)
    throws HttpException
  {
    HttpDoc doc = new HttpDoc();
    doc.setURL(u);

    // collects the status line, then each header line, then the body
    ByteBuffer buff = new ByteBuffer();

    String host = u.getHost();
    if (host == null || host.length() == 0) {
      // an empty host would silently resolve to the loopback address
      throw new HttpException("no host part in URL found");
    }

    // with a proxy the target host is resolved by the proxy, not by us
    InetAddress addr = null;
    if (!useProxy()) {
      try {
        addr = InetAddress.getByName(host);
      } catch (UnknownHostException e) {
        throw new HttpException("host part (" + host + ") does not resolve");
      }
    }

    String path = u.getFile();
    if (path.equals("")) {
      path = "/";
    }
    path = path.replaceAll(" ", "%20");

    boolean secureConnection = false;
    int port = u.getPort();
    String protocol = u.getProtocol().toLowerCase(Locale.ENGLISH);
    if (protocol.equals("http")) {
      if (port == -1) {
        port = DEFAULT_HTTPPORT;
      }
    } else if (protocol.equals("https")) {
      if (port == -1) {
        port = DEFAULT_HTTPSPORT;
      }
      secureConnection = true;
    } else {
      throw new HttpException("protocol " + protocol + " not supported");
    }

    // host[:port] as needed for the Host header and for absolute URIs;
    // the port must be named whenever the URL names one
    String hostHeader = (u.getPort() == -1) ? host : host + ":" + u.getPort();

    // a plain HTTP proxy needs the absolute URI in the request line
    String requestPath;
    if (useProxy() && (!secureConnection)) {
      requestPath = "http://" + hostHeader + path;
    } else {
      requestPath = path;
    }

    // remember credentials from the URL for later requests to this host
    String userinfo = u.getUserInfo();
    if (userinfo != null) {
      if (userinfo.equals("")) {
        userinfo = null;
      } else {
        userInfos.setProperty(host, userinfo);
      }
    } else {
      userinfo = userInfos.getProperty(host);
    }

    if (callback != null) {
      callback.setHttpToolDocUrl(u.toString());
      callback.setHttpToolStatus(STATUS_CONNECTING);
    }

    // only connections opened here are closed here on failure
    boolean ownsConnection = (httpConn == null);
    boolean success = false;
    try {
      if (httpConn == null) {
        httpConn = openConnection(host, addr, port, secureConnection);
      }

      InputStream is = new LimitedBandwidthStream(
          new BufferedInputStream(httpConn.getInputStream(), 256),
          bandwidth);
      BufferedWriter bwrite = new BufferedWriter(
          new OutputStreamWriter(httpConn.getOutputStream()));

      if (callback != null) {
        callback.setHttpToolStatus(STATUS_CONNECTED);
      }

      writeRequest(bwrite, u, method, parameters, requestPath, hostHeader,
                   userinfo, ntlmAuthorizationInfo);

      if (callback != null) {
        callback.setHttpToolStatus(STATUS_RETRIEVING);
      }

      doc.setHttpCode(readStatusLine(is, buff));
      boolean chunkedEncoding = readHeaders(is, buff, doc, u);

      if (downloadDenied(doc, u)) {
        httpConn.close();
        httpConn = null;
        if (callback != null) {
          callback.setHttpToolStatus(STATUS_DENIEDBYRULE);
        }
        return null;
      }

      ChunkedInputStream chunkStream = null;
      if (chunkedEncoding) {
        chunkStream = new ChunkedInputStream(is);
      }

      int docSize = -1;
      HttpHeader contentLength = doc.getHeader(HttpHeader.CONTENT_LENGTH);
      if (contentLength != null) {
        try {
          docSize = Integer.parseInt(contentLength.getValue());
        } catch (NumberFormatException e) {
          log.error("Got a malformed Content-Length header from the server");
          docSize = -1;
        }
        if (callback != null) {
          callback.setHttpToolDocSize(docSize);
        }
        if (docSize > 0) {
          buff.setSize(docSize);
        }
      }

      readBody(is, chunkStream, docSize, buff);
      doc.setContent(buff.getContent());

      // during an NTLM handshake the connection must stay open
      if (ntlmAuthorizationInfo == null) {
        httpConn.close();
        httpConn = null;
      }

      if (callback != null) {
        callback.setHttpToolStatus(STATUS_DONE);
      }
      success = true;
    } catch (IOException e) {
      throw new HttpException(e.getMessage());
    } finally {
      if (!success && ownsConnection) {
        closeQuietly(httpConn);
      }
    }

    DocAndConnection docAndConnection = new DocAndConnection();
    docAndConnection.httpDoc = doc;
    docAndConnection.httpConnection = httpConn;
    return docAndConnection;
  }

  /**
   * Opens a new connection to the target host or to the proxy.
   */
  private HttpConnection openConnection(String host, InetAddress addr, int port,
                                        boolean secureConnection)
    throws IOException, HttpException
  {
    if (secureConnection) {
      HttpsHelper helper = new HttpsHelper(proxyAddr, proxyPort, useProxy());
      return helper.createHttpsConnection(host, port);
    }
    if (useProxy()) {
      return HttpConnection.createConnection(proxyAddr, proxyPort,
                                             socketTimeout * 1000);
    }
    return HttpConnection.createConnection(addr, port, socketTimeout * 1000);
  }

  /**
   * Writes request line, headers and (for POST) the body, then flushes.
   */
  private void writeRequest(BufferedWriter bwrite, URL u, int method, String parameters,
                            String requestPath, String hostHeader, String userinfo,
                            String ntlmAuthorizationInfo)
    throws IOException, HttpException
  {
    if (method == HttpConstants.GET) {
      bwrite.write("GET ");
      bwrite.write(requestPath);
      if ((parameters != null) && (!parameters.equals(""))) {
        bwrite.write("?");
        bwrite.write(parameters);
      }
    } else if (method == HttpConstants.POST) {
      bwrite.write("POST " + requestPath);
    } else {
      throw new HttpException("HTTP method " + method + " not supported");
    }
    bwrite.write(" ");
    bwrite.write(HTTP_VERSION);
    bwrite.write("\r\n");

    if (referer != null) {
      bwrite.write("Referer: " + referer + "\r\n");
    }

    // the cookie manager may have been set to null by the caller
    if (cookiesEnabled && cookieManager != null) {
      String cookieString = cookieManager.cookiesForURL(u);
      if (cookieString != null) {
        bwrite.write("Cookie: ");
        bwrite.write(cookieString);
        bwrite.write("\r\n");
        log.debug("Cookie request header: " + cookieString);
      }
    }

    bwrite.write("Host: " + hostHeader + "\r\n");
    bwrite.write("User-Agent: " + agentName + "\r\n");
    bwrite.write("Accept: */*\r\n");
    if (ntlmAuthorizationInfo == null) {
      bwrite.write("Connection: close\r\n");
    } else {
      bwrite.write("Connection: keep-alive\r\n");
    }

    if (fromAddress != null) {
      bwrite.write("From: " + fromAddress + "\r\n");
    }

    if (userinfo != null) {
      // NOTE(review): '%' is mapped to '@', presumably so that user names
      // containing '@' can be written in the URL - confirm with callers
      String credentials = userinfo.replace('%', '@');
      bwrite.write("Authorization: Basic ");
      bwrite.write(Base64.encode(credentials));
      bwrite.write("\r\n");
      // never log the credentials themselves
      log.debug("Sending Basic authorization header for host " + u.getHost());
    }

    if (ntlmAuthorizationInfo != null) {
      bwrite.write("Authorization: NTLM ");
      bwrite.write(ntlmAuthorizationInfo);
      bwrite.write("\r\n");
    }

    if (modifyDate != null) {
      String dateStr = formatHttpDate(modifyDate);
      bwrite.write("If-Modified-Since: ");
      bwrite.write(dateStr);
      bwrite.write("\r\n");
      log.debug("If-Modified-Since header: " + dateStr);
    }

    String body = null;
    if (method == HttpConstants.POST) {
      body = (parameters == null) ? "" : parameters;
      // Content-Length counts bytes; the writer above uses the platform
      // default charset, so measure with the same charset
      bwrite.write("Content-Type: application/x-www-form-urlencoded\r\n");
      bwrite.write("Content-Length: " + body.getBytes().length + "\r\n");
    }

    bwrite.write("\r\n");
    if (body != null) {
      bwrite.write(body);
    }
    bwrite.flush();
  }

  /**
   * Reads the first response line (up to LF) and returns it without the
   * trailing CR. The buffer is empty again afterwards.
   */
  private String readStatusLine(InputStream is, ByteBuffer buff)
    throws IOException, HttpException
  {
    int i;
    while ((i = is.read()) != LF) {
      if (i == -1) {
        throw new HttpException("Could not get HTTP return code "
                                + "(buffer content is " + buff.toString() + ")");
      }
      buff.append((byte) i);
    }
    String statusLine = lineString(buff.getContent());
    buff.clean();
    return statusLine;
  }

  /**
   * Reads all response headers up to the empty line, adds them to the
   * document and stores received cookies.
   *
   * @return true if the response uses chunked transfer encoding
   */
  private boolean readHeaders(InputStream is, ByteBuffer buff, HttpDoc doc, URL u)
    throws IOException, HttpException
  {
    boolean chunkedEncoding = false;
    boolean finishedHeaders = false;
    while (!finishedHeaders) {
      int i = is.read();
      if (i == -1) {
        throw new HttpException("Could not read HTTP headers");
      }
      // control characters (including CR and LF) are not stored
      if (i >= 32) {
        buff.append((byte) i);
      }
      if (i != LF) {
        continue;
      }

      String line = lineString(buff.getContent());
      buff.clean();
      if (line.trim().equals("")) {
        // empty line: end of the header section
        finishedHeaders = true;
        continue;
      }

      HttpHeader head = new HttpHeader(line);
      doc.addHeader(head);

      if (cookiesEnabled && cookieManager != null && head.isSetCookie()) {
        try {
          Cookie cookie = new Cookie(head.toLine(), u);
          cookieManager.add(cookie);
          log.debug("Got a cookie " + cookie);
        } catch (CookieException e) {
          log.info("Could not interpret cookie: " + e.getMessage());
        }
      }

      if (head.getName().equalsIgnoreCase("Transfer-Encoding")
          && head.getValue().equalsIgnoreCase("chunked")) {
        chunkedEncoding = true;
      }
    }
    buff.clean();
    return chunkedEncoding;
  }

  /**
   * Decides from the response headers whether the body must not be
   * downloaded. Without a rule set every document is accepted.
   *
   * @return true if the download rules deny the document or if its
   *         {@code Last-Modified} date is not after the configured
   *         If-Modified-Since date
   */
  private boolean downloadDenied(HttpDoc doc, URL u)
    throws IOException, HttpException
  {
    if (downloadRules == null) {
      return false;
    }

    boolean isNotModified = false;
    if (modifyDate != null) {
      HttpHeader lastModifiedHeader = doc.getHttpHeader("Last-Modified");
      if (lastModifiedHeader != null) {
        try {
          Date lastModifiedDate = parseHttpDate(lastModifiedHeader.getValue());
          if (lastModifiedDate.compareTo(modifyDate) <= 0) {
            isNotModified = true;
          }
        } catch (ParseException e) {
          // an unparsable date simply means "treat as modified"
          log.debug("Could not parse Last-Modified header: " + e.getMessage());
        }
      }
    }

    boolean allowed = downloadRules.downloadAllowed(doc.getHttpHeader());
    if (allowed && !isNotModified) {
      return false;
    }

    if (doc.isNotModified()) {
      log.info("If-Not-Modified successfull for: " + u);
    } else if (isNotModified) {
      log.info("Header indicates not modified for: " + u);
    } else {
      log.info("Download not allowed by download rule.");
    }
    return true;
  }

  /**
   * Reads the response body into the buffer until end of stream, until
   * {@code docSize} bytes were read or until the chunked stream is done,
   * and reports progress to the callback.
   *
   * @param chunkStream decoder for chunked responses, null otherwise
   * @param docSize announced Content-Length, or -1 if unknown
   */
  private void readBody(InputStream is, ChunkedInputStream chunkStream, int docSize,
                        ByteBuffer buff)
    throws IOException, HttpException
  {
    int count = 0;

    // An announced length of 0 means there is nothing to read; trying to
    // read anyway would block on a kept-alive connection.
    boolean finished = (chunkStream == null) && (docSize == 0);
    if (finished && callback != null) {
      callback.setHttpToolDocCurrentSize(buff.length());
    }

    while (!finished) {
      int i;
      if (chunkStream != null) {
        i = chunkStream.read();
      } else {
        i = is.read();
      }

      if (i == -1) {
        finished = true;
      } else {
        buff.append((byte) i);
        count++;
      }

      if (count == docSize) {
        finished = true;
      }
      if ((chunkStream != null) && chunkStream.isDone()) {
        finished = true;
      }

      if (callback != null) {
        if (((buff.length() % updateInterval) == 0) || finished) {
          callback.setHttpToolDocCurrentSize(buff.length());
        }
      }
    }
  }

  /**
   * Closes a connection, logging instead of propagating any failure.
   * Used on error paths where the original exception must not be masked.
   */
  private void closeQuietly(HttpConnection conn) {
    if (conn == null) {
      return;
    }
    try {
      conn.close();
    } catch (Exception e) {
      log.debug("Error while closing connection: " + e.getMessage());
    }
  }

  /**
   * Formats a date with the shared formatter while holding its lock.
   */
  private static String formatHttpDate(Date date) {
    synchronized (DATE_FORMAT_LOCK) {
      return df.format(date);
    }
  }

  /**
   * Parses a date with the shared formatter while holding its lock.
   */
  private static Date parseHttpDate(String value) throws ParseException {
    synchronized (DATE_FORMAT_LOCK) {
      return df.parse(value);
    }
  }

  /**
   * @return true if a proxy address is configured
   */
  protected boolean useProxy() {
    return (proxyAddr != null);
  }

  /**
   * Converts the bytes of one line to a String (platform default charset),
   * dropping a single trailing CR if present.
   *
   * @param b bytes of the line without the terminating LF
   * @return the line as String, "" for an empty array
   */
  protected String lineString(byte[] b) {
    if (b.length == 0) {
      return "";
    }
    int length = (b[b.length - 1] == CR) ? b.length - 1 : b.length;
    return new String(b, 0, length);
  }

  /**
   * Sets the NTLM credentials template used by {@link #retrieveDocument}.
   *
   * @param ntlmAuthorization the template, or null to disable NTLM
   */
  public void setNtlmAuthorization(NTLMAuthorization ntlmAuthorization) {
    this.ntlmAuthorization = ntlmAuthorization;
  }

  /**
   * @return the NTLM credentials template, may be null
   */
  public NTLMAuthorization getNtlmAuthorization() {
    return ntlmAuthorization;
  }

}