1 6 package net.sourceforge.cvsgrab; 7 8 import java.io.BufferedInputStream ; 9 import java.io.ByteArrayOutputStream ; 10 import java.io.File ; 11 import java.io.FileOutputStream ; 12 import java.io.IOException ; 13 import java.io.InputStream ; 14 import java.io.StringReader ; 15 import java.net.InetAddress ; 16 import java.net.UnknownHostException ; 17 import java.util.Iterator ; 18 import java.util.Properties ; 19 import java.util.StringTokenizer ; 20 import java.util.zip.GZIPInputStream ; 21 22 import net.sourceforge.cvsgrab.util.PasswordField; 23 24 import org.apache.commons.httpclient.Header; 25 import org.apache.commons.httpclient.HttpClient; 26 import org.apache.commons.httpclient.HttpMethod; 27 import org.apache.commons.httpclient.HttpMethodBase; 28 import org.apache.commons.httpclient.HttpRecoverableException; 29 import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; 30 import org.apache.commons.httpclient.NTCredentials; 31 import org.apache.commons.httpclient.URIException; 32 import org.apache.commons.httpclient.UsernamePasswordCredentials; 33 import org.apache.commons.httpclient.cookie.CookiePolicy; 34 import org.apache.commons.httpclient.methods.GetMethod; 35 import org.apache.commons.httpclient.util.URIUtil; 36 import org.apache.xerces.parsers.DOMParser; 37 import org.apache.xerces.xni.parser.XMLInputSource; 38 import org.cyberneko.html.HTMLConfiguration; 39 import org.w3c.dom.Document ; 40 import org.xml.sax.SAXNotRecognizedException ; 41 import org.xml.sax.SAXNotSupportedException ; 42 43 50 public class WebBrowser { 51 52 private static WebBrowser _instance = new WebBrowser(); 53 54 private HttpClient _client; 55 private DOMParser _htmlParser; 56 57 60 public static WebBrowser getInstance() { 61 return _instance; 62 } 63 64 public static String forceFinalSlash(String s) { 65 if (!s.endsWith("/")) { 66 return s + "/"; 67 } 68 return s; 69 } 70 71 public static String removeFinalSlash(String s) { 72 if (s != null && s.endsWith("/")) { 73 return s.substring(0, s.length()-1); 74 } 75 return s; 76 } 77 78 public static String addQueryParam(String url, String queryParam) { 79 String newUrl = url; 80 if (queryParam != null) { 81 if (newUrl.indexOf('?') > 0) { 82 newUrl += "&"; 83 } else { 84 newUrl += "?"; 85 } 86 newUrl += queryParam; 87 } 88 return newUrl; 89 } 90 91 public static String addQueryParam(String url, String paramName, String paramValue) { 92 String newUrl = url; 93 if (paramName != null && paramValue != null) { 94 if (newUrl.indexOf('?') > 0) { 95 newUrl += "&"; 96 } else { 97 newUrl += "?"; 98 } 99 try { 100 newUrl += paramName + "=" + URIUtil.encodeQuery(paramValue); 101 } catch (URIException e) { 102 e.printStackTrace(); 103 throw new RuntimeException ("Cannot encode parameter value " + paramValue); 104 } 105 } 106 return newUrl; 107 } 108 109 114 public static Properties getQueryParams(String urlQuery) { 115 Properties p = new Properties (); 116 StringTokenizer st = new StringTokenizer (urlQuery, "?&;"); 117 while (st.hasMoreTokens()) { 118 String part = st.nextToken(); 119 String key = part.substring(0, part.indexOf('=')); 120 String value = part.substring(part.indexOf('=') + 1); 121 p.put(key, value); 122 } 123 return p; 124 } 125 126 131 public static String toQueryParams(Properties queryItems) { 132 StringBuffer sb = new StringBuffer (); 133 for (Iterator i = queryItems.keySet().iterator(); i.hasNext();) { 134 String key = (String ) i.next(); 135 String value = queryItems.getProperty(key); 136 sb.append(key); 137 sb.append('='); 138 sb.append(value); 139 if (i.hasNext()) { 140 sb.append('&'); 141 } 142 } 143 return sb.toString(); 144 } 145 146 149 public WebBrowser() { 150 super(); 151 CookiePolicy.setDefaultPolicy(CookiePolicy.COMPATIBILITY); 152 _client = new HttpClient(); 153 _client.setConnectionTimeout(5000); 154 _htmlParser = new DOMParser(new HTMLConfiguration()); 155 try { 156 _htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "upper"); 157 _htmlParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower"); 158 _htmlParser.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs", true); 159 _htmlParser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true); 160 _htmlParser.setFeature("http://xml.org/sax/features/namespaces", false); 161 } catch (SAXNotRecognizedException e) { 162 e.printStackTrace(); 163 } catch (SAXNotSupportedException e) { 164 e.printStackTrace(); 165 } 166 } 167 168 177 public void useProxy(String proxyHost, int proxyPort, final String ntDomain, final String userName, String password) { 178 CVSGrab.getLog().info("Using proxy " + proxyHost + ":" + proxyPort); 179 _client.getHostConfiguration().setProxy(proxyHost, proxyPort); 180 if (userName != null) { 181 if (password == null ) { 182 PasswordField pwdField = new PasswordField(); 183 try { 184 password = pwdField.getPassword("Enter the password for the proxy: "); 185 } catch (IOException ex) { 186 ex.printStackTrace(); 187 } 188 } 189 if (ntDomain == null) { 190 CVSGrab.getLog().info("Login on the proxy with user name " + userName); 191 _client.getState().setProxyCredentials(null, proxyHost, 192 new UsernamePasswordCredentials(userName, password)); 193 } else { 194 try { 195 String host = InetAddress.getLocalHost().getHostName(); 196 CVSGrab.getLog().info("Login on the NT proxy with user name " + userName 197 + ", host " + host + ", NT domain " + ntDomain); 198 _client.getState().setProxyCredentials(null, proxyHost, 199 new NTCredentials(userName, password, host, ntDomain)); 200 } catch (UnknownHostException ex) { 201 ex.printStackTrace(); 202 } 203 } 204 } 205 } 206 207 213 public void useWebAuthentification(final String userName, String password) { 214 CVSGrab.getLog().info("Login on the web server with user name " + userName + " and password " + password); 215 if (password == null ) { 216 PasswordField pwdField = new PasswordField(); 217 try { 218 password = pwdField.getPassword("Enter the password for the web server: "); 219 } catch (IOException ex) { 220 ex.printStackTrace(); 221 } 222 } 223 _client.getState().setCredentials(null, null, 224 new UsernamePasswordCredentials(userName, password)); 225 } 226 227 230 public void useMultithreading() { 231 _client = new HttpClient(new MultiThreadedHttpConnectionManager()); 232 } 233 234 241 public HttpMethod executeMethod(HttpMethod method, String url) { 242 int statusCode = -1; 243 int attempt = 0; 244 245 try { 246 method.setRequestHeader("User-Agent", "cvsgrab (http://cvsgrab.sourceforge.net)"); 247 method.setRequestHeader("Cache-Control", "no-cache"); 248 method.setRequestHeader("Accept-Encoding", "gzip"); 249 250 while ((statusCode == -1) && (attempt < 3)) { 252 try { 253 statusCode = _client.executeMethod(method); 255 CVSGrab.getLog().trace("Executed method " + url + " with status code " + statusCode); 256 } catch (HttpRecoverableException e) { 257 CVSGrab.getLog().warn("A recoverable exception occurred, retrying. " + e.getMessage()); 258 } catch (IOException e) { 259 CVSGrab.getLog().error("Failed to download file " + url); 260 e.printStackTrace(); 261 throw new RuntimeException ("Failed to download file " + url); 262 } 263 } 264 265 if (statusCode == -1) { 267 CVSGrab.getLog().error("Failed to recover from exception."); 268 throw new RuntimeException ("Error when reading " + url); 269 } 270 271 if (statusCode >= 400) { 272 CVSGrab.getLog().debug("Page not found (error " + statusCode + ")"); 273 throw new RuntimeException ("Error " + statusCode + " when reading " + url); 274 } 275 276 if ((statusCode >= 300) && (statusCode < 400)) { 278 Header locationHeader = method.getResponseHeader("location"); 279 280 if (locationHeader != null) { 281 String redirectLocation = locationHeader.getValue(); 282 283 method.releaseConnection(); 284 CVSGrab.getLog().debug("Redirect to " + redirectLocation); 285 286 HttpMethod redirectMethod = new GetMethod(redirectLocation); 287 288 executeMethod(redirectMethod, redirectLocation); 289 290 return redirectMethod; 291 } else { 292 CVSGrab.getLog().error("Page not found"); 296 throw new RuntimeException ("Error when reading " + url); 297 } 298 } 299 } catch (RuntimeException e) { 300 method.releaseConnection(); 301 throw e; 302 } 303 304 return method; 305 } 306 307 313 public String getResponse(HttpMethod method, String url) { 314 HttpMethod lastMethod = executeMethod(method, url); 315 String response = null; 316 try { 317 String contentEncoding = null; 319 if (lastMethod.getResponseHeader("Content-Encoding") != null) { 320 contentEncoding = lastMethod.getResponseHeader("Content-Encoding").getValue(); 321 } 322 if (contentEncoding != null && contentEncoding.toLowerCase().indexOf("gzip") >= 0) { 323 try { 324 InputStream inStream = lastMethod.getResponseBodyAsStream(); 325 if (inStream != null) { 326 inStream = new GZIPInputStream (lastMethod.getResponseBodyAsStream()); 327 if (inStream != null) { 328 response = getResponseContent(lastMethod, inStream); 329 } 330 } 331 } 332 catch (IOException e) { 333 CVSGrab.getLog().error("I/O failure reading response body", e); 334 } 335 } else { 336 try { 337 response = getResponseContent(lastMethod, lastMethod.getResponseBodyAsStream()); 338 } catch (IOException e) { 339 CVSGrab.getLog().error("I/O failure reading response body", e); 340 } 341 } 342 } finally { 343 lastMethod.releaseConnection(); 344 } 345 return response; 346 } 347 348 private String getResponseContent(HttpMethod lastMethod, InputStream inStream) throws IOException { 349 String response; 350 ByteArrayOutputStream outstream = new ByteArrayOutputStream (); 351 byte[] buffer = new byte[4096]; 352 int len; 353 while ((len = inStream.read(buffer)) > 0) { 354 outstream.write(buffer, 0, len); 355 } 356 outstream.close(); 357 response = new String (outstream.toByteArray(), 358 ((HttpMethodBase) lastMethod).getResponseCharSet()); 359 return response; 360 } 361 362 368 public Document getDocument(String url) throws Exception { 369 if (url.endsWith("/browse/")) { 371 if (url.indexOf("netbeans.org") >= 0) { 372 throw new Exception ("This url " + url + " doesn't work on Netbeans.org"); 373 } 374 if (url.indexOf("dev.java.net") >= 0) { 375 throw new Exception ("This url " + url + " doesn't work on dev.java.net"); 376 } 377 } 378 return getDocument(new GetMethod(url), url); 379 } 380 381 387 public Document getDocument(HttpMethod method, String url) throws Exception { 388 String response = getResponse(method, url); 389 return getDocumentFromSource(response); 390 } 391 392 public Document getDocumentFromSource(String docSource) throws Exception { 393 int pos = 0; 395 do { 396 pos = docSource.indexOf("xmlns", pos); 397 if (pos > 0) { 398 int eq = docSource.indexOf('=', pos); 399 int lt = docSource.indexOf('<', pos); 400 int gt = docSource.indexOf('>', pos); 401 if (eq > 0 && eq < gt && gt < lt) { 402 docSource = docSource.substring(0, pos) + docSource.substring(gt); 403 } 404 } 405 } while (pos > 0); 406 407 XMLInputSource source = new XMLInputSource(null, null, null, new StringReader (docSource), null); 408 409 _htmlParser.parse(source); 410 411 Document doc = _htmlParser.getDocument(); 412 return doc; 413 } 414 415 public void loadFile(String url, File destFile) throws Exception { 416 loadFile(new GetMethod(url), destFile, url); 417 } 418 419 public void loadFile(HttpMethod method, File destFile, String url) throws Exception { 420 HttpMethod lastMethod = executeMethod(method, url); 421 String contentEncoding = null; 422 if (lastMethod.getResponseHeader("Content-Encoding") != null) { 423 contentEncoding = lastMethod.getResponseHeader("Content-Encoding").getValue(); 424 } 425 try { 426 FileOutputStream out = null; 427 InputStream in = new BufferedInputStream (lastMethod.getResponseBodyAsStream()); 428 if (contentEncoding != null && contentEncoding.toLowerCase().indexOf("gzip") >= 0) { 429 in = new GZIPInputStream (lastMethod.getResponseBodyAsStream()); 430 } 431 try { 432 out = new FileOutputStream (destFile); 433 434 byte[] buffer = new byte[8 * 1024]; 435 int count = 0; 436 do { 437 out.write(buffer, 0, count); 438 count = in.read(buffer, 0, buffer.length); 439 } while (count != -1); 440 } finally { 441 if (out != null) { 442 out.close(); 443 } 444 if (in != null) { 445 in.close(); 446 } 447 } 448 } finally { 449 lastMethod.releaseConnection(); 450 } 451 } 452 453 } 454 | Popular Tags |