package org.apache.jetspeed.services.urlmanager;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Hashtable;
import java.util.Vector;

import org.apache.jetspeed.services.resources.JetspeedResources;

import org.apache.jetspeed.cache.disk.DiskCacheEntry;
import org.apache.jetspeed.cache.disk.DiskCacheUtils;
import org.apache.jetspeed.cache.disk.JetspeedDiskCache;
import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
import org.apache.jetspeed.services.logging.JetspeedLogger;

/**
 * Static helper that opens a URL (optionally through the proxy configured in
 * {@link URLManager}) and hands back a {@link Reader} on its content, and that
 * revalidates entries held in the {@link JetspeedDiskCache}.
 *
 * <p>While a thread is loading a URL, the URL is registered in an internal
 * table of "realtime" URLs. The first thread registered for a URL is treated
 * as its loader; other threads that ask for the same URL through
 * {@link #fetch(String, boolean)} wait on the interned URL string until the
 * loader deregisters.</p>
 *
 * <p>Locking rule used throughout this class: the monitor of the interned URL
 * string may be held while taking the registry lock, but never the other way
 * round.</p>
 */
public class URLFetcher
{

    /**
     * Static initialization of the logger for this class
     */
    private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLFetcher.class.getName());

    /**
     * URLs currently being loaded: maps the URL string to a Vector of the
     * Threads registered for it. The first element is the loading thread.
     * Also used as the lock guarding every access to the registry.
     */
    private static final Hashtable realtime_urls = new Hashtable();

    /**
     * Value of the {@link JetspeedResources#CACHE_REQUIRE_CACHED_KEY} setting.
     * When true, {@link #fetch(String, boolean)} does not download an uncached
     * URL itself: it asks the disk cache to refresh it and throws
     * {@link ContentNotAvailableException}.
     */
    static final boolean shouldFetchNow =
        JetspeedResources.getBoolean( JetspeedResources.CACHE_REQUIRE_CACHED_KEY );

    /** Charset used when neither the connection nor the content names one. */
    private static final String DEFAULT_ENCODING = "ISO-8859-1";

    /** Number of bytes that may be read while sniffing the first line before the stream is rewound. */
    private static final int SNIFF_LIMIT = 20480;

    /** Marker searched for in the first line of the content to find a declared encoding. */
    private static final String ENCODING_KEY = "encoding=\"";

    static {
        // Applies JVM-wide to every HttpURLConnection.
        java.net.HttpURLConnection.setFollowRedirects(true);
    }

    /**
     * Fetches the given URL without forcing a download; equivalent to
     * {@code fetch(url, false)}.
     *
     * @param url the URL to fetch
     * @return a Reader on the content
     * @throws IOException if the content cannot be obtained
     */
    public static final Reader fetch( String url ) throws IOException {
        return fetch( url, false );
    }

    /**
     * Fetches the given URL.
     *
     * <ol>
     *   <li>URLs rejected by {@link URLManager#isOK} fail immediately.</li>
     *   <li>Unless {@code force} is set, a cached URL is served from the disk cache.</li>
     *   <li>If {@link #shouldFetchNow} is set and the URL is neither cached,
     *       being loaded, nor forced, a cache refresh is requested and
     *       {@link ContentNotAvailableException} is thrown.</li>
     *   <li>If another thread is already loading the URL, this thread waits for
     *       it to finish and then retries.</li>
     *   <li>Otherwise the URL is opened (through the configured proxy, if any)
     *       and a Reader on the connection is returned.</li>
     * </ol>
     *
     * @param url   the URL to fetch
     * @param force if true, bypass the disk cache
     * @return a Reader on the content
     * @throws IOException if the URL is not available or cannot be read
     */
    public static final Reader fetch( String url,
                                      boolean force ) throws IOException {

        if ( ! URLManager.isOK( url ) ) {
            throw new URLNotAvailableException( url );
        }

        if ( !force && DiskCacheUtils.isCached( url ) ) {
            logger.info( "The url " +
                         url +
                         " is fetched from the Cache" );
            return JetspeedDiskCache.getInstance().getEntry( url ).getReader();
        }

        if ( shouldFetchNow &&
             !DiskCacheUtils.isCached( url ) &&
             !isRealtimeURL( url ) &&
             !force ) {

            logger.info( "The url " +
                         url +
                         " is not in the cache and will be fetched now because you have configured -> " +
                         JetspeedResources.CACHE_REQUIRE_CACHED_KEY );

            JetspeedDiskCache.getInstance().refresh( url );

            throw new ContentNotAvailableException( url );
        }

        if ( isRealtimeURL( url ) ) {
            // Somebody is already loading this URL: register as a waiter,
            // sleep until the loader is done, then start over.
            addRealtimeURL( url );
            boolean interrupted = false;
            try {
                awaitOtherLoader( url );
            } catch ( InterruptedException e ) {
                logger.info( "Wait Interrupted" );
                interrupted = true;
            } finally {
                // Deliberately called without holding the interned-URL monitor,
                // so the registry lock is never requested while that monitor is held.
                removeRealtimeURL( url );
            }
            try {
                return URLFetcher.fetch( url, force );
            } finally {
                if ( interrupted ) {
                    // Restored only after the retry: restoring it earlier would
                    // make the retry's own wait() fail immediately.
                    Thread.currentThread().interrupt();
                }
            }
        }

        addRealtimeURL( url );
        try {

            URLConnection conn = toConnectionURL( url ).openConnection();
            return getReader( conn );

        } catch ( Throwable t ) {

            String reason;
            if ( t instanceof MalformedURLException ) {
                reason = "The URL is Malformed.";
            } else {
                reason = t.toString();
            }

            if ( !DiskCacheUtils.isCached( url ) ) {
                // Nothing usable in the cache: mark the URL as bad.
                URLManager.register( url, URLManagerService.STATUS_BAD, reason );
            } else {
                // A cached copy exists but the URL failed: drop the cached copy.
                JetspeedDiskCache.getInstance().remove( url );
            }

            throw new URLNotAvailableException( reason, url );

        } finally {
            removeRealtimeURL( url );
        }

    }

    /**
     * Revalidates the disk-cache entry of a URL against its origin.
     *
     * <p>A URL that is not cached at all is force-loaded into the cache. A
     * cached entry that has expired is checked with a conditional request
     * (If-Modified-Since); when the response indicates new content the cache
     * entry is replaced. In both outcomes the entry's last-modified and
     * expiration stamps are updated from the response. Any failure during
     * validation is logged, the URL is registered as bad, and {@code false} is
     * returned.</p>
     *
     * @param url the URL to revalidate
     * @return true if the cache entry was (re)loaded, false otherwise
     * @throws IOException if the URL is rejected by {@link URLManager#isOK}
     *                     or the forced load fails
     */
    public static final boolean refresh( String url ) throws IOException {

        if ( ! URLManager.isOK( url ) ) {
            if ( DiskCacheUtils.isCached( url ) ) {
                JetspeedDiskCache.getInstance().remove( url );
            }
            throw new URLNotAvailableException( url );
        }

        if ( isRealtimeURL( url ) ) {
            // Already being loaded by some thread; nothing to do here.
            return false;
        }

        if ( !DiskCacheUtils.isCached( url ) ) {
            logger.info( "URLFetcher: Cache miss during validation! Forcing url: " + url );
            removeRealtimeURL( url );
            JetspeedDiskCache.getInstance().getEntry( url, true );
            return true;
        }

        try {
            DiskCacheEntry dce = JetspeedDiskCache.getInstance().getEntry( url );
            if ( !dce.hasExpired() ) {
                return false;
            }
            addRealtimeURL( url );

            URLConnection conn = toConnectionURL( url ).openConnection();

            File file = dce.getFile();
            long mod = dce.getLastModified();
            long filesize = 0;
            if ( file != null ) {
                filesize = file.length();
            }

            // Only make the request conditional when there is a local copy to compare with.
            if ( mod > 0 || filesize > 0 ) {
                conn.setIfModifiedSince( mod );
            }

            conn.connect();
            long last = conn.getLastModified();
            long expires = conn.getExpiration();
            int clength = conn.getContentLength();
            int respCode = HttpURLConnection.HTTP_OK;
            if ( conn instanceof HttpURLConnection ) {
                respCode = ( (HttpURLConnection) conn ).getResponseCode();
            }

            boolean updated =
                respCode != HttpURLConnection.HTTP_NOT_MODIFIED &&
                ( clength == -1 || clength > 0 ) &&
                ( last == 0 || last > dce.getLastModified() );

            if ( updated ) {

                logger.info( "URLFetcher: Found updated URL: " +
                             url +
                             " Modified " + last + " Expires: " + expires +
                             " CLength: " + clength );

                try {
                    JetspeedDiskCache.getInstance().getEntry( url, getReader( conn ) );
                } finally {
                    // Release the connection's stream even if the cache update failed.
                    conn.getInputStream().close();
                }

                stampEntry( dce, last, expires );
                return true;
            }

            stampEntry( dce, last, expires );

            logger.info( "DiskCacheDaemon: URL still valid: " + url +
                         " Modified " + last + " Expires: " + expires +
                         " CLength: " + clength );
            return false;

        } catch ( Throwable e ) {
            logger.error( "Throwable", e );
            URLManager.register( url,
                                 URLManagerService.STATUS_BAD,
                                 e.toString() );
        } finally {
            removeRealtimeURL( url );
        }

        return false;

    }

    /**
     * Wraps the connection's input stream in a Reader using the best encoding
     * that can be determined.
     *
     * <p>The starting value is the connection's {@code Content-Encoding}
     * header, or ISO-8859-1 when that header is absent. The first line of the
     * content is then read as ASCII; if it contains {@code encoding="..."}
     * with a non-empty, properly terminated value, that value is used instead.
     * The stream is rewound before the Reader is created, so the returned
     * Reader starts at the first byte of the content.</p>
     *
     * <p>NOTE(review): {@code Content-Encoding} normally names a compression
     * coding (e.g. gzip), not a charset; the charset parameter of
     * {@code Content-Type} is probably what was intended. Left unchanged to
     * preserve current behaviour - confirm before changing.</p>
     *
     * @param conn an opened (or openable) connection
     * @return a Reader positioned at the start of the content
     * @throws IOException if the stream cannot be read or rewound
     * @throws UnsupportedEncodingException if the chosen encoding is not supported
     */
    static final Reader getReader( URLConnection conn )
        throws IOException, UnsupportedEncodingException {

        String enc = conn.getContentEncoding();
        if ( enc == null ) {
            enc = DEFAULT_ENCODING;
        }

        BufferedInputStream is = new BufferedInputStream( conn.getInputStream() );
        try {
            // Remember the start so the sniffed bytes can be replayed to the real Reader.
            is.mark( SNIFF_LIMIT );

            BufferedReader asciiReader = new BufferedReader( new InputStreamReader( is, "ASCII" ) );
            String declared = extractDeclaredEncoding( asciiReader.readLine() );
            if ( declared != null ) {
                enc = declared;
            }

            logger.info( "URLFetcher: found URL with encoding -> " + enc );

            is.reset();
            return new InputStreamReader( is, enc );
        } catch ( IOException e ) {
            // The caller never receives the stream on failure, so close it here.
            try {
                is.close();
            } catch ( IOException ignored ) {
                // The original failure is the one worth reporting.
            }
            throw e;
        }
    }

    /**
     * Registers the current thread for the given URL. The first thread
     * registered for a URL becomes its loader; a thread is never listed twice.
     *
     * @param url the URL being loaded
     */
    static final void addRealtimeURL( String url ) {
        synchronized ( realtime_urls )
        {
            Thread self = Thread.currentThread();
            Vector threads = (Vector) realtime_urls.get( url );
            if ( threads == null )
            {
                threads = new Vector();
                threads.addElement( self );
                realtime_urls.put( url, threads );
            }
            else if ( !threads.contains( self ) )
            {
                threads.addElement( self );
            }
        }
    }

    /**
     * Deregisters the current thread for the given URL. If the current thread
     * is the loader (first registered thread), the whole entry is removed and
     * every thread waiting on the interned URL string is woken; otherwise only
     * the current thread is removed from the entry. Does nothing when the URL
     * is not registered.
     *
     * @param url the URL that is no longer being loaded by this thread
     */
    static final void removeRealtimeURL( String url ) {
        boolean wakeWaiters = false;

        synchronized ( realtime_urls )
        {
            Vector threads = (Vector) realtime_urls.get( url );
            if ( threads != null )
            {
                synchronized ( threads )
                {
                    Thread self = Thread.currentThread();
                    if ( !threads.isEmpty() && threads.firstElement() == self )
                    {
                        realtime_urls.remove( url );
                        wakeWaiters = true;
                    }
                    else
                    {
                        threads.removeElement( self );
                    }
                }
            }
        }

        if ( wakeWaiters )
        {
            // Notify only after the registry lock has been released: waiters
            // check the registry while holding this monitor, so taking the two
            // locks in the opposite order here could deadlock.
            String monitor = url.intern();
            synchronized ( monitor )
            {
                monitor.notifyAll();
            }
        }
    }

    /**
     * Tells whether any thread is currently registered for the given URL.
     *
     * @param url the URL to test
     * @return true if the URL has an entry in the realtime registry
     */
    static final boolean isRealtimeURL( String url ) {
        synchronized ( realtime_urls ) {
            return realtime_urls.get( url ) != null;
        }
    }

    /**
     * Returns the live registry of URLs being loaded (URL string to Vector of
     * Threads). This is the internal table itself, not a copy.
     *
     * @return the realtime URL registry
     */
    public static final Hashtable getRealtimeURLs() {
        synchronized ( realtime_urls ) {
            return realtime_urls;
        }
    }

    /**
     * Builds the URL object used to open a connection: when a proxy host is
     * configured for the URL's protocol the connection goes to the proxy with
     * the full URL as the requested file, otherwise the URL is used directly.
     *
     * @param url the target URL; must contain ":/"
     * @return the URL to open
     * @throws MalformedURLException if the protocol cannot be determined or
     *                               the URL cannot be parsed
     */
    private static URL toConnectionURL( String url ) throws MalformedURLException {
        int schemeEnd = url.indexOf( ":/" );
        if ( schemeEnd < 0 ) {
            throw new MalformedURLException( "no protocol: " + url );
        }

        String protocol = url.substring( 0, schemeEnd );
        String proxyHost = URLManager.getProxyHost( protocol );
        if ( proxyHost != null ) {
            return new URL( protocol,
                            proxyHost,
                            URLManager.getProxyPort( protocol ),
                            url );
        }
        return new URL( url );
    }

    /**
     * Blocks until no other thread is registered as loader of the given URL.
     * The condition is re-checked under the monitor before every wait, so a
     * notification issued before this thread starts waiting is not lost.
     *
     * @param url the URL whose loader to wait for
     * @throws InterruptedException if the thread is interrupted while waiting
     */
    private static void awaitOtherLoader( String url ) throws InterruptedException {
        String monitor = url.intern();
        synchronized ( monitor )
        {
            while ( isLoadedByOtherThread( url ) )
            {
                monitor.wait();
            }
        }
    }

    /**
     * Tells whether a thread other than the current one is the registered
     * loader (first registered thread) of the given URL.
     */
    private static boolean isLoadedByOtherThread( String url ) {
        synchronized ( realtime_urls )
        {
            Vector threads = (Vector) realtime_urls.get( url );
            return threads != null
                && !threads.isEmpty()
                && threads.firstElement() != Thread.currentThread();
        }
    }

    /**
     * Extracts the value of {@code encoding="..."} from the given line.
     *
     * @param decl first line of the content, may be null
     * @return the declared encoding, or null when the line is null, has no
     *         declaration, or the value is empty or not terminated by a quote
     */
    private static String extractDeclaredEncoding( String decl ) {
        if ( decl == null ) {
            return null;
        }
        int off = decl.indexOf( ENCODING_KEY );
        if ( off < 0 ) {
            return null;
        }
        int start = off + ENCODING_KEY.length();
        int end = decl.indexOf( '"', start );
        if ( end <= start ) {
            return null;
        }
        return decl.substring( start, end );
    }

    /**
     * Records the validation result on a cache entry: the server's
     * last-modified time (or the current time when the server sent none) and
     * the server's expiration time.
     */
    private static void stampEntry( DiskCacheEntry dce, long last, long expires ) {
        if ( last > 0 ) {
            dce.setLastModified( last );
        } else {
            dce.setLastModified( System.currentTimeMillis() );
        }
        dce.setExpirationTime( expires );
    }

}