KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > jetspeed > services > urlmanager > URLFetcher


/*
 * Copyright 2000-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jetspeed.services.urlmanager;

//standard Java stuff
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.Hashtable;
import java.util.Vector;

//turbine stuff
import org.apache.jetspeed.services.resources.JetspeedResources;

//jetspeed stuff
import org.apache.jetspeed.cache.disk.DiskCacheEntry;
import org.apache.jetspeed.cache.disk.DiskCacheUtils;
import org.apache.jetspeed.cache.disk.JetspeedDiskCache;
import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
import org.apache.jetspeed.services.logging.JetspeedLogger;

44 /**
45 <p>
46 Handles fetching URLs and if for some reason anything happens add it to the
47 BadURLManager. There are also some util methods for downloading URLs that don't
48 use the Disk Cache.
49 </p>
50
51
52
53 @author <a HREF="mailto:burton@apache.org">Kevin A. Burton</a>
54 @author <a HREF="mailto:sgala@hisitech.com">Santiago Gala</a>
55 @version $Id: URLFetcher.java,v 1.14 2004/02/23 03:30:47 jford Exp $
56 */

57 public class URLFetcher
58 {
59     /**
60      * Static initialization of the logger for this class
61      */

62     private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLFetcher.class.getName());
63     
64     /**
65     URLs that Jetspeed is currently trying to fetch in real time.
66     */

67     private static Hashtable JavaDoc realtime_urls = new Hashtable JavaDoc();
68
69     /**
70      *
71      */

72     static final boolean shouldFetchNow =
73         JetspeedResources.getBoolean( JetspeedResources.CACHE_REQUIRE_CACHED_KEY );
74     
75     static {
76         //Looking for redirected channels...
77
java.net.HttpURLConnection.setFollowRedirects(true);
78     }
79
80     public static final Reader JavaDoc fetch( String JavaDoc url ) throws IOException JavaDoc {
81         return fetch ( url, false );
82     }
83
84     
85     /**
86     Try and fetch a URL as and get the content as a String and possibly add
87     the URL to the BadURLManager if anything goes wrong.
88     
89     @param url The URL to fetch
90     @param force if set to true then do not use force this entry to be in the cache...
91                  IE do not use CACHE_REQUIRE_CACHED
92     */

93     public static final Reader JavaDoc fetch( String JavaDoc url,
94                                            boolean force ) throws IOException JavaDoc {
95
96         if ( ! URLManager.isOK( url ) ) {
97             throw new URLNotAvailableException( url );
98         }
99
100         //SGP
101
if( force == false && DiskCacheUtils.isCached( url ) == true)
102         {
103             logger.info( "The url " +
104                       url +
105                       " is fetched from the Cache" );
106             return JetspeedDiskCache.getInstance().getEntry( url ).getReader();
107         }
108         
109         //do cache required checking
110
if ( shouldFetchNow &&
111              DiskCacheUtils.isCached( url ) == false &&
112              isRealtimeURL( url ) == false &&
113              force == false ) {
114
115             logger.info( "The url " +
116                       url +
117                       " is not in the cache and will be fetched now because you have configured -> " +
118                       JetspeedResources.CACHE_REQUIRE_CACHED_KEY );
119                  
120             //it is possible that two thread request the same URL.
121
//The refresh call in JetspeedDiskCache takes care of this.
122
JetspeedDiskCache.getInstance().refresh( url );
123             
124             //thow an Exception that this isn't in the cache.
125
throw new ContentNotAvailableException( url );
126         }
127
128         if( isRealtimeURL( url ) == true ) {
129             addRealtimeURL( url );
130           synchronized(url.intern())
131           {
132              try
133              {
134                //We wait for other thread to load
135
url.intern().wait();
136              } catch (InterruptedException JavaDoc e)
137              {
138                logger.info("Wait Interrupted");
139              } finally
140              {
141                   removeRealtimeURL( url );
142                }
143           }
144             // We try again
145
return URLFetcher.fetch( url, force );
146         } else {
147             addRealtimeURL( url );
148         }
149         try {
150             
151             URL JavaDoc content;
152
153         // Determine the URL's protocol
154
String JavaDoc protocol = url.substring(0, url.indexOf(":/"));
155         
156         // Check if a proxy is set. If no port is set, use the default port (-1)
157
String JavaDoc proxyHost = URLManager.getProxyHost( protocol );
158             if (proxyHost != null)
159             {
160                 // Open the URL using a proxy
161
content = new URL JavaDoc(protocol,
162                                   proxyHost,
163                                   URLManager.getProxyPort( protocol ),
164                                   url);
165             }
166             else
167             {
168                 content = new URL JavaDoc( url );
169             }
170
171             URLConnection JavaDoc conn = content.openConnection();
172             return getReader( conn );
173             
174         } catch ( Throwable JavaDoc t ) {
175             
176             String JavaDoc reason = "";
177             
178             if ( t instanceof MalformedURLException JavaDoc ) {
179                 reason = "The URL is Malformed.";
180             } else {
181                 reason = t.toString();
182             }
183             
184             //if the URL couldn't be fetched because it is remote AND
185
//it is not in the cache, add it to the bad URL list.
186
if ( DiskCacheUtils.isCached( url ) == false ) {
187                 //Reported up there...
188
//logger.error( t );
189
URLManager.register( url, URLManagerService.STATUS_BAD, reason );
190             } else {
191             //it is in the cache, remove it (could be broken in cache).
192
//next time we could be luckier.
193
JetspeedDiskCache.getInstance().remove(url);
194             }
195
196
197             throw new URLNotAvailableException( reason, url );
198
199         } finally {
200             removeRealtimeURL( url );
201         }
202
203     }
204
205
206     /**
207     Try and fetch a URL if the copy in the cache has expired and add
208     the URL to the BadURLManager if anything goes wrong.
209     
210     @param url The URL to fetch
211     @param force if set to true then do not use force this entry to be in the cache...
212                  IE do not use CACHE_REQUIRE_CACHED
213     */

214     public static final boolean refresh( String JavaDoc url) throws IOException JavaDoc {
215         
216         if ( ! URLManager.isOK( url ) ) {
217             if( DiskCacheUtils.isCached(url) )
218                 JetspeedDiskCache.getInstance().remove(url);
219             throw new URLNotAvailableException( url );
220         }
221         
222         if(isRealtimeURL(url)) {
223             return false;
224         }
225
226             
227          DiskCacheEntry dce = null;
228          if( DiskCacheUtils.isCached(url) ) {
229              try {
230                  dce = JetspeedDiskCache.getInstance().getEntry( url );
231                  if(!dce.hasExpired())
232                  {
233                          return false;
234                  }
235                  addRealtimeURL( url );
236
237                  //only update this if the URL on which it is based is newer
238
//than the one on disk.
239
URL JavaDoc sock;
240                    
241                  // Determine the URL's protocol
242
String JavaDoc protocol = url.substring(0, url.indexOf(":/"));
243            
244                  // Check if a proxy is set. If no port is set, use the default port (-1)
245
String JavaDoc proxyHost = URLManager.getProxyHost( protocol );
246                  if (proxyHost != null)
247                  {
248                      // Open the URL using a proxy
249
sock = new URL JavaDoc(protocol,
250                                     proxyHost,
251                                     URLManager.getProxyPort( protocol ),
252                                     url);
253                  }
254                  else
255                  {
256                      sock = new URL JavaDoc( url );
257                  }
258
259                  URLConnection JavaDoc conn = null;
260                  conn = sock.openConnection();
261
262                  File JavaDoc file = dce.getFile();
263                  long mod = dce.getLastModified();
264                  long filesize = 0;
265                  if(file != null)
266                  {
267                      filesize = file.length();
268                  }
269
270                  if(mod > 0 || filesize > 0)
271                      conn.setIfModifiedSince(mod);
272                    
273                  conn.connect();
274                  long last = conn.getLastModified();
275                  long expires = conn.getExpiration();
276                  int clength = conn.getContentLength();
277                  int respCode = 200;
278                  if(conn instanceof HttpURLConnection JavaDoc) {
279                      respCode = ( ( HttpURLConnection JavaDoc )conn ).getResponseCode();
280                  }
281                    
282                  if (respCode != 304 /*NOT MODIFIED*/ &&
283                      (clength == -1 || clength > 0) &&
284                      ( last == 0 ||
285                        last > dce.getLastModified()) ) {
286
287                      logger.info( "URLFetcher: Found updated URL: " +
288                                url +
289                                " Modified " + last + " Expires: " + expires +
290                                " CLength: " + clength );
291                 
292                      //force this URL to update.
293

294                      JetspeedDiskCache.getInstance().getEntry( url, getReader( conn ) );
295                      //Trying to deal with a problem under FreeBSD
296
conn.getInputStream().close();
297
298                      //Set the last modified and expiration times for entry
299
//FIXME: 0 is used in FileWatcher to mean not initialized...
300
if(last > 0)
301                          dce.setLastModified(last);
302                      else
303                          dce.setLastModified( System.currentTimeMillis() );
304                      dce.setExpirationTime(expires);
305
306
307                      //removeRealtimeURL( url ); (done in finally)
308
return true;
309                      //now make sure that the entry that depends on this HREF
310
//is updated in the PortletFactory.
311
} else {
312
313                      if(last > 0)
314                          dce.setLastModified(last);
315                      else
316                          dce.setLastModified( System.currentTimeMillis() );
317                      dce.setExpirationTime(expires);
318                            
319                        
320                      logger.info( "DiskCacheDaemon: URL still valid: " + url +
321                                " Modified " + last + " Expires: " + expires +
322                                " CLength: " + clength);
323                      //removeRealtimeURL( url ); (done in finally)
324
return false;
325                  }
326              } catch (Throwable JavaDoc e) {
327                  //Add as a Bad URL
328
logger.error("Throwable", e);
329                  URLManager.register( url,
330                                       URLManagerService.STATUS_BAD,
331                                       e.toString() );
332              } finally {
333                  removeRealtimeURL( url );
334              }
335                    
336          } else {
337              logger.info( "URLFetcher: Cache miss during validation! Forcing url: " + url );
338              removeRealtimeURL( url );
339              JetspeedDiskCache.getInstance().getEntry( url, true );
340              return true;
341          }
342            return false;
343                     
344     }
345
346
347     /**
348      *
349      * Return a Reader for a given HTTP connection.
350      * If the connection first line contains a XML declaration
351      * with encoding, honor this encoding.
352      * If not, use the encoding from the HTTP connection,
353      * taking ISO-8859-1 as default.
354      *
355     */

356     static final Reader JavaDoc getReader( URLConnection JavaDoc conn )
357         throws IOException JavaDoc, UnsupportedEncodingException JavaDoc {
358         String JavaDoc enc = conn.getContentEncoding();
359         if( enc == null ) {
360             enc = "ISO-8859-1";
361         }
362         // Some XML files come with a encoding attribute inside,
363
// different than the HTTP encoding. We will have
364
// to start reading the Reader, read the attribute and rewind
365
// the stream, generating a new reader with the "true" encoding
366
BufferedInputStream JavaDoc is = new BufferedInputStream JavaDoc( conn.getInputStream() );
367         //If document is XML, find the encoding and give it priority over
368
//the one returned by the connection
369

370         //we mark for resetting later. We need a big number to ensure
371
// stack of streams don't read it to fill buffers.
372
is.mark( 20480 );
373         BufferedReader JavaDoc asciiReader = new BufferedReader JavaDoc( new InputStreamReader JavaDoc( is, "ASCII" ) );
374         String JavaDoc decl = asciiReader.readLine();
375         //System.err.println( "Line: " + decl );
376
String JavaDoc key = "encoding=\"";
377         //decl nul means that the connection got reset...
378
if( decl != null ) {
379             int off = decl.indexOf( key );
380             if( off > 0 ) {
381                 enc = decl.substring( off + key.length(),
382                                       decl.indexOf( '"' , off + key.length()) );
383             }
384         }
385         logger.info("URLFetcher: found URL with encoding -> " + enc );
386         //Reset the bytes read
387
is.reset();
388         Reader JavaDoc rdr = new InputStreamReader JavaDoc( is,
389                                             enc );
390         return rdr;
391     }
392
393
394     
395     /**
396     Add a URL that is downloading in realtime
397     */

398     static final void addRealtimeURL( String JavaDoc url ) {
399         synchronized( realtime_urls )
400         {
401             Vector JavaDoc threads = (Vector JavaDoc) realtime_urls.get( url);
402             if(threads != null)
403                {
404                 if(!threads.contains(Thread.currentThread()))
405                    {
406                      threads.addElement(Thread.currentThread() );
407                    }
408                } else {
409                 threads = new Vector JavaDoc();
410                 threads.addElement(Thread.currentThread());
411                 realtime_urls.put( url, threads );
412                }
413         }
414         
415     }
416     
417     /**
418     Remove a URL because it isn't downloading anymore.
419     */

420     static final void removeRealtimeURL( String JavaDoc url ) {
421         synchronized( realtime_urls )
422         {
423            Vector JavaDoc threads = (Vector JavaDoc) realtime_urls.get( url);
424            if(threads != null)
425                synchronized( threads )
426                    {
427                     Thread JavaDoc realLoader = (Thread JavaDoc) threads.firstElement();
428                     if(realLoader == Thread.currentThread())
429                     {
430                       synchronized(url.intern())
431                      {
432                       realtime_urls.remove(url);
433                       url.intern().notifyAll();
434                       }
435                      } else {
436                      threads.removeElement(Thread.currentThread());
437                      }
438                     }
439         }
440         
441     }
442
443     /**
444     Return true if this URL isn't downloading in realtime.
445     */

446     static final boolean isRealtimeURL( String JavaDoc url ) {
447
448         synchronized( realtime_urls ) {
449             return realtime_urls.get( url ) != null;
450         }
451             
452     }
453
454     /**
455     Return the list of realtime URLs for debug
456     */

457     public static final Hashtable JavaDoc getRealtimeURLs() {
458         synchronized(realtime_urls) {
459             return realtime_urls;
460         }
461     }
462     
463 }
464
Popular Tags