KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > syndication > fetcher > impl > HttpURLFeedFetcher


1 /*
2  * Copyright 2004 Sun Microsystems, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */

17 package com.sun.syndication.fetcher.impl;
18
19 import java.io.BufferedInputStream JavaDoc;
20 import java.io.IOException JavaDoc;
21 import java.io.InputStream JavaDoc;
22 import java.net.HttpURLConnection JavaDoc;
23 import java.net.URL JavaDoc;
24 import java.net.URLConnection JavaDoc;
25 import java.util.zip.GZIPInputStream JavaDoc;
26
27 import com.sun.syndication.feed.synd.SyndFeed;
28 import com.sun.syndication.fetcher.FetcherEvent;
29 import com.sun.syndication.fetcher.FetcherException;
30 import com.sun.syndication.io.FeedException;
31 import com.sun.syndication.io.SyndFeedInput;
32 import com.sun.syndication.io.XmlReader;
33
34 /**
35  * <p>Class to retrieve syndication files via HTTP.</p>
36  *
37  * <p>If passed a {@link com.sun.syndication.fetcher.impl.FeedFetcherCache} in the
38  * constructor it will use conditional gets to only retrieve modified content.</p>
39  *
40  * <p>The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where
41  * supported by the server.</p>
42  *
43  * <p>Simple usage:
44  * <pre>
45  * // create the cache
46  * FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache();
47  * // retrieve the feed the first time
48  * // any subsequent request will use conditional gets and only
49  * // retrieve the resource if it has changed
50  * SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
51  *</pre>
52  *
53  * </p>
54  *
55  * @see <a HREF="http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers">http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers</a>
56  * @see <a HREF="http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level">http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level</a>
57  * @see <a HREF="http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html">http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html</a>
58  * @author Nick Lothian
59  */

60 public class HttpURLFeedFetcher extends AbstractFeedFetcher {
61     static final int POLL_EVENT = 1;
62     static final int RETRIEVE_EVENT = 2;
63     static final int UNCHANGED_EVENT = 3;
64
65     private FeedFetcherCache feedInfoCache;
66
67
68     /**
69      * Constructor to use HttpURLFeedFetcher without caching of feeds
70      *
71      */

72     public HttpURLFeedFetcher() {
73         super();
74     }
75
76     /**
77      * Constructor to enable HttpURLFeedFetcher to cache feeds
78      *
79      * @param feedCache - an instance of the FeedFetcherCache interface
80      */

81     public HttpURLFeedFetcher(FeedFetcherCache feedCache) {
82         this();
83         feedInfoCache = feedCache;
84     }
85
86     /**
87      * Retrieve a feed over HTTP
88      *
89      * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
90      * @return A {@link com.sun.syndication.feed.synd.SyndFeed} object
91      * @throws IllegalArgumentException if the URL is null;
92      * @throws IOException if a TCP error occurs
93      * @throws FeedException if the feed is not valid
94      * @throws FetcherException if a HTTP error occurred
95      */

96     public SyndFeed retrieveFeed(URL JavaDoc feedUrl) throws IllegalArgumentException JavaDoc, IOException JavaDoc, FeedException, FetcherException {
97         if (feedUrl == null) {
98             throw new IllegalArgumentException JavaDoc("null is not a valid URL");
99         }
100         
101         URLConnection JavaDoc connection = feedUrl.openConnection();
102         if (!(connection instanceof HttpURLConnection JavaDoc)) {
103             throw new IllegalArgumentException JavaDoc(feedUrl.toExternalForm() + " is not a valid HTTP Url");
104         }
105         HttpURLConnection JavaDoc httpConnection = (HttpURLConnection JavaDoc)connection;
106         // httpConnection.setInstanceFollowRedirects(true); // this is true by default, but can be changed on a claswide basis
107

108         if (feedInfoCache != null) {
109             SyndFeedInfo syndFeedInfo = feedInfoCache.getFeedInfo(feedUrl);
110             setRequestHeaders(connection, syndFeedInfo);
111             connection.connect();
112             fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
113                             
114             if (syndFeedInfo == null) {
115                 // this is a feed that hasn't been retrieved
116
syndFeedInfo = new SyndFeedInfo();
117                 retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
118             } else {
119                 // check the response code
120
int responseCode = httpConnection.getResponseCode();
121                 if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
122                     // the response code is not 304 NOT MODIFIED
123
// This is either because the feed server
124
// does not support condition gets
125
// or because the feed hasn't changed
126
retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
127                 } else {
128                     // the feed does not need retrieving
129
fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
130                 }
131             }
132
133             return syndFeedInfo.getSyndFeed();
134         } else {
135             fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
136             try {
137                 InputStream JavaDoc inputStream = feedUrl.openStream();
138                 return getSyndFeedFromStream(inputStream, connection);
139             } catch (java.io.IOException JavaDoc e) {
140                 handleErrorCodes(((HttpURLConnection JavaDoc)connection).getResponseCode());
141             }
142             // we will never actually get to this line
143
return null;
144         }
145     }
146
147     protected void retrieveAndCacheFeed(URL JavaDoc feedUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection JavaDoc connection) throws IllegalArgumentException JavaDoc, FeedException, FetcherException, IOException JavaDoc {
148         handleErrorCodes(connection.getResponseCode());
149
150         resetFeedInfo(feedUrl, syndFeedInfo, connection);
151         // resetting feed info in the cache
152
// could be needed for some implementations
153
// of FeedFetcherCache (eg, distributed HashTables)
154
if (feedInfoCache != null) {
155             feedInfoCache.setFeedInfo(feedUrl, syndFeedInfo);
156         }
157     }
158
159     protected void resetFeedInfo(URL JavaDoc orignalUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection JavaDoc connection) throws IllegalArgumentException JavaDoc, IOException JavaDoc, FeedException {
160         // need to always set the URL because this may have changed due to 3xx redirects
161
syndFeedInfo.setUrl(connection.getURL());
162
163         // the ID is a persistant value that should stay the same even if the URL for the
164
// feed changes (eg, by 3xx redirects)
165
syndFeedInfo.setId(orignalUrl.toString());
166
167         // This will be 0 if the server doesn't support or isn't setting the last modified header
168
syndFeedInfo.setLastModified(new Long JavaDoc(connection.getLastModified()));
169
170         // This will be null if the server doesn't support or isn't setting the ETag header
171
syndFeedInfo.setETag(connection.getHeaderField("ETag"));
172
173         // get the contents
174
InputStream JavaDoc inputStream = null;
175         try {
176             inputStream = connection.getInputStream();
177             SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection);
178             
179             String JavaDoc imHeader = connection.getHeaderField("IM");
180             if (isUsingDeltaEncoding() && (imHeader!= null && imHeader.indexOf("feed") >= 0) && (feedInfoCache != null) && connection.getResponseCode() == 226) {
181                 // client is setup to use http delta encoding and the server supports it and has returned a delta encoded response
182
// This response only includes new items
183
SyndFeedInfo cachedInfo = feedInfoCache.getFeedInfo(orignalUrl);
184                 if (cachedInfo != null) {
185                     SyndFeed cachedFeed = cachedInfo.getSyndFeed();
186                     
187                     // set the new feed to be the orginal feed plus the new items
188
syndFeed = combineFeeds(cachedFeed, syndFeed);
189                 }
190             }
191             
192             syndFeedInfo.setSyndFeed(syndFeed);
193         } finally {
194             if (inputStream != null) {
195                 inputStream.close();
196             }
197         }
198     }
199
200     /**
201      * <p>Set appropriate HTTP headers, including conditional get and gzip encoding headers</p>
202      *
203      * @param connection A URLConnection
204      * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null
205      */

206     protected void setRequestHeaders(URLConnection JavaDoc connection, SyndFeedInfo syndFeedInfo) {
207         if (syndFeedInfo != null) {
208             // set the headers to get feed only if modified
209
// we support the use of both last modified and eTag headers
210
if (syndFeedInfo.getLastModified() != null) {
211                 Object JavaDoc lastModified = syndFeedInfo.getLastModified();
212                 if (lastModified instanceof Long JavaDoc) {
213                     connection.setIfModifiedSince(((Long JavaDoc)syndFeedInfo.getLastModified()).longValue());
214                 }
215             }
216             if (syndFeedInfo.getETag() != null) {
217                 connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag());
218             }
219
220         }
221         // header to retrieve feed gzipped
222
connection.setRequestProperty("Accept-Encoding", "gzip");
223
224         // set the user agent
225
connection.addRequestProperty("User-Agent", getUserAgent());
226         
227         if (isUsingDeltaEncoding()) {
228             connection.addRequestProperty("A-IM", "feed");
229         }
230     }
231
232     private SyndFeed getSyndFeedFromStream(InputStream JavaDoc inputStream, URLConnection JavaDoc connection) throws IOException JavaDoc, IllegalArgumentException JavaDoc, FeedException {
233         BufferedInputStream JavaDoc is;
234         if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
235             // handle gzip encoded content
236
is = new BufferedInputStream JavaDoc(new GZIPInputStream JavaDoc(inputStream));
237         } else {
238             is = new BufferedInputStream JavaDoc(inputStream);
239         }
240
241         //InputStreamReader reader = new InputStreamReader(is, ResponseHandler.getCharacterEncoding(connection));
242

243         //SyndFeedInput input = new SyndFeedInput();
244

245         XmlReader reader = null;
246         if (connection.getHeaderField("Content-Type") != null) {
247             reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true);
248         } else {
249             reader = new XmlReader(is, true);
250         }
251         
252         SyndFeed feed = new SyndFeedInput().build(reader);
253         fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed);
254         return feed;
255     }
256
257     /**
258      * @return The FeedFetcherCache used by this fetcher (Could be null)
259      */

260     public FeedFetcherCache getFeedInfoCache() {
261         return feedInfoCache;
262     }
263
264     /**
265      * @param cache The cache to be used by this fetcher (pass null to stop using a cache)
266      */

267     public void setFeedInfoCache(FeedFetcherCache cache) {
268         feedInfoCache = cache;
269     }
270 }
271
Popular Tags