KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > nava > informa > parsers > RSS_1_0_Parser


1 //
2
// Informa -- RSS Library for Java
3
// Copyright (c) 2002 by Niko Schmuck
4
//
5
// Niko Schmuck
6
// http://sourceforge.net/projects/informa
7
// mailto:niko_schmuck@users.sourceforge.net
8
//
9
// This library is free software.
10
//
11
// You may redistribute it and/or modify it under the terms of the GNU
12
// Lesser General Public License as published by the Free Software Foundation.
13
//
14
// Version 2.1 of the license should be included with this distribution in
15
// the file LICENSE. If the license is not included with this distribution,
16
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
17
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
18
// MA 02139 USA.
19
//
20
// This library is distributed in the hope that it will be useful,
21
// but WITHOUT ANY WARRANTY; without even the implied warranty of
22
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23
// Lesser General Public License for more details.
24
//
25

26
27 // $Id: RSS_1_0_Parser.java,v 1.39 2003/11/04 19:09:43 niko_schmuck Exp $
28

29 package de.nava.informa.parsers;
30
31 import java.net.URL JavaDoc;
32 import java.util.Date JavaDoc;
33 import java.util.Iterator JavaDoc;
34 import java.util.List JavaDoc;
35
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38
39 import org.jdom.Attribute;
40 import org.jdom.Element;
41 import org.jdom.Namespace;
42
43 import de.nava.informa.core.ChannelBuilderIF;
44 import de.nava.informa.core.ChannelFormat;
45 import de.nava.informa.core.ChannelIF;
46 import de.nava.informa.core.ImageIF;
47 import de.nava.informa.core.ItemIF;
48 import de.nava.informa.core.ItemSourceIF;
49 import de.nava.informa.core.ParseException;
50 import de.nava.informa.core.TextInputIF;
51 import de.nava.informa.utils.ParserUtils;
52
53 /**
54  * Parser which reads in document instances according to the RSS 1.0
55  * (RDF) specification and generates a news channel object.
56  *
57  * @author Niko Schmuck
58  */

59 class RSS_1_0_Parser {
60
61   private static Log logger = LogFactory.getLog(RSS_1_0_Parser.class);
62
63   static ChannelIF parse(ChannelBuilderIF cBuilder,
64                            Element root) throws ParseException {
65     if (cBuilder == null) {
66       throw new RuntimeException JavaDoc("Without builder no channel can " +
67                                  "be created.");
68     }
69     Date JavaDoc dateParsed = new Date JavaDoc();
70     Namespace defNS = ParserUtils.getDefaultNS(root);
71     if (defNS == null) {
72       defNS = Namespace.NO_NAMESPACE;
73       logger.info("No default namespace found.");
74     }
75
76     // RSS 1.0 Dublin Core Module namespace
77
Namespace dcNS = ParserUtils.getNamespace(root, "dc");
78     // fall back to default name space (for retrieving descriptions)
79
if (dcNS == null) {
80       dcNS = defNS;
81     }
82
83     // RSS 1.0 Syndication Module namespace
84
Namespace syNS = ParserUtils.getNamespace(root, "sy");
85
86     // RSS 1.0 Aggregation Module namespace
87
Namespace agNS = ParserUtils.getNamespace(root, "ag");
88
89     // RSS 1.0 Administration Module namespace
90
Namespace adminNS = ParserUtils.getNamespace(root, "admin");
91
92     // RSS 1.0 DCTerms Module namespace
93
Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms");
94
95     // RSS 1.0 Annotation Module namespace
96
Namespace annotateNS = ParserUtils.getNamespace(root, "annotate");
97
98     // RSS091 Module namespace
99
Namespace rss091NS = ParserUtils.getNamespace(root, "rss091");
100
101     // Get the channel element (only one occurs)
102
Element channel = root.getChild("channel", defNS);
103     if (channel == null) {
104       logger.warn("Channel element could not be retrieved from feed.");
105       throw new ParseException("No channel element found in feed.");
106     }
107
108     // ----------------------- read in channel information
109
// title element
110
ChannelIF chnl =
111       cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));
112
113     // set channel format
114
chnl.setFormat(ChannelFormat.RSS_1_0);
115
116     // description element
117
chnl.setDescription(channel.getChildTextTrim("description", defNS));
118
119     // link element
120
chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS)));
121
122     // creator element
123
Element creator = channel.getChild("creator", dcNS);
124     if (creator == null) {
125       creator = channel.getChild("managingEditor", rss091NS);
126     }
127     if (creator != null) {
128       chnl.setCreator(creator.getTextTrim());
129     }
130
131     // publisher element
132
String JavaDoc publisher = channel.getChildTextTrim("publisher", dcNS);
133     if (publisher == null) {
134       Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS);
135       if (elErrorReportsTo != null) {
136         publisher = elErrorReportsTo.getAttributeValue("resource",
137             ParserUtils.getNamespace(elErrorReportsTo, "rdf"));
138       }
139     }
140     if (publisher == null) {
141       publisher = channel.getChildTextTrim("webMaster", rss091NS);
142     }
143     chnl.setPublisher(publisher);
144
145     // language element
146
Element language = channel.getChild("language", dcNS);
147     if (language == null) {
148       language = channel.getChild("language", rss091NS);
149     }
150     if (language != null) {
151       chnl.setLanguage(language.getTextTrim());
152     }
153
154     // rights element
155
Element copyright = channel.getChild("rights", dcNS);
156     if (copyright == null) {
157       copyright = channel.getChild("copyright", rss091NS);
158     }
159     if (copyright != null) {
160       chnl.setCopyright(copyright.getTextTrim());
161     }
162
163     // 0..1 Rating element
164
Element rating = channel.getChild("rating", rss091NS);
165     if (rating != null) {
166       chnl.setRating(rating.getTextTrim());
167     }
168
169     // 0..1 Docs element
170
// use namespace URI
171
chnl.setDocs(defNS.getURI());
172
173     // 0..1 pubDate element
174
Element pubDate = channel.getChild("date", dcNS);
175     if (pubDate == null) {
176       pubDate = channel.getChild("issued", dctermsNS);
177     }
178     if (pubDate == null) {
179       pubDate = channel.getChild("pubdate", rss091NS);
180     }
181     if (pubDate != null) {
182       chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
183     }
184
185     // 0..1 lastBuildDate element
186
Element lastBuildDate = channel.getChild("lastBuildDate");
187     if (lastBuildDate == null) {
188       lastBuildDate = channel.getChild("modified", dctermsNS);
189     }
190     if (lastBuildDate == null) {
191       lastBuildDate = channel.getChild("lastBuildDate", rss091NS);
192     }
193     if (lastBuildDate != null) {
194       chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
195     }
196
197     // RSS 1.0 Administration Module support
198

199     // 0..1 generator element
200
Element elGenerator = channel.getChild("generatorAgent", adminNS);
201     if (elGenerator != null) {
202       Attribute generator = elGenerator.getAttribute("resource", ParserUtils.getNamespace(elGenerator, "rdf"));
203       if (generator != null) {
204         chnl.setGenerator(generator.getValue());
205       }
206     }
207
208     // RSS 1.0 Syndication Module support
209

210     // 0..1 update period element
211
Element updatePeriod = channel.getChild("updatePeriod", syNS);
212     if (updatePeriod != null) {
213       chnl.setUpdatePeriod(updatePeriod.getTextTrim());
214     }
215
216     // 0..1 update frequency element
217
Element updateFrequency = channel.getChild("updateFrequency", syNS);
218     if (updateFrequency != null) {
219       chnl.setUpdateFrequency((new Integer JavaDoc(updateFrequency.getTextTrim())).intValue());
220     }
221
222     // 0..1 update base element
223
Element updateBase = channel.getChild("updateBase", syNS);
224     if (updateBase != null) {
225       chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim()));
226     }
227
228     if ((updatePeriod != null) && updateFrequency != null) {
229       int ttl = getTtl(chnl.getUpdatePeriod(), chnl.getUpdateFrequency());
230       chnl.setTtl(ttl);
231     }
232
233     // item elements
234
List JavaDoc items = root.getChildren("item", defNS);
235     Iterator JavaDoc i = items.iterator();
236     while (i.hasNext()) {
237       Element item = (Element) i.next();
238
239       // get title element
240
Element elTitle = item.getChild("title", defNS);
241       String JavaDoc strTitle = "<No Title>";
242       if (elTitle != null) {
243         strTitle = elTitle.getTextTrim();
244       }
245       if (logger.isDebugEnabled()) {
246         logger.debug("Item element found (" + strTitle + ").");
247       }
248
249       // get link element
250
Element elLink = item.getChild("link", defNS);
251       String JavaDoc strLink = "";
252       if (elLink != null) {
253         strLink = elLink.getTextTrim();
254       }
255
256       // get description element
257
Element elDesc = item.getChild("description", defNS);
258       if (elDesc == null) {
259         elDesc = item.getChild("description", dcNS);
260       }
261       String JavaDoc strDesc = "";
262       if (elDesc != null) {
263         strDesc = elDesc.getTextTrim();
264       }
265
266       // generate new RSS item (link to article)
267
ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc,
268                                            ParserUtils.getURL(strLink));
269       rssItem.setFound(dateParsed);
270
271       // get creator element
272
Element elCreator = item.getChild("creator", dcNS);
273       if (elCreator != null) {
274         rssItem.setCreator(elCreator.getTextTrim());
275       }
276
277       // get subject element
278
Element elSubject = item.getChild("subject", dcNS);
279       if (elSubject != null) {
280         // TODO: Mulitple subject elements not handled currently
281
rssItem.setSubject(elSubject.getTextTrim());
282       }
283
284       // get date element
285
Element elDate = item.getChild("date", dcNS);
286       if (elDate != null) {
287         rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
288       }
289
290       // get source element - default to Aggregation module, then try Dublin Core
291
String JavaDoc sourceName = null;
292       String JavaDoc sourceLocation = null;
293       Date JavaDoc sourceTimestamp = null;
294
295       Element elSourceURL = item.getChild("sourceURL", agNS);
296       if (elSourceURL == null) { // No Aggregation module - try Dublin Core
297
elSourceURL = item.getChild("source", dcNS);
298         if (elSourceURL != null) {
299           sourceLocation = elSourceURL.getTextTrim();
300           sourceName = "Source";
301         }
302      } else { // Aggregation module
303
sourceLocation = elSourceURL.getTextTrim();
304         Element elSourceName = item.getChild("source", agNS);
305         if (elSourceName != null) {
306           sourceName = elSourceName.getTextTrim();
307         }
308         Element elSourceTimestamp = item.getChild("timestamp", agNS);
309         if (elSourceTimestamp != null) {
310           sourceTimestamp = ParserUtils.getDate(elSourceTimestamp.getTextTrim());
311         }
312       }
313
314       if (sourceLocation != null) {
315         ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, sourceTimestamp);
316         rssItem.setSource(itemSource);
317       }
318
319       // comments element - use Annotation module
320
Element elReference = item.getChild("reference", annotateNS);
321       if (elReference != null) {
322         Attribute resource = elReference.getAttribute("resource", ParserUtils.getNamespace(elReference, "rdf"));
323         if (resource != null) {
324           URL JavaDoc resourceURL = ParserUtils.getURL(resource.getValue());
325           if (resourceURL != null) {
326             rssItem.setComments(resourceURL);
327           }
328         }
329       }
330
331     }
332
333     // image element
334
Element image = root.getChild("image", defNS);
335     if (image != null) {
336       ImageIF rssImage =
337         cBuilder.createImage(image.getChildTextTrim("title", defNS),
338                            ParserUtils.getURL(image.getChildTextTrim("url", defNS)),
339                            ParserUtils.getURL(image.getChildTextTrim("link", defNS)));
340       Element imgWidth = image.getChild("width", defNS);
341       if (imgWidth != null) {
342         try {
343           rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
344         } catch (NumberFormatException JavaDoc e) {
345           logger.warn(e);
346         }
347       }
348       Element imgHeight = image.getChild("height", defNS);
349       if (imgHeight != null) {
350         try {
351           rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
352         } catch (NumberFormatException JavaDoc e) {
353           logger.warn(e);
354         }
355       }
356       Element imgDescr = image.getChild("description", defNS);
357       if (imgDescr != null) {
358         rssImage.setDescription(imgDescr.getTextTrim());
359       }
360       chnl.setImage(rssImage);
361     }
362
363     // textinput element
364
Element txtinp = root.getChild("textinput", defNS);
365     if (txtinp != null) {
366       String JavaDoc tiTitle = null;
367       if (txtinp.getChild("title", defNS) != null) {
368         tiTitle = txtinp.getChild("title", defNS).getTextTrim();
369       }
370       String JavaDoc tiDescr = null;
371       if (txtinp.getChild("description", defNS) != null) {
372         tiDescr = txtinp.getChild("description", defNS).getTextTrim();
373       }
374       String JavaDoc tiName = null;
375       if (txtinp.getChild("name", defNS) != null) {
376         tiName = txtinp.getChild("name", defNS).getTextTrim();
377       }
378       URL JavaDoc tiLink = null;
379       if (txtinp.getChild("link", defNS) != null) {
380         tiLink = ParserUtils.getURL(txtinp.getChild("link", defNS).getTextTrim());
381       }
382       TextInputIF rssTextInput =
383         cBuilder.createTextInput(tiTitle, tiDescr, tiName, tiLink);
384       chnl.setTextInput(rssTextInput);
385     }
386
387     chnl.setLastUpdated(dateParsed);
388
389     return chnl;
390   }
391
392   /**
393    * Returns the Ttl value corresponding to updatePeriod and updateFrequency
394    * @param updatePeriod the update period
395    * @param updateFrequency the update frequency
396    * @return the Ttl value
397    */

398   private static int getTtl(final String JavaDoc updatePeriod, int updateFrequency) {
399     int minutes = -1;
400     if (updatePeriod.equals(ChannelIF.UPDATE_HOURLY)) {
401       minutes = 60;
402     } else if (updatePeriod.equals(ChannelIF.UPDATE_DAILY)) {
403       minutes = 24 * 60;
404     } else if (updatePeriod.equals(ChannelIF.UPDATE_WEEKLY)) {
405       minutes = 168 * 60;
406     } else if (updatePeriod.equals(ChannelIF.UPDATE_MONTHLY)) {
407       minutes = 30 * 24 * 60;
408     } else if (updatePeriod.equals(ChannelIF.UPDATE_YEARLY)) {
409       minutes = 365 * 24 * 60;
410     } else {
411       minutes = 24 * 60;
412     }
413     return (minutes / updateFrequency);
414  }
415 }
416
Popular Tags