KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > nava > informa > parsers > RSS_0_91_Parser


1 //
2
// Informa -- RSS Library for Java
3
// Copyright (c) 2002 by Niko Schmuck
4
//
5
// Niko Schmuck
6
// http://sourceforge.net/projects/informa
7
// mailto:niko_schmuck@users.sourceforge.net
8
//
9
// This library is free software.
10
//
11
// You may redistribute it and/or modify it under the terms of the GNU
12
// Lesser General Public License as published by the Free Software Foundation.
13
//
14
// Version 2.1 of the license should be included with this distribution in
15
// the file LICENSE. If the license is not included with this distribution,
16
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
17
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
18
// MA 02139 USA.
19
//
20
// This library is distributed in the hope that it will be useful,
21
// but WITHOUT ANY WARRANTY; without even the implied warranty of
22
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23
// Lesser General Public License for more details.
24
//
25

26 // $Id: RSS_0_91_Parser.java,v 1.32 2003/11/04 19:09:43 niko_schmuck Exp $
27

28 package de.nava.informa.parsers;
29
30 import java.net.URL JavaDoc;
31 import java.util.Date JavaDoc;
32 import java.util.Iterator JavaDoc;
33 import java.util.List JavaDoc;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.jdom.Attribute;
38 import org.jdom.Element;
39
40 import de.nava.informa.core.ChannelBuilderIF;
41 import de.nava.informa.core.ChannelFormat;
42 import de.nava.informa.core.ChannelIF;
43 import de.nava.informa.core.ImageIF;
44 import de.nava.informa.core.ItemEnclosureIF;
45 import de.nava.informa.core.ItemIF;
46 import de.nava.informa.core.ItemSourceIF;
47 import de.nava.informa.core.ParseException;
48 import de.nava.informa.core.TextInputIF;
49 import de.nava.informa.utils.ParserUtils;
50
51 /**
52  * Parser which reads in document instances according to the RSS 0.91
53  * specification and generates a news channel object.
54  *
55  * @author Niko Schmuck
56  */

57 class RSS_0_91_Parser {
58
59   private static Log logger = LogFactory.getLog(RSS_0_91_Parser.class);
60
61   static ChannelIF parse(ChannelBuilderIF cBuilder, Element root)
62     throws ParseException {
63     if (cBuilder == null) {
64       throw new RuntimeException JavaDoc(
65         "Without builder no channel can " + "be created.");
66     }
67     Date JavaDoc dateParsed = new Date JavaDoc();
68     logger.debug("start parsing.");
69
70     // Get the channel element (only one occurs)
71
Element channel = root.getChild("channel");
72     if (channel == null) {
73       logger.warn("Channel element could not be retrieved from feed.");
74       throw new ParseException("No channel element found in feed.");
75     }
76
77     // --- read in channel information
78

79     // 1 title element
80
ChannelIF chnl =
81       cBuilder.createChannel(channel, channel.getChildTextTrim("title"));
82
83     chnl.setFormat(ChannelFormat.RSS_0_91);
84
85     // 1 description element
86
chnl.setDescription(channel.getChildTextTrim("description"));
87
88     // 1 link element
89
chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link")));
90
91     // 1 language element
92
chnl.setLanguage(channel.getChildTextTrim("language"));
93
94     // 1..n item elements
95
List JavaDoc items = channel.getChildren("item");
96     Iterator JavaDoc i = items.iterator();
97     while (i.hasNext()) {
98       Element item = (Element) i.next();
99
100       // get title element
101
Element elTitle = item.getChild("title");
102       String JavaDoc strTitle = "<No Title>";
103       if (elTitle != null) {
104         strTitle = elTitle.getTextTrim();
105       }
106       if (logger.isDebugEnabled()) {
107         logger.debug("Item element found (" + strTitle + ").");
108       }
109
110       // get link element
111
Element elLink = item.getChild("link");
112       String JavaDoc strLink = "";
113       if (elLink != null) {
114         strLink = elLink.getTextTrim();
115       }
116
117       // get description element
118
Element elDesc = item.getChild("description");
119       String JavaDoc strDesc = "";
120       if (elDesc != null) {
121         strDesc = elDesc.getTextTrim();
122       }
123
124       // generate new RSS item (link to article)
125
ItemIF rssItem =
126         cBuilder.createItem(
127           item,
128           chnl,
129           strTitle,
130           strDesc,
131           ParserUtils.getURL(strLink));
132       rssItem.setFound(dateParsed);
133
134       // get source element (an RSS 0.92 element)
135
Element source = item.getChild("source");
136       if (source != null) {
137         String JavaDoc sourceName = source.getTextTrim();
138         Attribute sourceAttribute = source.getAttribute("url");
139         if (sourceAttribute != null) {
140           String JavaDoc location = sourceAttribute.getValue().trim();
141           ItemSourceIF itemSource =
142             cBuilder.createItemSource(rssItem, sourceName, location, null);
143           rssItem.setSource(itemSource);
144         }
145       }
146
147       // get enclosure element (an RSS 0.92 element)
148
Element enclosure = item.getChild("enclosure");
149       if (enclosure != null) {
150         URL JavaDoc location = null;
151         String JavaDoc type = null;
152         int length = -1;
153         Attribute urlAttribute = enclosure.getAttribute("url");
154         if (urlAttribute != null) {
155           location = ParserUtils.getURL(urlAttribute.getValue().trim());
156         }
157         Attribute typeAttribute = enclosure.getAttribute("type");
158         if (typeAttribute != null) {
159           type = typeAttribute.getValue().trim();
160         }
161         Attribute lengthAttribute = enclosure.getAttribute("length");
162         if (lengthAttribute != null) {
163           try {
164             length = Integer.parseInt(lengthAttribute.getValue().trim());
165           } catch (NumberFormatException JavaDoc e) {
166             logger.warn(e);
167           }
168         }
169         ItemEnclosureIF itemEnclosure =
170           cBuilder.createItemEnclosure(rssItem, location, type, length);
171         rssItem.setEnclosure(itemEnclosure);
172       }
173     }
174
175     // 0..1 image element
176
Element image = channel.getChild("image");
177     if (image != null) {
178       ImageIF rssImage =
179         cBuilder.createImage(
180           image.getChildTextTrim("title"),
181           ParserUtils.getURL(image.getChildTextTrim("url")),
182           ParserUtils.getURL(image.getChildTextTrim("link")));
183       Element imgWidth = image.getChild("width");
184       if (imgWidth != null) {
185         try {
186           rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
187         } catch (NumberFormatException JavaDoc e) {
188           logger.warn(e);
189         }
190       }
191       Element imgHeight = image.getChild("height");
192       if (imgHeight != null) {
193         try {
194           rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
195         } catch (NumberFormatException JavaDoc e) {
196           logger.warn(e);
197         }
198       }
199       Element imgDescr = image.getChild("description");
200       if (imgDescr != null) {
201         rssImage.setDescription(imgDescr.getTextTrim());
202       }
203       chnl.setImage(rssImage);
204     }
205
206     // 0..1 textinput element
207
Element txtinp = channel.getChild("textinput");
208     if (txtinp != null) {
209       TextInputIF rssTextInput =
210         cBuilder.createTextInput(
211           txtinp.getChild("title").getTextTrim(),
212           txtinp.getChild("description").getTextTrim(),
213           txtinp.getChild("name").getTextTrim(),
214           ParserUtils.getURL(txtinp.getChild("link").getTextTrim()));
215       chnl.setTextInput(rssTextInput);
216     }
217
218     // 0..1 copyright element
219
Element copyright = channel.getChild("copyright");
220     if (copyright != null) {
221       chnl.setCopyright(copyright.getTextTrim());
222     }
223
224     // 0..1 rating element
225
Element rating = channel.getChild("rating");
226     if (rating != null) {
227       chnl.setRating(rating.getTextTrim());
228     }
229
230     // 0..1 pubDate element
231
Element pubDate = channel.getChild("pubDate");
232     if (pubDate != null) {
233       chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
234     }
235
236     // 0..1 lastBuildDate element
237
Element lastBuildDate = channel.getChild("lastBuildDate");
238     if (lastBuildDate != null) {
239       chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
240     }
241
242     // 0..1 docs element
243
Element docs = channel.getChild("docs");
244     if (docs != null) {
245       chnl.setDocs(docs.getTextTrim());
246     }
247
248     // 0..1 managingEditor element
249
Element managingEditor = channel.getChild("managingEditor");
250     if (managingEditor != null) {
251       chnl.setCreator(managingEditor.getTextTrim());
252     }
253
254     // 0..1 webMaster element
255
Element webMaster = channel.getChild("webMaster");
256     if (webMaster != null) {
257       chnl.setPublisher(webMaster.getTextTrim());
258     }
259
260     // 0..1 cloud element
261
Element cloud = channel.getChild("cloud");
262     if (cloud != null) {
263       String JavaDoc _port = cloud.getAttributeValue("port");
264       int port = -1;
265       if (_port != null) {
266         try {
267           port = Integer.parseInt(_port);
268         } catch (NumberFormatException JavaDoc e) {
269           logger.warn(e);
270         }
271       }
272       chnl.setCloud(
273         cBuilder.createCloud(
274           cloud.getAttributeValue("domain"),
275           port,
276           cloud.getAttributeValue("path"),
277           cloud.getAttributeValue("registerProcedure"),
278           cloud.getAttributeValue("protocol")));
279     }
280
281     chnl.setLastUpdated(dateParsed);
282     // 0..1 skipHours element
283
// 0..1 skipDays element
284

285     return chnl;
286   }
287
288 }
289
Popular Tags