KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > nava > informa > parsers > FeedParser


1 //
2
// Informa -- RSS Library for Java
3
// Copyright (c) 2002 by Niko Schmuck
4
//
5
// Niko Schmuck
6
// http://sourceforge.net/projects/informa
7
// mailto:niko_schmuck@users.sourceforge.net
8
//
9
// This library is free software.
10
//
11
// You may redistribute it and/or modify it under the terms of the GNU
12
// Lesser General Public License as published by the Free Software Foundation.
13
//
14
// Version 2.1 of the license should be included with this distribution in
15
// the file LICENSE. If the license is not included with this distribution,
16
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
17
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
18
// MA 02139 USA.
19
//
20
// This library is distributed in the hope that it will be useful,
21
// but WITHOUT ANY WARRANTY; without even the implied warranty of
22
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23
// Lesser General Public License for more details.
24
//
25

26
27 // $Id: FeedParser.java,v 1.6 2004/10/18 20:14:19 niko_schmuck Exp $
28

29 package de.nava.informa.parsers;
30
31 import java.io.File JavaDoc;
32 import java.io.IOException JavaDoc;
33 import java.io.InputStream JavaDoc;
34 import java.io.Reader JavaDoc;
35 import java.net.URL JavaDoc;
36 import java.util.Iterator JavaDoc;
37
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
40 import org.jdom.Document;
41 import org.jdom.Element;
42 import org.jdom.JDOMException;
43 import org.jdom.input.SAXBuilder;
44 import org.xml.sax.InputSource JavaDoc;
45
46 import de.nava.informa.core.ChannelBuilderIF;
47 import de.nava.informa.core.ItemIF;
48 import de.nava.informa.core.ChannelIF;
49 import de.nava.informa.core.ParseException;
50 import de.nava.informa.core.UnsupportedFormatException;
51 //import de.nava.informa.utils.NoOpEntityResolver;
52

53 /**
54  * Parser class which allows reading in of RSS news channels.
55  * The concrete rules how the XML elements map to our channel object model
56  * are delegated to version specific private classes.</p>
57  *
58  * Currently the FeedParser support RSS formats 0.9x, 1.0 (RDF) and 2.0.
59  *
60  * @author Niko Schmuck
61  */

62 public class FeedParser {
63
64   private static Log logger = LogFactory.getLog(FeedParser.class);
65
66   public static ChannelIF parse(ChannelBuilderIF cBuilder, URL JavaDoc aURL)
67                 throws IOException JavaDoc, ParseException {
68     return parse(cBuilder, new InputSource JavaDoc(aURL.toExternalForm()), aURL);
69   }
70
71   public static ChannelIF parse(ChannelBuilderIF cBuilder, String JavaDoc url)
72                 throws IOException JavaDoc, ParseException {
73     URL JavaDoc aURL = null;
74     try {
75       aURL = new URL JavaDoc(url);
76     } catch (java.net.MalformedURLException JavaDoc e) {
77       logger.warn("Could not create URL for " + url);
78     }
79     return parse(cBuilder, new InputSource JavaDoc(url), aURL);
80   }
81
82   public static ChannelIF parse(ChannelBuilderIF cBuilder, Reader JavaDoc reader)
83                 throws IOException JavaDoc, ParseException {
84     return parse(cBuilder, new InputSource JavaDoc(reader), null);
85   }
86
87   public static ChannelIF parse(ChannelBuilderIF cBuilder, InputStream JavaDoc stream)
88                 throws IOException JavaDoc, ParseException {
89     return parse(cBuilder, new InputSource JavaDoc(stream), null);
90   }
91
92   public static ChannelIF parse(ChannelBuilderIF cBuilder, File JavaDoc aFile)
93                 throws IOException JavaDoc, ParseException {
94     URL JavaDoc aURL = null;
95     try {
96       aURL = aFile.toURL();
97     } catch (java.net.MalformedURLException JavaDoc e) {
98       throw new IOException JavaDoc("File " + aFile + " had invalid URL " +
99                             "representation.");
100     }
101     return parse(cBuilder, new InputSource JavaDoc(aURL.toExternalForm()), aURL);
102   }
103
104   public static ChannelIF parse(ChannelBuilderIF cBuilder,
105                                 InputSource JavaDoc inpSource,
106                                 URL JavaDoc baseLocation)
107                 throws IOException JavaDoc, ParseException {
108     // document reading without validation
109
SAXBuilder saxBuilder = new SAXBuilder(false);
110     // turn off DTD loading
111
// saxBuilder.setEntityResolver(new NoOpEntityResolver());
112
try {
113       Document doc = saxBuilder.build(inpSource);
114       ChannelIF channel = parse(cBuilder, doc);
115       channel.setLocation(baseLocation);
116       return channel;
117     } catch (JDOMException e) {
118       throw new ParseException(e);
119     }
120   }
121
122   // ------------------------------------------------------------
123
// internal helper methods
124
// ------------------------------------------------------------
125

126   private static synchronized ChannelIF parse(ChannelBuilderIF cBuilder,
127                                               Document doc)
128                               throws ParseException {
129
130     if (cBuilder == null) {
131       throw new RuntimeException JavaDoc("Without builder no channel can " +
132                                  "be created.");
133     }
134     logger.debug("start parsing.");
135     // Get the root element (must be rss)
136
Element root = doc.getRootElement();
137     String JavaDoc rootElement = root.getName().toLowerCase();
138     // Decide which parser to use
139
if (rootElement.startsWith("rss")) {
140       String JavaDoc rssVersion = root.getAttribute("version").getValue();
141       if (rssVersion.indexOf("0.91") >= 0) {
142         logger.info("Channel uses RSS root element (Version 0.91).");
143         return RSS_0_91_Parser.parse(cBuilder, root);
144       } else if (rssVersion.indexOf("0.92") >= 0) {
145         logger.info("Channel uses RSS root element (Version 0.92).");
146         // logger.warn("RSS 0.92 not fully supported yet, fall back to 0.91.");
147
// TODO: support RSS 0.92 when aware of all subtle differences.
148
return RSS_0_91_Parser.parse(cBuilder, root);
149       } else if (rootElement.indexOf("0.93") >= 0) {
150         logger.info("Channel uses RSS root element (Version 0.93).");
151         logger.warn("RSS 0.93 not fully supported yet, fall back to 0.91.");
152         // TODO: support RSS 0.93 when aware of all subtle differences.
153
} else if (rootElement.indexOf("0.94") >= 0) {
154         logger.info("Channel uses RSS root element (Version 0.94).");
155         logger.warn("RSS 0.94 not fully supported yet, will use RSS 2.0");
156         // TODO: support RSS 0.94 when aware of all subtle differences.
157
return RSS_2_0_Parser.parse(cBuilder, root);
158       } else if (rssVersion.indexOf("2.0") >= 0 || rssVersion.equals("2")) {
159         logger.info("Channel uses RSS root element (Version 2.0).");
160         return RSS_2_0_Parser.parse(cBuilder, root);
161       } else {
162         throw new UnsupportedFormatException("Unsupported RSS version [" +
163                                              rssVersion + "].");
164       }
165     } else if (rootElement.indexOf("rdf") >= 0) {
166       return RSS_1_0_Parser.parse(cBuilder, root);
167     } else if (rootElement.indexOf("feed") >= 0) {
168       String JavaDoc feedVersion = root.getAttribute("version").getValue();
169       if (feedVersion.indexOf("0.1") >= 0 || feedVersion.indexOf("0.2") >= 0) {
170         logger.info("Channel uses feed root element (Version " + feedVersion + ").");
171         logger.warn("This atom version is not really supported yet, assume Atom 0.3 format");
172         return Atom_0_3_Parser.parse(cBuilder, root);
173       } else if (feedVersion.indexOf("0.3") >= 0) {
174         logger.info("Channel uses feed root element (Version 0.3).");
175         return Atom_0_3_Parser.parse(cBuilder, root);
176       } else {
177         throw new UnsupportedFormatException("Unsupported feed version [" +
178                                              feedVersion + "].");
179       }
180     }
181
182     // did not match anything
183
throw new UnsupportedFormatException("Unsupported root element [" +
184                                          rootElement + "].");
185   }
186
187   
188   // ==========================================================
189

190   public static void main(String JavaDoc args[]) throws IOException JavaDoc, ParseException {
191     
192     if (args.length < 2) {
193       System.err.println("Usage: java " + FeedParser.class.getName() +
194                          " [-f <filename> | -u <url>]");
195       System.exit(1);
196     }
197     
198     String JavaDoc option = args[0];
199     String JavaDoc data = args[1];
200     
201     ChannelIF channel = null;
202     
203     ChannelBuilderIF builder = new de.nava.informa.impl.basic.ChannelBuilder();
204     if (option.trim().startsWith("-f")) {
205       channel = FeedParser.parse(builder, new File JavaDoc(data));
206     } else {
207       channel = FeedParser.parse(builder, new URL JavaDoc(data));
208     }
209
210     System.out.println("Channel format: " + channel.getFormat().toString());
211     System.out.println(channel);
212     System.out.println("containing " + channel.getItems().size() + " items");
213     Iterator JavaDoc items = channel.getItems().iterator();
214     while (items.hasNext()) {
215       ItemIF item = (ItemIF) items.next();
216       System.out.println(" - " + item);
217     }
218   }
219 }
220
Popular Tags