KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > syndication > io > impl > RSS090Parser


1 /*
2  * Copyright 2004 Sun Microsystems, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */

17 package com.sun.syndication.io.impl;
18
19 import com.sun.syndication.feed.WireFeed;
20 import com.sun.syndication.feed.rss.Channel;
21 import com.sun.syndication.feed.rss.Image;
22 import com.sun.syndication.feed.rss.Item;
23 import com.sun.syndication.feed.rss.TextInput;
24 import com.sun.syndication.io.FeedException;
25 import org.jdom.Document;
26 import org.jdom.Element;
27 import org.jdom.Namespace;
28
29 import java.util.ArrayList JavaDoc;
30 import java.util.Collection JavaDoc;
31 import java.util.Iterator JavaDoc;
32 import java.util.List JavaDoc;
33
34 /**
35  */

36 public class RSS090Parser extends BaseWireFeedParser {
37
38     private static final String JavaDoc RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
39     private static final String JavaDoc RSS_URI = "http://my.netscape.com/rdf/simple/0.9/";
40     
41     private static final Namespace RDF_NS = Namespace.getNamespace(RDF_URI);
42     private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI);
43
44
45     public RSS090Parser() {
46         this("rss_0.9");
47     }
48
49     protected RSS090Parser(String JavaDoc type) {
50         super(type);
51     }
52
53     public boolean isMyType(Document document) {
54         boolean ok = false;
55
56         Element rssRoot = document.getRootElement();
57         Namespace defaultNS = rssRoot.getNamespace();
58         List JavaDoc additionalNSs = rssRoot.getAdditionalNamespaces();
59
60         ok = defaultNS!=null && defaultNS.equals(getRDFNamespace());
61         if (ok) {
62             if (additionalNSs==null) {
63                 ok = false;
64             }
65             else {
66                 ok = false;
67                 for (int i=0;!ok && i<additionalNSs.size();i++) {
68                     ok = getRSSNamespace().equals(additionalNSs.get(i));
69                 }
70             }
71         }
72         return ok;
73     }
74
75     public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException JavaDoc,FeedException {
76         if (validate) {
77             validateFeed(document);
78         }
79         Element rssRoot = document.getRootElement();
80         return parseChannel(rssRoot);
81     }
82
83     protected void validateFeed(Document document) throws FeedException {
84         // TBD
85
// here we have to validate the Feed against a schema or whatever
86
// not sure how to do it
87
// one posibility would be to inject our own schema for the feed (they don't exist out there)
88
// to the document, produce an ouput and attempt to parse it again with validation turned on.
89
// otherwise will have to check the document elements by hand.
90
}
91
92     /**
93      * Returns the namespace used by RSS elements in document of the RSS version the parser supports.
94      * <P>
95      * This implementation returns the EMTPY namespace.
96      * <p>
97      *
98      * @return returns the EMPTY namespace.
99      */

100     protected Namespace getRSSNamespace() {
101         return RSS_NS;
102     }
103
104     /**
105      * Returns the namespace used by RDF elements in document of the RSS version the parser supports.
106      * <P>
107      * This implementation returns the EMTPY namespace.
108      * <p>
109      *
110      * @return returns the EMPTY namespace.
111      */

112     protected Namespace getRDFNamespace() {
113         return RDF_NS;
114     }
115
116     /**
117      * Parses the root element of an RSS document into a Channel bean.
118      * <p/>
119      * It reads title, link and description and delegates to parseImage, parseItems
120      * and parseTextInput. This delegation always passes the root element of the RSS
121      * document as different RSS version may have this information in different parts
122      * of the XML tree (no assumptions made thanks to the specs variaty)
123      * <p/>
124      *
125      * @param rssRoot the root element of the RSS document to parse.
126      * @return the parsed Channel bean.
127      */

128     protected WireFeed parseChannel(Element rssRoot) {
129         Element eChannel = rssRoot.getChild("channel",getRSSNamespace());
130
131         Channel channel = new Channel(getType());
132
133         Element e = eChannel.getChild("title",getRSSNamespace());
134         if (e!=null) {
135             channel.setTitle(e.getText());
136         }
137         e = eChannel.getChild("link",getRSSNamespace());
138         if (e!=null) {
139             channel.setLink(e.getText());
140         }
141         e = eChannel.getChild("description",getRSSNamespace());
142         if (e!=null) {
143             channel.setDescription(e.getText());
144         }
145
146         channel.setImage(parseImage(rssRoot));
147
148         channel.setTextInput(parseTextInput(rssRoot));
149
150         channel.setItems(parseItems(rssRoot));
151
152         channel.setModules(parseFeedModules(eChannel));
153
154         return channel;
155     }
156
157
158     /**
159      * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment.
160      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element.
161      * <p/>
162      */

163     protected List JavaDoc getItems(Element rssRoot) {
164         return rssRoot.getChildren("item",getRSSNamespace());
165     }
166
167     /**
168      * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment.
169      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
170      * <p/>
171      */

172     protected Element getImage(Element rssRoot) {
173         return rssRoot.getChild("image",getRSSNamespace());
174     }
175
176     /**
177      * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment.
178      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
179      * <p/>
180      */

181     protected Element getTextInput(Element rssRoot) {
182         return rssRoot.getChild("textinput",getRSSNamespace());
183     }
184
185     /**
186      * Parses the root element of an RSS document looking for image information.
187      * <p/>
188      * It reads title and url out of the 'image' element.
189      * <p/>
190      *
191      * @param rssRoot the root element of the RSS document to parse for image information.
192      * @return the parsed image bean.
193      */

194     protected Image parseImage(Element rssRoot) {
195         Image image = null;
196         Element eImage = getImage(rssRoot);
197         if (eImage!=null) {
198             image = new Image();
199
200             Element e = eImage.getChild("title",getRSSNamespace());
201             if (e!=null) {
202                 image.setTitle(e.getText());
203             }
204             e = eImage.getChild("url",getRSSNamespace());
205             if (e!=null) {
206                 image.setUrl(e.getText());
207             }
208             e = eImage.getChild("link",getRSSNamespace());
209             if (e!=null) {
210                 image.setLink(e.getText());
211             }
212         }
213         return image;
214     }
215
216     /**
217      * Parses the root element of an RSS document looking for all items information.
218      * <p/>
219      * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem()
220      * for each item element. The resulting RSSItem of each item element is stored in a list.
221      * <p/>
222      *
223      * @param rssRoot the root element of the RSS document to parse for all items information.
224      * @return a list with all the parsed RSSItem beans.
225      */

226     protected List JavaDoc parseItems(Element rssRoot) {
227         Collection JavaDoc eItems = getItems(rssRoot);
228
229         List JavaDoc items = new ArrayList JavaDoc();
230         for (Iterator JavaDoc i=eItems.iterator();i.hasNext();) {
231             Element eItem = (Element) i.next();
232             items.add(parseItem(rssRoot,eItem));
233         }
234         return items;
235     }
236
237     /**
238      * Parses an item element of an RSS document looking for item information.
239      * <p/>
240      * It reads title and link out of the 'item' element.
241      * <p/>
242      *
243      * @param rssRoot the root element of the RSS document in case it's needed for context.
244      * @param eItem the item element to parse.
245      * @return the parsed RSSItem bean.
246      */

247     protected Item parseItem(Element rssRoot,Element eItem) {
248         Item item = new Item();
249         Element e = eItem.getChild("title",getRSSNamespace());
250         if (e!=null) {
251             item.setTitle(e.getText());
252         }
253         e = eItem.getChild("link",getRSSNamespace());
254         if (e!=null) {
255             item.setLink(e.getText());
256         }
257         
258         item.setModules(parseItemModules(eItem));
259
260         return item;
261     }
262
263
264     /**
265      * Parses the root element of an RSS document looking for text-input information.
266      * <p/>
267      * It reads title, description, name and link out of the 'textinput' or 'textInput' element.
268      * <p/>
269      *
270      * @param rssRoot the root element of the RSS document to parse for text-input information.
271      * @return the parsed RSSTextInput bean.
272      */

273     protected TextInput parseTextInput(Element rssRoot) {
274         TextInput textInput = null;
275         Element eTextInput = getTextInput(rssRoot);
276         if (eTextInput!=null) {
277             textInput = new TextInput();
278             Element e = eTextInput.getChild("title",getRSSNamespace());
279             if (e!=null) {
280                 textInput.setTitle(e.getText());
281             }
282             e = eTextInput.getChild("description",getRSSNamespace());
283             if (e!=null) {
284                 textInput.setDescription(e.getText());
285             }
286             e = eTextInput.getChild("name",getRSSNamespace());
287             if (e!=null) {
288                 textInput.setName(e.getText());
289             }
290             e = eTextInput.getChild("link",getRSSNamespace());
291             if (e!=null) {
292                 textInput.setLink(e.getText());
293             }
294         }
295         return textInput;
296     }
297
298
299 }
300
Popular Tags