KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jperdian > rss2 > RssParser


1 /**
2  * RSS framework and reader
3  * Copyright (C) 2004 Christian Robert
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */

19
20 package org.jperdian.rss2;
21
22 import java.io.BufferedInputStream JavaDoc;
23 import java.io.IOException JavaDoc;
24 import java.io.InputStream JavaDoc;
25 import java.net.URL JavaDoc;
26 import java.util.List JavaDoc;
27
28 import javax.xml.parsers.DocumentBuilder JavaDoc;
29 import javax.xml.parsers.DocumentBuilderFactory JavaDoc;
30 import javax.xml.parsers.ParserConfigurationException JavaDoc;
31
32 import org.jperdian.rss2.dom.RssChannel;
33 import org.jperdian.rss2.dom.RssCloud;
34 import org.jperdian.rss2.dom.RssConstants;
35 import org.jperdian.rss2.dom.RssEnclosure;
36 import org.jperdian.rss2.dom.RssGuid;
37 import org.jperdian.rss2.dom.RssImage;
38 import org.jperdian.rss2.dom.RssItem;
39 import org.jperdian.rss2.dom.RssTextInput;
40 import org.w3c.dom.Document JavaDoc;
41 import org.w3c.dom.Element JavaDoc;
42 import org.w3c.dom.Node JavaDoc;
43 import org.w3c.dom.NodeList JavaDoc;
44 import org.xml.sax.SAXException JavaDoc;
45
46 /**
47  * The parser to process an XML document and transfer it into an
48  * <code>RssMessage</code>
49  *
50  * @author Christian Robert
51  */

52
53 public class RssParser {
54
55   private DocumentBuilder JavaDoc myDocumentBuilder = null;
56   
57   public RssParser() {
58     try {
59       DocumentBuilder JavaDoc builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
60       this.setDocumentBuilder(builder);
61     } catch(ParserConfigurationException JavaDoc e) {
62       throw new RuntimeException JavaDoc("Cannot create DocumentBuilder", e);
63     }
64   }
65   
66   /**
67    * Transfers the given XML document into a value <code>RssMessage</code>
68    * object
69    */

70   public RssChannel parse(URL JavaDoc sourceURL, RssChannel targetChannel) throws RssException {
71     try {
72       InputStream JavaDoc inStream = new BufferedInputStream JavaDoc(sourceURL.openStream());
73       Document JavaDoc document = this.getDocumentBuilder().parse(inStream);
74       inStream.close();
75       return this.parse(document, targetChannel);
76     } catch(SAXException JavaDoc e) {
77       throw new RssParseException("Illegal XML format \n[" + e.getMessage() + "]", e);
78     } catch(IOException JavaDoc e) {
79       throw new RssException("Cannot connect to source URL: " + sourceURL, e);
80     }
81   }
82   
83   /**
84    * Transfers the given XML document into a value <code>RssMessage</code>
85    * object
86    */

87   public RssChannel parse(Document JavaDoc xmlDocument, RssChannel targetChannel) throws RssParseException {
88     RssChannel resultChannel = null;
89     Element JavaDoc rootElement = xmlDocument.getDocumentElement();
90     NodeList JavaDoc rootSubNodes = rootElement.getChildNodes();
91     for(int i=0; i < rootSubNodes.getLength(); i++) {
92       Node JavaDoc subNode = rootSubNodes.item(i);
93       String JavaDoc subNodeName = subNode.getNodeName();
94       if(subNode.getNodeType() == Node.ELEMENT_NODE && subNodeName.equalsIgnoreCase("channel")) {
95         resultChannel = this.parseChannel((Element JavaDoc)rootSubNodes.item(i), targetChannel);
96       } else if(subNodeName.equalsIgnoreCase("item")) {
97         RssItem item = this.parseItem((Element JavaDoc)subNode);
98         item.setSource(resultChannel);
99         resultChannel.addItem(item);
100       }
101     }
102     if(resultChannel != null) {
103       return resultChannel;
104     } else {
105       throw new RssParseException("No channel element found in message");
106     }
107   }
108
109   /**
110    * Parses the content of the given <code>channel</code> element, analyze
111    * it's content and generate a valid <code>RssChannel</code> object
112    */

113   protected RssChannel parseChannel(Element JavaDoc channelElement, RssChannel channel) throws RssParseException {
114     List JavaDoc itemList = channel.getItemList();
115     if(itemList != null && itemList.size() > 0) {
116       itemList.clear();
117     }
118     NodeList JavaDoc subNodes = channelElement.getChildNodes();
119     for(int i=0; i < subNodes.getLength(); i++) {
120       if(subNodes.item(i).getNodeType() == Node.ELEMENT_NODE) {
121         Element JavaDoc subElement = (Element JavaDoc)subNodes.item(i);
122         String JavaDoc elemName = subElement.getNodeName();
123         if(elemName.equalsIgnoreCase("title")) {
124           channel.setTitle(RssParseHelper.parseContentChildren(subElement));
125         } else if(elemName.equalsIgnoreCase("link")) {
126           channel.setLink(RssParseHelper.parseContentURL(subElement));
127         } else if(elemName.equalsIgnoreCase("description")) {
128           channel.setDescription(RssParseHelper.parseContentChildren(subElement));
129         } else if(elemName.equalsIgnoreCase("copyright")) {
130           channel.setCopyright(RssParseHelper.parseContentChildren(subElement));
131         } else if(elemName.equalsIgnoreCase("managingEditor")) {
132           channel.setManagingEditor(RssParseHelper.parseContentChildren(subElement));
133         } else if(elemName.equalsIgnoreCase("webMaster")) {
134           channel.setWebmaster(RssParseHelper.parseContentChildren(subElement));
135         } else if(elemName.equalsIgnoreCase("pubDate")) {
136           channel.setPubDate(RssParseHelper.parseContentDate(subElement));
137         } else if(elemName.equalsIgnoreCase("lastBuildDate")) {
138           channel.setLastBuildDate(RssParseHelper.parseContentDate(subElement));
139         } else if(elemName.equalsIgnoreCase("category")) {
140           channel.addCategory(RssParseHelper.parseContentChildren(subElement));
141         } else if(elemName.equalsIgnoreCase("generator")) {
142           channel.setGenerator(RssParseHelper.parseContentChildren(subElement));
143         } else if(elemName.equalsIgnoreCase("docs")) {
144           channel.setDocs(RssParseHelper.parseContentURL(subElement));
145         } else if(elemName.equalsIgnoreCase("cloud")) {
146           channel.setCloud(this.parseCloud(subElement));
147         } else if(elemName.equalsIgnoreCase("ttl")) {
148           channel.setTtl(RssParseHelper.parseContentInt(subElement));
149         } else if(elemName.equalsIgnoreCase("image")) {
150           channel.setImage(this.parseImage(subElement));
151         } else if(elemName.equalsIgnoreCase("rating")) {
152           channel.setRating(RssParseHelper.parseContentChildren(subElement));
153         } else if(elemName.equalsIgnoreCase("textInput")) {
154           channel.setTextInput(this.parseTextInput(subElement));
155         } else if(elemName.equalsIgnoreCase("skipHours")) {
156           channel.addSkipHour(RssParseHelper.parseContentInt(subElement));
157         } else if(elemName.equalsIgnoreCase("skipDays")) {
158           channel.addSkipDay(RssParseHelper.parseContentChildren(subElement));
159         } else if(elemName.equalsIgnoreCase("item")) {
160           RssItem item = this.parseItem(subElement);
161           item.setSource(channel);
162           channel.addItem(item);
163         }
164       }
165     }
166     return channel;
167   }
168   
169   /**
170    * Parses a <tt>cloud</tt> element
171    */

172   protected RssCloud parseCloud(Element JavaDoc cloudElement) throws RssParseException {
173     RssCloud cloud = new RssCloud();
174     cloud.setDomain(cloudElement.getAttribute("domain"));
175     try {
176       cloud.setPort(Integer.parseInt(cloudElement.getAttribute("port")));
177     } catch(NumberFormatException JavaDoc e) {
178       throw new RssParseException("Illegal port entered for cloud: " + cloudElement.getAttribute("port"));
179     }
180     cloud.setPath(cloudElement.getAttribute("path"));
181     cloud.setRegisterProcedure(cloudElement.getAttribute("registerProcedure"));
182     return cloud;
183   }
184   
185   /**
186    * Parses a <tt>textInput</tt> element
187    */

188   protected RssTextInput parseTextInput(Element JavaDoc textInputElement) throws RssParseException {
189     RssTextInput textInput = new RssTextInput();
190     NodeList JavaDoc subNodes = textInputElement.getChildNodes();
191     for(int i=0; i < subNodes.getLength(); i++) {
192       if(subNodes.item(i).getNodeType() == Node.ELEMENT_NODE) {
193         Element JavaDoc subElement = (Element JavaDoc)subNodes.item(i);
194         String JavaDoc elemName = subElement.getNodeName();
195         if(elemName.equalsIgnoreCase("title")) {
196           textInput.setTitle(RssParseHelper.parseContentChildren(subElement));
197         } else if(elemName.equalsIgnoreCase("description")) {
198           textInput.setDescription(RssParseHelper.parseContentChildren(subElement));
199         } else if(elemName.equalsIgnoreCase("name")) {
200           textInput.setName(RssParseHelper.parseContentChildren(subElement));
201         } else if(elemName.equalsIgnoreCase("link")) {
202           textInput.setLink(RssParseHelper.parseContentURL(subElement));
203         }
204       }
205     }
206     return textInput;
207   }
208   
209   /**
210    * Parses a <tt>image</tt> element
211    */

212   protected RssImage parseImage(Element JavaDoc textInputElement) throws RssParseException {
213     RssImage image = new RssImage();
214     NodeList JavaDoc subNodes = textInputElement.getChildNodes();
215     for(int i=0; i < subNodes.getLength(); i++) {
216       if(subNodes.item(i).getNodeType() == Node.ELEMENT_NODE) {
217         Element JavaDoc subElement = (Element JavaDoc)subNodes.item(i);
218         String JavaDoc elemName = subElement.getNodeName();
219         if(elemName.equalsIgnoreCase("title")) {
220           image.setTitle(RssParseHelper.parseContentChildren(subElement));
221         } else if(elemName.equalsIgnoreCase("url")) {
222           image.setURL(RssParseHelper.parseContentURL(subElement));
223         } else if(elemName.equalsIgnoreCase("title")) {
224           image.setTitle(RssParseHelper.parseContentChildren(subElement));
225         } else if(elemName.equalsIgnoreCase("link")) {
226           image.setLink(RssParseHelper.parseContentURL(subElement));
227         } else if(elemName.equalsIgnoreCase("description")) {
228           image.setDescription(RssParseHelper.parseContentChildren(subElement));
229         } else if(elemName.equalsIgnoreCase("width")) {
230           image.setWidth(RssParseHelper.parseContentInt(subElement, RssConstants.DEFAULT_IMAGE_WIDTH));
231         } else if(elemName.equalsIgnoreCase("height")) {
232           image.setHeight(RssParseHelper.parseContentInt(subElement, RssConstants.DEFAULT_IMAGE_HEIGHT));
233         }
234       }
235     }
236     return image;
237   }
238
239   /**
240    * Parses a <tt>item</tt> element
241    */

242   protected RssItem parseItem(Element JavaDoc itemElement) throws RssParseException {
243     RssItem item = new RssItem();
244     NodeList JavaDoc subNodes = itemElement.getChildNodes();
245     for(int i=0; i < subNodes.getLength(); i++) {
246       if(subNodes.item(i).getNodeType() == Node.ELEMENT_NODE) {
247         Element JavaDoc subElement = (Element JavaDoc)subNodes.item(i);
248         String JavaDoc elemName = subElement.getNodeName();
249         if(elemName.equalsIgnoreCase("title")) {
250           item.setTitle(RssParseHelper.parseContentChildren(subElement));
251         } else if(elemName.equalsIgnoreCase("link")) {
252           item.setLink(RssParseHelper.parseContentURL(subElement));
253         } else if(elemName.equalsIgnoreCase("description")) {
254           item.setDescription(RssParseHelper.parseContentChildren(subElement));
255         } else if(elemName.equalsIgnoreCase("author")) {
256           item.setAuthor(RssParseHelper.parseContentChildren(subElement));
257         } else if(elemName.equalsIgnoreCase("category")) {
258           item.addCategory(RssParseHelper.parseContentChildren(subElement));
259         } else if(elemName.equalsIgnoreCase("comments")) {
260           item.setComments(RssParseHelper.parseContentChildren(subElement));
261         } else if(elemName.equalsIgnoreCase("enclosure")) {
262           item.setEnclosure(this.parseEnclosure(subElement));
263         } else if(elemName.equalsIgnoreCase("guid")) {
264           item.setGuid(this.parseGuid(subElement));
265         } else if(elemName.equalsIgnoreCase("pubDate")) {
266           item.setPubDate(RssParseHelper.parseContentDate(subElement));
267         }
268       }
269     }
270     return item;
271   }
272
273   /**
274    * Parses a <tt>enclosure</tt> element
275    */

276   protected RssEnclosure parseEnclosure(Element JavaDoc enclosureElement) throws RssParseException {
277     RssEnclosure enclosure = new RssEnclosure();
278     enclosure.setURL(RssParseHelper.parseURL(enclosureElement.getAttribute("url")));
279     try {
280       enclosure.setLength(Long.parseLong(enclosureElement.getAttribute("length")));
281     } catch(NumberFormatException JavaDoc e) {
282       throw new RssParseException("Illegal length entered for enclosure: " + enclosureElement.getAttribute("length"));
283     }
284     enclosure.setType(enclosureElement.getAttribute("type"));
285     return enclosure;
286   }
287   
288   /**
289    * Parses a <tt>guid</tt> element
290    */

291   protected RssGuid parseGuid(Element JavaDoc guidElement) throws RssParseException {
292     RssGuid guid = new RssGuid();
293     String JavaDoc permaLink = guidElement.getAttribute("isPermaLink");
294     if(permaLink != null) {
295       guid.setIsPermaLink(permaLink.equalsIgnoreCase("true"));
296     }
297     guid.setGuid(RssParseHelper.parseContentChildren(guidElement));
298     return guid;
299   }
300   
301   
302   // --------------------------------------------------------------------------
303
// -- Property access methods ---------------------------------------------
304
// --------------------------------------------------------------------------
305

306   /**
307    * Sets the <code>DocumentBuilder</code> used for XML parsing
308    */

309   protected void setDocumentBuilder(DocumentBuilder JavaDoc builder) {
310     this.myDocumentBuilder = builder;
311   }
312   
313   /**
314    * Gets the <code>DocumentBuilder</code> used for XML parsing
315    */

316   protected DocumentBuilder JavaDoc getDocumentBuilder() {
317     return this.myDocumentBuilder;
318   }
319   
320 }
Popular Tags