RssParseHelper


1   /**
2    * RSS framework and reader
3    * Copyright (C) 2004 Christian Robert
4    * 
5    * This library is free software; you can redistribute it and/or
6    * modify it under the terms of the GNU Lesser General Public
7    * License as published by the Free Software Foundation; either
8    * version 2.1 of the License, or (at your option) any later version.
9    * 
10   * This library is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   * Lesser General Public License for more details.
14   * 
15   * You should have received a copy of the GNU Lesser General Public
16   * License along with this library; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   */
19  
20  package org.jperdian.rss2;
21  
22  import java.net.MalformedURLException  ;
23  import java.net.URL  ;
24  import java.text.DateFormat  ;
25  import java.text.ParseException  ;
26  import java.text.SimpleDateFormat  ;
27  import java.util.Date  ;
28  import java.util.Locale  ;
29  
30  import org.w3c.dom.Element  ;
31  import org.w3c.dom.NamedNodeMap  ;
32  import org.w3c.dom.Node  ;
33  import org.w3c.dom.NodeList  ;
34  
35  /**
36   * Some static methods for easy access to parse additions
37   * 
38   * @author Christian Robert
39   */
40  
41  public class RssParseHelper {
42    
43    private static final DateFormat   DF      = new SimpleDateFormat  ("EEE, dd MMM yyyy HH:mm:ss Z", Locale.US);
44    
45    /**
46     * Parses the given source <code>String</code> into a <code>URL</code>. If
47     * no value has been entered <code>null</code> will be returned
48     * @exception RssParseException
49     *   thrown if the given URL is not valid 
50     */
51    public static URL   parseURL(String   sourceString) throws RssParseException {
52      if(sourceString == null || sourceString.length() < 1) {
53        return null;
54      } else {
55        try {
56          return new URL  (sourceString);
57        } catch(MalformedURLException   e) {
58          throw new RssParseException("Illegal URL found: " + sourceString);
59        }
60      }
61    }
62    
63    /**
64     * Parse the given node into the resulting content
65     * @param info
66     *   the currently available <code>RuntimeInfo</code>
67     * @param node
68     *   the node to be parsed
69     * @return
70     *   the content that has been generated during the parse process
71     */
72    private static String   parseContent(Node   node)  {
73  
74      StringBuffer   buffer = new StringBuffer  ();
75      short nodeType      = node.getNodeType();
76  
77      switch(nodeType) {
78  
79        case Node.CDATA_SECTION_NODE:
80        case Node.TEXT_NODE:
81          String   value = node.getNodeValue();
82          if(value.length() > 0) {
83            buffer.append(value);
84          }
85          break;
86  
87        case Node.ELEMENT_NODE:
88          Element   element = (Element  )node;
89  
90        buffer.append("<").append(element.getNodeName());
91  
92        // Add all attributes
93        NamedNodeMap   attributes = element.getAttributes();
94        for(int i=0; i < attributes.getLength(); i++) {
95          buffer.append(" ").append(attributes.item(i).getNodeName()).append("=\"");
96          buffer.append(attributes.item(i).getNodeValue()).append("\"");
97        }
98  
99        if(element.hasChildNodes()) {
100         buffer.append(">");
101         buffer.append(RssParseHelper.parseContentChildren(element));
102         buffer.append("</").append(element.getNodeName()).append(">");
103       } else {
104 
105         /*
106          * Several browsers have two problems in interpreting correct XHTML:
107          * <br></br> is interpreted as a double <br> and two breaks are inserted
108          * where only one should be.
109          * On the other hand a <textarea /> is interpretated as only the opening
110          * tag <textarea> and everything standing after <textarea /> will be
111          * interpreted as if it was inside the textarea.
112          * So it has to be checked wheter the tag itself is an empty-tag
113          */
114 
115         String   nodeName = element.getNodeName();
116         if(nodeName.equalsIgnoreCase("br") ||
117            nodeName.equalsIgnoreCase("hr") ||
118            nodeName.equalsIgnoreCase("input") ||
119            nodeName.equalsIgnoreCase("meta") ||
120            nodeName.equalsIgnoreCase("frame")
121           ) {
122           buffer.append(">");
123         } else {
124           buffer.append("></").append(element.getNodeName()).append(">");
125         }
126       }
127     }
128     return buffer.toString();
129   }
130 
131   /**
132    * Parse the content of the children from the specified node and return
133    * it as String.
134    * @param node the content which children should be parsed
135    * @return the parse result content
136    */
137   public static String   parseContentChildren(Node   node)  {
138     StringBuffer   result = new StringBuffer  ();
139     if(node.hasChildNodes()) {
140       NodeList   children = node.getChildNodes();
141       for(int i=0; i < children.getLength(); i++) {
142         result.append(RssParseHelper.parseContent(children.item(i)));
143       }
144     }
145     return result.toString().trim();
146   }
147 
148   /**
149    * Parses the content of the given element and formats it as date, or
150    * returns <code>null</code> if no content could be read
151    */
152   public static Date   parseContentDate(Element   node) throws RssParseException {
153     String   content    = RssParseHelper.parseContentChildren(node);
154     if(content.length() < 1) {
155       return null;
156     } else {
157       try {
158         return DF.parse(content);
159       } catch(ParseException   e) {
160 //        throw new IllegalArgumentException("Illegal date: " + content);        
161         return null;
162       }
163     }
164   }
165   
166   /**
167    * Parses the content of the given element and formats it as number, or
168    * returns <code>0</code> if no content could be read
169    */
170   public static int parseContentInt(Element   node) throws RssParseException {
171     return RssParseHelper.parseContentInt(node, 0);
172   }
173 
174   /**
175    * Parses the content of the given element and formats it as number, or
176    * returns the default value if no content could be read
177    */
178   public static int parseContentInt(Element   node, int defaultValue) throws RssParseException {
179     String   content   = RssParseHelper.parseContentChildren(node);
180     if(content.length() < 1) {
181       return defaultValue;
182     } else {
183       try {
184         return Integer.parseInt(content);
185       } catch(NumberFormatException   e) {
186         throw new RssParseException("Illegal integer value found: " + content);
187       }
188     }
189   }
190   
191   /**
192    * Parses the content of the given element and formats it as URL
193    */
194   public static URL   parseContentURL(Element   node) throws RssParseException {
195     String   content   = RssParseHelper.parseContentChildren(node);
196     return RssParseHelper.parseURL(content);
197   }
198   
199 }
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags