1 26 27 29 package de.nava.informa.parsers; 30 31 import java.io.File ; 32 import java.io.IOException ; 33 import java.io.InputStream ; 34 import java.io.Reader ; 35 import java.net.URL ; 36 import java.util.Iterator ; 37 38 import org.apache.commons.logging.Log; 39 import org.apache.commons.logging.LogFactory; 40 import org.jdom.Document; 41 import org.jdom.Element; 42 import org.jdom.JDOMException; 43 import org.jdom.input.SAXBuilder; 44 import org.xml.sax.InputSource ; 45 46 import de.nava.informa.core.ChannelBuilderIF; 47 import de.nava.informa.core.ItemIF; 48 import de.nava.informa.core.ChannelIF; 49 import de.nava.informa.core.ParseException; 50 import de.nava.informa.core.UnsupportedFormatException; 51 53 62 public class FeedParser { 63 64 private static Log logger = LogFactory.getLog(FeedParser.class); 65 66 public static ChannelIF parse(ChannelBuilderIF cBuilder, URL aURL) 67 throws IOException , ParseException { 68 return parse(cBuilder, new InputSource (aURL.toExternalForm()), aURL); 69 } 70 71 public static ChannelIF parse(ChannelBuilderIF cBuilder, String url) 72 throws IOException , ParseException { 73 URL aURL = null; 74 try { 75 aURL = new URL (url); 76 } catch (java.net.MalformedURLException e) { 77 logger.warn("Could not create URL for " + url); 78 } 79 return parse(cBuilder, new InputSource (url), aURL); 80 } 81 82 public static ChannelIF parse(ChannelBuilderIF cBuilder, Reader reader) 83 throws IOException , ParseException { 84 return parse(cBuilder, new InputSource (reader), null); 85 } 86 87 public static ChannelIF parse(ChannelBuilderIF cBuilder, InputStream stream) 88 throws IOException , ParseException { 89 return parse(cBuilder, new InputSource (stream), null); 90 } 91 92 public static ChannelIF parse(ChannelBuilderIF cBuilder, File aFile) 93 throws IOException , ParseException { 94 URL aURL = null; 95 try { 96 aURL = aFile.toURL(); 97 } catch (java.net.MalformedURLException e) { 98 throw new IOException ("File " + aFile + " had invalid URL " + 99 "representation."); 100 } 101 return parse(cBuilder, new InputSource (aURL.toExternalForm()), aURL); 102 } 103 104 public static ChannelIF parse(ChannelBuilderIF cBuilder, 105 InputSource inpSource, 106 URL baseLocation) 107 throws IOException , ParseException { 108 SAXBuilder saxBuilder = new SAXBuilder(false); 110 try { 113 Document doc = saxBuilder.build(inpSource); 114 ChannelIF channel = parse(cBuilder, doc); 115 channel.setLocation(baseLocation); 116 return channel; 117 } catch (JDOMException e) { 118 throw new ParseException(e); 119 } 120 } 121 122 126 private static synchronized ChannelIF parse(ChannelBuilderIF cBuilder, 127 Document doc) 128 throws ParseException { 129 130 if (cBuilder == null) { 131 throw new RuntimeException ("Without builder no channel can " + 132 "be created."); 133 } 134 logger.debug("start parsing."); 135 Element root = doc.getRootElement(); 137 String rootElement = root.getName().toLowerCase(); 138 if (rootElement.startsWith("rss")) { 140 String rssVersion = root.getAttribute("version").getValue(); 141 if (rssVersion.indexOf("0.91") >= 0) { 142 logger.info("Channel uses RSS root element (Version 0.91)."); 143 return RSS_0_91_Parser.parse(cBuilder, root); 144 } else if (rssVersion.indexOf("0.92") >= 0) { 145 logger.info("Channel uses RSS root element (Version 0.92)."); 146 return RSS_0_91_Parser.parse(cBuilder, root); 149 } else if (rootElement.indexOf("0.93") >= 0) { 150 logger.info("Channel uses RSS root element (Version 0.93)."); 151 logger.warn("RSS 0.93 not fully supported yet, fall back to 0.91."); 152 } else if (rootElement.indexOf("0.94") >= 0) { 154 logger.info("Channel uses RSS root element (Version 0.94)."); 155 logger.warn("RSS 0.94 not fully supported yet, will use RSS 2.0"); 156 return RSS_2_0_Parser.parse(cBuilder, root); 158 } else if (rssVersion.indexOf("2.0") >= 0 || rssVersion.equals("2")) { 159 logger.info("Channel uses RSS root element (Version 2.0)."); 160 return RSS_2_0_Parser.parse(cBuilder, root); 161 } else { 162 throw new UnsupportedFormatException("Unsupported RSS version [" + 163 rssVersion + "]."); 164 } 165 } else if (rootElement.indexOf("rdf") >= 0) { 166 return RSS_1_0_Parser.parse(cBuilder, root); 167 } else if (rootElement.indexOf("feed") >= 0) { 168 String feedVersion = root.getAttribute("version").getValue(); 169 if (feedVersion.indexOf("0.1") >= 0 || feedVersion.indexOf("0.2") >= 0) { 170 logger.info("Channel uses feed root element (Version " + feedVersion + ")."); 171 logger.warn("This atom version is not really supported yet, assume Atom 0.3 format"); 172 return Atom_0_3_Parser.parse(cBuilder, root); 173 } else if (feedVersion.indexOf("0.3") >= 0) { 174 logger.info("Channel uses feed root element (Version 0.3)."); 175 return Atom_0_3_Parser.parse(cBuilder, root); 176 } else { 177 throw new UnsupportedFormatException("Unsupported feed version [" + 178 feedVersion + "]."); 179 } 180 } 181 182 throw new UnsupportedFormatException("Unsupported root element [" + 184 rootElement + "]."); 185 } 186 187 188 190 public static void main(String args[]) throws IOException , ParseException { 191 192 if (args.length < 2) { 193 System.err.println("Usage: java " + FeedParser.class.getName() + 194 " [-f <filename> | -u <url>]"); 195 System.exit(1); 196 } 197 198 String option = args[0]; 199 String data = args[1]; 200 201 ChannelIF channel = null; 202 203 ChannelBuilderIF builder = new de.nava.informa.impl.basic.ChannelBuilder(); 204 if (option.trim().startsWith("-f")) { 205 channel = FeedParser.parse(builder, new File (data)); 206 } else { 207 channel = FeedParser.parse(builder, new URL (data)); 208 } 209 210 System.out.println("Channel format: " + channel.getFormat().toString()); 211 System.out.println(channel); 212 System.out.println("containing " + channel.getItems().size() + " items"); 213 Iterator items = channel.getItems().iterator(); 214 while (items.hasNext()) { 215 ItemIF item = (ItemIF) items.next(); 216 System.out.println(" - " + item); 217 } 218 } 219 } 220 | Popular Tags |