1 19 20 package org.jperdian.rss2; 21 22 import java.io.BufferedInputStream ; 23 import java.io.IOException ; 24 import java.io.InputStream ; 25 import java.net.URL ; 26 import java.util.List ; 27 28 import javax.xml.parsers.DocumentBuilder ; 29 import javax.xml.parsers.DocumentBuilderFactory ; 30 import javax.xml.parsers.ParserConfigurationException ; 31 32 import org.jperdian.rss2.dom.RssChannel; 33 import org.jperdian.rss2.dom.RssCloud; 34 import org.jperdian.rss2.dom.RssConstants; 35 import org.jperdian.rss2.dom.RssEnclosure; 36 import org.jperdian.rss2.dom.RssGuid; 37 import org.jperdian.rss2.dom.RssImage; 38 import org.jperdian.rss2.dom.RssItem; 39 import org.jperdian.rss2.dom.RssTextInput; 40 import org.w3c.dom.Document ; 41 import org.w3c.dom.Element ; 42 import org.w3c.dom.Node ; 43 import org.w3c.dom.NodeList ; 44 import org.xml.sax.SAXException ; 45 46 52 53 public class RssParser { 54 55 private DocumentBuilder myDocumentBuilder = null; 56 57 public RssParser() { 58 try { 59 DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); 60 this.setDocumentBuilder(builder); 61 } catch(ParserConfigurationException e) { 62 throw new RuntimeException ("Cannot create DocumentBuilder", e); 63 } 64 } 65 66 70 public RssChannel parse(URL sourceURL, RssChannel targetChannel) throws RssException { 71 try { 72 InputStream inStream = new BufferedInputStream (sourceURL.openStream()); 73 Document document = this.getDocumentBuilder().parse(inStream); 74 inStream.close(); 75 return this.parse(document, targetChannel); 76 } catch(SAXException e) { 77 throw new RssParseException("Illegal XML format \n[" + e.getMessage() + "]", e); 78 } catch(IOException e) { 79 throw new RssException("Cannot connect to source URL: " + sourceURL, e); 80 } 81 } 82 83 87 public RssChannel parse(Document xmlDocument, RssChannel targetChannel) throws RssParseException { 88 RssChannel resultChannel = null; 89 Element rootElement = xmlDocument.getDocumentElement(); 90 NodeList rootSubNodes = rootElement.getChildNodes(); 91 for(int i=0; i < rootSubNodes.getLength(); i++) { 92 Node subNode = rootSubNodes.item(i); 93 String subNodeName = subNode.getNodeName(); 94 if(subNode.getNodeType() == Node.ELEMENT_NODE && subNodeName.equalsIgnoreCase("channel")) { 95 resultChannel = this.parseChannel((Element )rootSubNodes.item(i), targetChannel); 96 } else if(subNodeName.equalsIgnoreCase("item")) { 97 RssItem item = this.parseItem((Element )subNode); 98 item.setSource(resultChannel); 99 resultChannel.addItem(item); 100 } 101 } 102 if(resultChannel != null) { 103 return resultChannel; 104 } else { 105 throw new RssParseException("No channel element found in message"); 106 } 107 } 108 109 113 protected RssChannel parseChannel(Element channelElement, RssChannel channel) throws RssParseException { 114 List itemList = channel.getItemList(); 115 if(itemList != null && itemList.size() > 0) { 116 itemList.clear(); 117 } 118 NodeList subNodes = channelElement.getChildNodes(); 119 for(int i=0; i < subNodes.getLength(); i++) { 120 if(subNodes.item(i).getNodeType() == Node.ELEMENT_NODE) { 121 Element subElement = (Element )subNodes.item(i); 122 String elemName = subElement.getNodeName(); 123 if(elemName.equalsIgnoreCase("title")) { 124 channel.setTitle(RssParseHelper.parseContentChildren(subElement)); 125 } else if(elemName.equalsIgnoreCase("link")) { 126 channel.setLink(RssParseHelper.parseContentURL(subElement)); 127 } else if(elemName.equalsIgnoreCase("description")) { 128 channel.setDescription(RssParseHelper.parseContentChildren(subElement)); 129 } else if(elemName.equalsIgnoreCase("copyright")) { 130 channel.setCopyright(RssParseHelper.parseContentChildren(subElement)); 131 } else if(elemName.equalsIgnoreCase("managingEditor")) { 132 channel.setManagingEditor(RssParseHelper.parseContentChildren(subElement)); 133 } else if(elemName.equalsIgnoreCase("webMaster")) { 134 channel.setWebmaster(RssParseHelper.parseContentChildren(subElement)); 135 } else if(elemName.equalsIgnoreCase("pubDate")) { 136 channel.setPubDate(RssParseHelper.parseContentDate(subElement)); 137 } else if(elemName.equalsIgnoreCase("lastBuildDate")) { 138 channel.setLastBuildDate(RssParseHelper.parseContentDate(subElement)); 139 } else if(elemName.equalsIgnoreCase("category")) { 140 channel.addCategory(RssParseHelper.parseContentChildren(subElement)); 141 } else if(elemName.equalsIgnoreCase("generator")) { 142 channel.setGenerator(RssParseHelper.parseContentChildren(subElement)); 143 } else if(elemName.equalsIgnoreCase("docs")) { 144 channel.setDocs(RssParseHelper.parseContentURL(subElement)); 145 } else if(elemName.equalsIgnoreCase("cloud")) { 146 channel.setCloud(this.parseCloud(subElement)); 147 } else if(elemName.equalsIgnoreCase("ttl")) { 148 channel.setTtl(RssParseHelper.parseContentInt(subElement)); 149 } else if(elemName.equalsIgnoreCase("image")) { 150 channel.setImage(this.parseImage(subElement)); 151 } else if(elemName.equalsIgnoreCase("rating")) { 152 channel.setRating(RssParseHelper.parseContentChildren(subElement)); 153 } else if(elemName.equalsIgnoreCase("textInput")) { 154 channel.setTextInput(this.parseTextInput(subElement)); 155 } else if(elemName.equalsIgnoreCase("skipHours")) { 156 channel.addSkipHour(RssParseHelper.parseContentInt(subElement)); 157 } else if(elemName.equalsIgnoreCase("skipDays")) { 158 channel.addSkipDay(RssParseHelper.parseContentChildren(subElement)); 159 } else if(elemName.equalsIgnoreCase("item")) { 160 RssItem item = this.parseItem(subElement); 161 item.setSource(channel); 162 channel.addItem(item); 163 } 164 } 165 } 166 return channel; 167 } 168 169 172 protected RssCloud parseCloud(Element cloudElement) throws RssParseException { 173 RssCloud cloud = new RssCloud(); 174 cloud.setDomain(cloudElement.getAttribute("domain")); 175 try { 176 cloud.setPort(Integer.parseInt(cloudElement.getAttribute("port"))); 177 } catch(NumberFormatException e) { 178 throw new RssParseException("Illegal port entered for cloud: " + cloudElement.getAttribute("port")); 179 } 180 cloud.setPath(cloudElement.getAttribute("path")); 181 cloud.setRegisterProcedure(cloudElement.getAttribute("registerProcedure")); 182 return cloud; 183 } 184 185 188 protected RssTextInput parseTextInput(Element textInputElement) throws RssParseException { 189 RssTextInput textInput = new RssTextInput(); 190 NodeList subNodes = textInputElement.getChildNodes(); 191 for(int i=0; i < subNodes.getLength(); i++) { 192 if(subNodes.item(i).getNodeType() == Node.ELEMENT_NODE) { 193 Element subElement = (Element )subNodes.item(i); 194 String elemName = subElement.getNodeName(); 195 if(elemName.equalsIgnoreCase("title")) { 196 textInput.setTitle(RssParseHelper.parseContentChildren(subElement)); 197 } else if(elemName.equalsIgnoreCase("description")) { 198 textInput.setDescription(RssParseHelper.parseContentChildren(subElement)); 199 } else if(elemName.equalsIgnoreCase("name")) { 200 textInput.setName(RssParseHelper.parseContentChildren(subElement)); 201 } else if(elemName.equalsIgnoreCase("link")) { 202 textInput.setLink(RssParseHelper.parseContentURL(subElement)); 203 } 204 } 205 } 206 return textInput; 207 } 208 209 212 protected RssImage parseImage(Element textInputElement) throws RssParseException { 213 RssImage image = new RssImage(); 214 NodeList subNodes = textInputElement.getChildNodes(); 215 for(int i=0; i < subNodes.getLength(); i++) { 216 if(subNodes.item(i).getNodeType() == Node.ELEMENT_NODE) { 217 Element subElement = (Element )subNodes.item(i); 218 String elemName = subElement.getNodeName(); 219 if(elemName.equalsIgnoreCase("title")) { 220 image.setTitle(RssParseHelper.parseContentChildren(subElement)); 221 } else if(elemName.equalsIgnoreCase("url")) { 222 image.setURL(RssParseHelper.parseContentURL(subElement)); 223 } else if(elemName.equalsIgnoreCase("title")) { 224 image.setTitle(RssParseHelper.parseContentChildren(subElement)); 225 } else if(elemName.equalsIgnoreCase("link")) { 226 image.setLink(RssParseHelper.parseContentURL(subElement)); 227 } else if(elemName.equalsIgnoreCase("description")) { 228 image.setDescription(RssParseHelper.parseContentChildren(subElement)); 229 } else if(elemName.equalsIgnoreCase("width")) { 230 image.setWidth(RssParseHelper.parseContentInt(subElement, RssConstants.DEFAULT_IMAGE_WIDTH)); 231 } else if(elemName.equalsIgnoreCase("height")) { 232 image.setHeight(RssParseHelper.parseContentInt(subElement, RssConstants.DEFAULT_IMAGE_HEIGHT)); 233 } 234 } 235 } 236 return image; 237 } 238 239 242 protected RssItem parseItem(Element itemElement) throws RssParseException { 243 RssItem item = new RssItem(); 244 NodeList subNodes = itemElement.getChildNodes(); 245 for(int i=0; i < subNodes.getLength(); i++) { 246 if(subNodes.item(i).getNodeType() == Node.ELEMENT_NODE) { 247 Element subElement = (Element )subNodes.item(i); 248 String elemName = subElement.getNodeName(); 249 if(elemName.equalsIgnoreCase("title")) { 250 item.setTitle(RssParseHelper.parseContentChildren(subElement)); 251 } else if(elemName.equalsIgnoreCase("link")) { 252 item.setLink(RssParseHelper.parseContentURL(subElement)); 253 } else if(elemName.equalsIgnoreCase("description")) { 254 item.setDescription(RssParseHelper.parseContentChildren(subElement)); 255 } else if(elemName.equalsIgnoreCase("author")) { 256 item.setAuthor(RssParseHelper.parseContentChildren(subElement)); 257 } else if(elemName.equalsIgnoreCase("category")) { 258 item.addCategory(RssParseHelper.parseContentChildren(subElement)); 259 } else if(elemName.equalsIgnoreCase("comments")) { 260 item.setComments(RssParseHelper.parseContentChildren(subElement)); 261 } else if(elemName.equalsIgnoreCase("enclosure")) { 262 item.setEnclosure(this.parseEnclosure(subElement)); 263 } else if(elemName.equalsIgnoreCase("guid")) { 264 item.setGuid(this.parseGuid(subElement)); 265 } else if(elemName.equalsIgnoreCase("pubDate")) { 266 item.setPubDate(RssParseHelper.parseContentDate(subElement)); 267 } 268 } 269 } 270 return item; 271 } 272 273 276 protected RssEnclosure parseEnclosure(Element enclosureElement) throws RssParseException { 277 RssEnclosure enclosure = new RssEnclosure(); 278 enclosure.setURL(RssParseHelper.parseURL(enclosureElement.getAttribute("url"))); 279 try { 280 enclosure.setLength(Long.parseLong(enclosureElement.getAttribute("length"))); 281 } catch(NumberFormatException e) { 282 throw new RssParseException("Illegal length entered for enclosure: " + enclosureElement.getAttribute("length")); 283 } 284 enclosure.setType(enclosureElement.getAttribute("type")); 285 return enclosure; 286 } 287 288 291 protected RssGuid parseGuid(Element guidElement) throws RssParseException { 292 RssGuid guid = new RssGuid(); 293 String permaLink = guidElement.getAttribute("isPermaLink"); 294 if(permaLink != null) { 295 guid.setIsPermaLink(permaLink.equalsIgnoreCase("true")); 296 } 297 guid.setGuid(RssParseHelper.parseContentChildren(guidElement)); 298 return guid; 299 } 300 301 302 306 309 protected void setDocumentBuilder(DocumentBuilder builder) { 310 this.myDocumentBuilder = builder; 311 } 312 313 316 protected DocumentBuilder getDocumentBuilder() { 317 return this.myDocumentBuilder; 318 } 319 320 } | Popular Tags |