package de.nava.informa.parsers;

import java.net.URL;
import java.util.*;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Attribute;
import org.jdom.Element;
import org.jdom.Namespace;

import de.nava.informa.core.*;
import de.nava.informa.utils.ParserUtils;
import de.nava.informa.impl.basic.ChannelBuilder;

/**
 * Reads a JDOM element tree laid out as an RSS 2.0 feed and turns it into
 * channel, item, image, text input and cloud objects created through a
 * {@link ChannelBuilderIF}.
 *
 * <p>All functionality is exposed through the static
 * {@link #parse(ChannelBuilderIF, Element)} method.</p>
 */
class RSS_2_0_Parser {

  private static final Log logger = LogFactory.getLog(RSS_2_0_Parser.class);

  /**
   * Recursively converts one node of a title tree into a category.
   *
   * <p>NOTE(review): the category is created with a fresh basic
   * {@link ChannelBuilder}, not with the builder handed to
   * {@link #parse(ChannelBuilderIF, Element)} -- confirm that this is
   * intended for non-basic builders.</p>
   *
   * @param parent   parent category, or <code>null</code> for a top-level one
   * @param title    title of the category to create
   * @param children table mapping child titles (String) to their own
   *                 child tables (Hashtable)
   * @return the category created for <code>title</code>
   */
  private static CategoryIF getCategoryList(CategoryIF parent, String title,
                                            Hashtable children) {
    ChannelBuilder builder = new ChannelBuilder();
    CategoryIF cat = builder.createCategory(parent, title);
    Enumeration itChild = children.keys();
    while (itChild.hasMoreElements()) {
      String childKey = (String) itChild.nextElement();
      getCategoryList(cat, childKey, (Hashtable) children.get(childKey));
    }
    return cat;
  }

  /**
   * Looks up an attribute of an element.
   *
   * <p>Unprefixed XML attributes are not placed in the default namespace of
   * their element, so the unqualified name is tried first. The lookup in
   * <code>defNS</code> is kept as a fallback so that anything found by the
   * previous namespace-qualified lookup is still found.</p>
   *
   * @param element element carrying the attribute
   * @param name    local attribute name
   * @param defNS   default namespace of the feed
   * @return the attribute, or <code>null</code> if neither lookup matches
   */
  private static Attribute findAttribute(Element element, String name,
                                         Namespace defNS) {
    Attribute attribute = element.getAttribute(name);
    if (attribute == null) {
      attribute = element.getAttribute(name, defNS);
    }
    return attribute;
  }

  /**
   * Returns the value of an attribute located with
   * {@link #findAttribute(Element, String, Namespace)}.
   *
   * @return the attribute value, or <code>null</code> if it is absent
   */
  private static String findAttributeValue(Element element, String name,
                                           Namespace defNS) {
    Attribute attribute = findAttribute(element, name, defNS);
    return (attribute == null) ? null : attribute.getValue();
  }

  /**
   * Builds the top-level categories declared below an element.
   *
   * <p>The <code>category</code> children are read in the default namespace
   * and, if there are none, in the dc namespace. The normalized text of each
   * one is split on "/" and merged into a nested table, so paths sharing a
   * prefix share the corresponding categories.</p>
   *
   * @param parent element whose category children are read
   * @param defNS  default namespace of the feed
   * @param dcNS   namespace used for the fallback lookup
   * @return list of top-level categories; empty if none were declared
   */
  private static ArrayList parseCategories(Element parent, Namespace defNS,
                                           Namespace dcNS) {
    ArrayList catList = new ArrayList();
    List listCategory = parent.getChildren("category", defNS);
    if (listCategory.size() < 1) {
      listCategory = parent.getChildren("category", dcNS);
    }
    if (listCategory.size() < 1) {
      return catList;
    }

    // First pass: merge all category paths into one tree of titles.
    Hashtable catTable = new Hashtable();
    Iterator itCat = listCategory.iterator();
    while (itCat.hasNext()) {
      Hashtable currTable = catTable;
      Element elCategory = (Element) itCat.next();
      String[] titles = elCategory.getTextNormalize().split("/");
      for (int x = 0; x < titles.length; x++) {
        if (!currTable.containsKey(titles[x])) {
          currTable.put(titles[x], new Hashtable());
        }
        currTable = (Hashtable) currTable.get(titles[x]);
      }
    }

    // Second pass: turn every root of the tree into a category hierarchy.
    Enumeration enumCategories = catTable.keys();
    while (enumCategories.hasMoreElements()) {
      String key = (String) enumCategories.nextElement();
      catList.add(getCategoryList(null, key, (Hashtable) catTable.get(key)));
    }
    return catList;
  }

  /**
   * Returns the trimmed text of an element, or a default if it is missing.
   *
   * @param element  element to read, may be <code>null</code>
   * @param fallback value returned when <code>element</code> is
   *                 <code>null</code>
   */
  private static String textOrDefault(Element element, String fallback) {
    return (element == null) ? fallback : element.getTextTrim();
  }

  /**
   * Creates one item from an <code>item</code> element and fills in its
   * optional properties (subject, categories, date, creator, comments, guid,
   * source and enclosure).
   *
   * @param cBuilder   builder used to create the item and its parts
   * @param chnl       channel the item is created for
   * @param item       the <code>item</code> element
   * @param defNS      default namespace of the feed
   * @param dcNS       namespace used for the dc fallbacks
   * @param dateParsed timestamp stored as the item's found date
   */
  private static void parseItem(ChannelBuilderIF cBuilder, ChannelIF chnl,
                                Element item, Namespace defNS, Namespace dcNS,
                                Date dateParsed) throws ParseException {
    String strTitle = textOrDefault(item.getChild("title", defNS), "<No Title>");
    if (logger.isDebugEnabled()) {
      logger.debug("Item element found (" + strTitle + ").");
    }
    String strLink = textOrDefault(item.getChild("link", defNS), "");
    String strDesc = textOrDefault(item.getChild("description", defNS), "");

    ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc,
                                         ParserUtils.getURL(strLink));

    // subject: default namespace first, then dc:subject
    Element elSubject = item.getChild("subject", defNS);
    if (elSubject == null) {
      elSubject = item.getChild("subject", dcNS);
    }
    if (elSubject != null) {
      rssItem.setSubject(elSubject.getTextTrim());
    }

    ArrayList catList = parseCategories(item, defNS, dcNS);
    if (catList.size() > 0) {
      rssItem.setCategories(catList);
    }

    // date: pubDate first, then dc:date
    Element elDate = item.getChild("pubDate", defNS);
    if (elDate == null) {
      elDate = item.getChild("date", dcNS);
    }
    if (elDate != null) {
      rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
    }

    rssItem.setFound(dateParsed);

    // creator: author first, then dc:creator
    Element elAuthor = item.getChild("author", defNS);
    if (elAuthor == null) {
      elAuthor = item.getChild("creator", dcNS);
    }
    if (elAuthor != null) {
      rssItem.setCreator(elAuthor.getTextTrim());
    }

    // comments are always set; a missing element is passed on as ""
    String strComments = textOrDefault(item.getChild("comments", defNS), "");
    rssItem.setComments(ParserUtils.getURL(strComments));

    Element elGuid = item.getChild("guid", defNS);
    if (elGuid != null) {
      String guidUrl = elGuid.getTextTrim();
      if (guidUrl != null) {
        // isPermaLink stays true unless the attribute says otherwise
        boolean permaLink = true;
        String permaLinkStr = findAttributeValue(elGuid, "isPermaLink", defNS);
        if (permaLinkStr != null) {
          permaLink = Boolean.valueOf(permaLinkStr).booleanValue();
        }
        ItemGuidIF itemGuid =
          cBuilder.createItemGuid(rssItem, guidUrl, permaLink);
        rssItem.setGuid(itemGuid);
      }
    }

    Element elSource = item.getChild("source", defNS);
    if (elSource != null) {
      String sourceName = elSource.getTextTrim();
      Attribute sourceAttribute = findAttribute(elSource, "url", defNS);
      // a source without url attribute is skipped
      if (sourceAttribute != null) {
        String sourceLocation = sourceAttribute.getValue().trim();
        ItemSourceIF itemSource =
          cBuilder.createItemSource(rssItem, sourceName, sourceLocation, null);
        rssItem.setSource(itemSource);
      }
    }

    Element elEnclosure = item.getChild("enclosure", defNS);
    if (elEnclosure != null) {
      URL location = null;
      String type = null;
      int length = -1;
      Attribute urlAttribute = findAttribute(elEnclosure, "url", defNS);
      if (urlAttribute != null) {
        location = ParserUtils.getURL(urlAttribute.getValue().trim());
      }
      Attribute typeAttribute = findAttribute(elEnclosure, "type", defNS);
      if (typeAttribute != null) {
        type = typeAttribute.getValue().trim();
      }
      Attribute lengthAttribute = findAttribute(elEnclosure, "length", defNS);
      if (lengthAttribute != null) {
        try {
          length = Integer.parseInt(lengthAttribute.getValue().trim());
        } catch (NumberFormatException e) {
          // keep -1 for an unparsable length
          logger.warn(e);
        }
      }
      ItemEnclosureIF itemEnclosure =
        cBuilder.createItemEnclosure(rssItem, location, type, length);
      rssItem.setEnclosure(itemEnclosure);
    }
  }

  /**
   * Creates the channel image from an <code>image</code> element.
   * Unparsable width or height values are logged and left unset.
   *
   * @param cBuilder builder used to create the image
   * @param image    the <code>image</code> element
   * @param defNS    default namespace of the feed
   * @return the created image
   */
  private static ImageIF parseImage(ChannelBuilderIF cBuilder, Element image,
                                    Namespace defNS) throws ParseException {
    ImageIF rssImage =
      cBuilder.createImage(
        image.getChildTextTrim("title", defNS),
        ParserUtils.getURL(image.getChildTextTrim("url", defNS)),
        ParserUtils.getURL(image.getChildTextTrim("link", defNS)));
    Element imgWidth = image.getChild("width", defNS);
    if (imgWidth != null) {
      try {
        rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
      } catch (NumberFormatException e) {
        logger.warn("Error parsing width: " + e.getMessage());
      }
    }
    Element imgHeight = image.getChild("height", defNS);
    if (imgHeight != null) {
      try {
        rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
      } catch (NumberFormatException e) {
        logger.warn("Error parsing height: " + e.getMessage());
      }
    }
    Element imgDescr = image.getChild("description", defNS);
    if (imgDescr != null) {
      rssImage.setDescription(imgDescr.getTextTrim());
    }
    return rssImage;
  }

  /**
   * Creates the cloud object from a <code>cloud</code> element. A missing
   * or unparsable port is passed to the builder as -1.
   *
   * @param cBuilder builder used to create the cloud
   * @param cloud    the <code>cloud</code> element
   * @param defNS    default namespace of the feed
   * @return the created cloud
   */
  private static CloudIF parseCloud(ChannelBuilderIF cBuilder, Element cloud,
                                    Namespace defNS) throws ParseException {
    String portText = findAttributeValue(cloud, "port", defNS);
    int port = -1;
    if (portText != null) {
      try {
        port = Integer.parseInt(portText);
      } catch (NumberFormatException e) {
        logger.warn(e);
      }
    }
    return cBuilder.createCloud(
      findAttributeValue(cloud, "domain", defNS),
      port,
      findAttributeValue(cloud, "path", defNS),
      findAttributeValue(cloud, "registerProcedure", defNS),
      findAttributeValue(cloud, "protocol", defNS));
  }

  /**
   * Parses an RSS 2.0 document into a channel.
   *
   * <p>The channel is created first, then every <code>item</code> child,
   * then the optional channel elements (image, textinput, copyright, rating,
   * docs, generator, ttl, pubDate, lastBuildDate, categories,
   * managingEditor, webMaster, cloud). Numeric values that cannot be parsed
   * are logged as warnings and skipped instead of aborting the parse.</p>
   *
   * @param cBuilder builder used to create all channel objects
   * @param root     root element of the feed document
   * @return the populated channel
   * @throws ParseException   if the document has no <code>channel</code>
   *                          element
   * @throws RuntimeException if <code>cBuilder</code> is <code>null</code>
   */
  static ChannelIF parse(ChannelBuilderIF cBuilder, Element root)
    throws ParseException {
    if (cBuilder == null) {
      throw new RuntimeException(
        "Without builder no channel can be created.");
    }
    // One timestamp shared by all items and the channel itself.
    Date dateParsed = new Date();
    logger.debug("start parsing.");

    Namespace defNS = ParserUtils.getDefaultNS(root);
    if (defNS == null) {
      defNS = Namespace.NO_NAMESPACE;
      logger.info("No default namespace found.");
    }
    // Without a declared dc namespace the dc fallbacks use the default one.
    Namespace dcNS = ParserUtils.getNamespace(root, "dc");
    if (dcNS == null) {
      dcNS = defNS;
    }

    Element channel = root.getChild("channel", defNS);
    if (channel == null) {
      logger.warn("Channel element could not be retrieved from feed.");
      throw new ParseException("No channel element found in feed.");
    }

    ChannelIF chnl =
      cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));
    chnl.setFormat(ChannelFormat.RSS_2_0);
    chnl.setDescription(channel.getChildTextTrim("description", defNS));
    chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS)));
    chnl.setLanguage(channel.getChildTextTrim("language", defNS));

    Iterator itItems = channel.getChildren("item", defNS).iterator();
    while (itItems.hasNext()) {
      parseItem(cBuilder, chnl, (Element) itItems.next(), defNS, dcNS,
                dateParsed);
    }

    Element image = channel.getChild("image", defNS);
    if (image != null) {
      chnl.setImage(parseImage(cBuilder, image, defNS));
    }

    Element txtinp = channel.getChild("textinput", defNS);
    if (txtinp != null) {
      TextInputIF rssTextInput =
        cBuilder.createTextInput(
          txtinp.getChildTextTrim("title", defNS),
          txtinp.getChildTextTrim("description", defNS),
          txtinp.getChildTextTrim("name", defNS),
          ParserUtils.getURL(txtinp.getChildTextTrim("link", defNS)));
      chnl.setTextInput(rssTextInput);
    }

    Element copyright = channel.getChild("copyright", defNS);
    if (copyright != null) {
      chnl.setCopyright(copyright.getTextTrim());
    }

    Element rating = channel.getChild("rating", defNS);
    if (rating != null) {
      chnl.setRating(rating.getTextTrim());
    }

    Element docs = channel.getChild("docs", defNS);
    if (docs != null) {
      chnl.setDocs(docs.getTextTrim());
    }

    Element generator = channel.getChild("generator", defNS);
    if (generator != null) {
      chnl.setGenerator(generator.getTextTrim());
    }

    Element ttl = channel.getChild("ttl", defNS);
    if (ttl != null) {
      // A malformed ttl must not abort the whole feed; treat it like the
      // other numeric fields and only log it.
      try {
        chnl.setTtl(Integer.parseInt(ttl.getTextTrim()));
      } catch (NumberFormatException e) {
        logger.warn("Error parsing ttl: " + e.getMessage());
      }
    }

    Element pubDate = channel.getChild("pubDate", defNS);
    if (pubDate != null) {
      chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
    }

    Element lastBuildDate = channel.getChild("lastBuildDate", defNS);
    if (lastBuildDate != null) {
      chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
    }

    ArrayList catList = parseCategories(channel, defNS, dcNS);
    if (catList.size() > 0) {
      chnl.setCategories(catList);
    }

    Element managingEditor = channel.getChild("managingEditor", defNS);
    if (managingEditor != null) {
      chnl.setCreator(managingEditor.getTextTrim());
    }

    Element webMaster = channel.getChild("webMaster", defNS);
    if (webMaster != null) {
      chnl.setPublisher(webMaster.getTextTrim());
    }

    Element cloud = channel.getChild("cloud", defNS);
    if (cloud != null) {
      chnl.setCloud(parseCloud(cBuilder, cloud, defNS));
    }

    chnl.setLastUpdated(dateParsed);

    return chnl;
  }

}