package de.nava.informa.parsers;

import java.net.URL;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.jdom.Attribute;
import org.jdom.Element;
import org.jdom.Namespace;

import de.nava.informa.core.ChannelBuilderIF;
import de.nava.informa.core.ChannelFormat;
import de.nava.informa.core.ChannelIF;
import de.nava.informa.core.ImageIF;
import de.nava.informa.core.ItemIF;
import de.nava.informa.core.ItemSourceIF;
import de.nava.informa.core.ParseException;
import de.nava.informa.core.TextInputIF;
import de.nava.informa.utils.ParserUtils;

/**
 * Parser which reads the JDOM tree of an RSS 1.0 (RDF) feed and builds a
 * channel, its items, its image and its text input through a
 * {@link ChannelBuilderIF}.
 *
 * <p>Besides the core RSS 1.0 elements, elements are looked up under the
 * namespace prefixes <code>dc</code>, <code>sy</code>, <code>ag</code>,
 * <code>admin</code>, <code>dcterms</code>, <code>annotate</code> and
 * <code>rss091</code> as declared on the root element.</p>
 */
class RSS_1_0_Parser {

  private static final Log logger = LogFactory.getLog(RSS_1_0_Parser.class);

  /** Fallback update interval in minutes (one day), used for unknown periods. */
  private static final int MINUTES_PER_DAY = 24 * 60;

  /**
   * Builds a channel (format {@link ChannelFormat#RSS_1_0}) from the given
   * document root.
   *
   * @param cBuilder builder used to create the channel and all dependent
   *                 objects; must not be <code>null</code>.
   * @param root     root element of the feed document.
   * @return the populated channel.
   * @throws ParseException   if the root has no <code>channel</code> child
   *                          in the default namespace.
   * @throws RuntimeException if <code>cBuilder</code> is <code>null</code>.
   */
  static ChannelIF parse(ChannelBuilderIF cBuilder,
                         Element root) throws ParseException {
    if (cBuilder == null) {
      throw new RuntimeException("Without builder no channel can " +
                                 "be created.");
    }
    // One timestamp is shared by all items and by the channel itself.
    Date dateParsed = new Date();

    Namespace defNS = ParserUtils.getDefaultNS(root);
    if (defNS == null) {
      defNS = Namespace.NO_NAMESPACE;
      logger.info("No default namespace found.");
    }

    // Dublin Core elements are looked up in the default namespace when the
    // "dc" prefix is not available.
    Namespace dcNS = ParserUtils.getNamespace(root, "dc");
    if (dcNS == null) {
      dcNS = defNS;
    }

    Namespace syNS = ParserUtils.getNamespace(root, "sy");
    Namespace agNS = ParserUtils.getNamespace(root, "ag");
    Namespace adminNS = ParserUtils.getNamespace(root, "admin");
    Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms");
    Namespace annotateNS = ParserUtils.getNamespace(root, "annotate");
    Namespace rss091NS = ParserUtils.getNamespace(root, "rss091");

    Element channel = root.getChild("channel", defNS);
    if (channel == null) {
      logger.warn("Channel element could not be retrieved from feed.");
      throw new ParseException("No channel element found in feed.");
    }

    ChannelIF chnl =
      cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));
    chnl.setFormat(ChannelFormat.RSS_1_0);

    readChannelProperties(channel, chnl, defNS, dcNS, adminNS, dctermsNS,
                          rss091NS);
    readSyndicationHints(channel, chnl, syNS);
    readItems(cBuilder, root, chnl, defNS, dcNS, agNS, annotateNS, dateParsed);
    readImage(cBuilder, root, chnl, defNS);
    readTextInput(cBuilder, root, chnl, defNS);

    chnl.setLastUpdated(dateParsed);

    return chnl;
  }

  /**
   * Copies the descriptive channel elements (description, link, creator,
   * publisher, language, copyright, rating, docs, dates, generator) from the
   * <code>channel</code> element onto the channel object.
   *
   * @throws ParseException declared so that any such exception raised by the
   *         called project APIs propagates unchanged (their signatures are
   *         not visible in this file).
   */
  private static void readChannelProperties(Element channel, ChannelIF chnl,
                                            Namespace defNS, Namespace dcNS,
                                            Namespace adminNS,
                                            Namespace dctermsNS,
                                            Namespace rss091NS)
    throws ParseException {

    chnl.setDescription(channel.getChildTextTrim("description", defNS));
    chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS)));

    // creator: dc:creator, else rss091:managingEditor
    Element creator = channel.getChild("creator", dcNS);
    if (creator == null) {
      creator = channel.getChild("managingEditor", rss091NS);
    }
    if (creator != null) {
      chnl.setCreator(creator.getTextTrim());
    }

    // publisher: dc:publisher, else rdf:resource of admin:errorReportsTo,
    // else rss091:webMaster; set even when none of them is present.
    String publisher = channel.getChildTextTrim("publisher", dcNS);
    if (publisher == null) {
      Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS);
      if (elErrorReportsTo != null) {
        publisher = elErrorReportsTo.getAttributeValue("resource",
          ParserUtils.getNamespace(elErrorReportsTo, "rdf"));
      }
    }
    if (publisher == null) {
      publisher = channel.getChildTextTrim("webMaster", rss091NS);
    }
    chnl.setPublisher(publisher);

    // language: dc:language, else rss091:language
    Element language = channel.getChild("language", dcNS);
    if (language == null) {
      language = channel.getChild("language", rss091NS);
    }
    if (language != null) {
      chnl.setLanguage(language.getTextTrim());
    }

    // copyright: dc:rights, else rss091:copyright
    Element copyright = channel.getChild("rights", dcNS);
    if (copyright == null) {
      copyright = channel.getChild("copyright", rss091NS);
    }
    if (copyright != null) {
      chnl.setCopyright(copyright.getTextTrim());
    }

    Element rating = channel.getChild("rating", rss091NS);
    if (rating != null) {
      chnl.setRating(rating.getTextTrim());
    }

    // The URI of the default namespace is stored as the docs reference.
    chnl.setDocs(defNS.getURI());

    // publication date: dc:date, else dcterms:issued, else rss091:pubdate
    Element pubDate = channel.getChild("date", dcNS);
    if (pubDate == null) {
      pubDate = channel.getChild("issued", dctermsNS);
    }
    if (pubDate == null) {
      pubDate = channel.getChild("pubdate", rss091NS);
    }
    if (pubDate != null) {
      chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
    }

    // last build date: lastBuildDate, else dcterms:modified,
    // else rss091:lastBuildDate.
    // NOTE(review): the first lookup passes no namespace, unlike every other
    // unprefixed lookup in this class which uses defNS - confirm intent.
    Element lastBuildDate = channel.getChild("lastBuildDate");
    if (lastBuildDate == null) {
      lastBuildDate = channel.getChild("modified", dctermsNS);
    }
    if (lastBuildDate == null) {
      lastBuildDate = channel.getChild("lastBuildDate", rss091NS);
    }
    if (lastBuildDate != null) {
      chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
    }

    // generator: rdf:resource attribute of admin:generatorAgent
    Element elGenerator = channel.getChild("generatorAgent", adminNS);
    if (elGenerator != null) {
      Attribute generator = elGenerator.getAttribute("resource",
        ParserUtils.getNamespace(elGenerator, "rdf"));
      if (generator != null) {
        chnl.setGenerator(generator.getValue());
      }
    }
  }

  /**
   * Reads the <code>sy</code> elements (updatePeriod, updateFrequency,
   * updateBase) and, when both period and a numeric frequency are present,
   * derives the channel's TTL from them.
   *
   * <p>A non-numeric updateFrequency is logged and skipped (the same way
   * malformed image dimensions are handled) instead of aborting the parse;
   * no TTL is set in that case.</p>
   *
   * @throws ParseException declared so that any such exception raised by the
   *         called project APIs propagates unchanged (their signatures are
   *         not visible in this file).
   */
  private static void readSyndicationHints(Element channel, ChannelIF chnl,
                                           Namespace syNS)
    throws ParseException {

    Element updatePeriod = channel.getChild("updatePeriod", syNS);
    if (updatePeriod != null) {
      chnl.setUpdatePeriod(updatePeriod.getTextTrim());
    }

    boolean frequencyParsed = false;
    Element updateFrequency = channel.getChild("updateFrequency", syNS);
    if (updateFrequency != null) {
      String frequencyText = updateFrequency.getTextTrim();
      int frequency = 0;
      try {
        frequency = Integer.parseInt(frequencyText);
        frequencyParsed = true;
      } catch (NumberFormatException e) {
        logger.warn("Ignoring non-numeric updateFrequency '"
                    + frequencyText + "'.", e);
      }
      if (frequencyParsed) {
        chnl.setUpdateFrequency(frequency);
      }
    }

    Element updateBase = channel.getChild("updateBase", syNS);
    if (updateBase != null) {
      chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim()));
    }

    if ((updatePeriod != null) && frequencyParsed) {
      int ttl = getTtl(chnl.getUpdatePeriod(), chnl.getUpdateFrequency());
      chnl.setTtl(ttl);
    }
  }

  /**
   * Creates one item per <code>item</code> child of the root element and
   * fills in creator, subject, date, source and comments where present.
   *
   * @throws ParseException declared so that any such exception raised by the
   *         called project APIs propagates unchanged (their signatures are
   *         not visible in this file).
   */
  private static void readItems(ChannelBuilderIF cBuilder, Element root,
                                ChannelIF chnl, Namespace defNS,
                                Namespace dcNS, Namespace agNS,
                                Namespace annotateNS, Date dateParsed)
    throws ParseException {

    List items = root.getChildren("item", defNS);
    Iterator i = items.iterator();
    while (i.hasNext()) {
      Element item = (Element) i.next();

      // title, with a placeholder when the element is missing
      Element elTitle = item.getChild("title", defNS);
      String strTitle = "<No Title>";
      if (elTitle != null) {
        strTitle = elTitle.getTextTrim();
      }
      if (logger.isDebugEnabled()) {
        logger.debug("Item element found (" + strTitle + ").");
      }

      Element elLink = item.getChild("link", defNS);
      String strLink = "";
      if (elLink != null) {
        strLink = elLink.getTextTrim();
      }

      // description: core element, else dc:description
      Element elDesc = item.getChild("description", defNS);
      if (elDesc == null) {
        elDesc = item.getChild("description", dcNS);
      }
      String strDesc = "";
      if (elDesc != null) {
        strDesc = elDesc.getTextTrim();
      }

      ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc,
                                           ParserUtils.getURL(strLink));
      rssItem.setFound(dateParsed);

      Element elCreator = item.getChild("creator", dcNS);
      if (elCreator != null) {
        rssItem.setCreator(elCreator.getTextTrim());
      }

      Element elSubject = item.getChild("subject", dcNS);
      if (elSubject != null) {
        rssItem.setSubject(elSubject.getTextTrim());
      }

      Element elDate = item.getChild("date", dcNS);
      if (elDate != null) {
        rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
      }

      readItemSource(cBuilder, item, rssItem, dcNS, agNS);

      // comments: rdf:resource attribute of annotate:reference
      Element elReference = item.getChild("reference", annotateNS);
      if (elReference != null) {
        Attribute resource = elReference.getAttribute("resource",
          ParserUtils.getNamespace(elReference, "rdf"));
        if (resource != null) {
          URL resourceURL = ParserUtils.getURL(resource.getValue());
          if (resourceURL != null) {
            rssItem.setComments(resourceURL);
          }
        }
      }
    }
  }

  /**
   * Attaches an item source to the item when a source location is present.
   *
   * <p>If <code>ag:sourceURL</code> exists it supplies the location, and
   * <code>ag:source</code> / <code>ag:timestamp</code> supply the optional
   * name and timestamp. Otherwise <code>dc:source</code> supplies the
   * location and the name is the fixed string "Source".</p>
   *
   * @throws ParseException declared so that any such exception raised by the
   *         called project APIs propagates unchanged (their signatures are
   *         not visible in this file).
   */
  private static void readItemSource(ChannelBuilderIF cBuilder, Element item,
                                     ItemIF rssItem, Namespace dcNS,
                                     Namespace agNS)
    throws ParseException {

    String sourceName = null;
    String sourceLocation = null;
    Date sourceTimestamp = null;

    Element elSourceURL = item.getChild("sourceURL", agNS);
    if (elSourceURL == null) {
      elSourceURL = item.getChild("source", dcNS);
      if (elSourceURL != null) {
        sourceLocation = elSourceURL.getTextTrim();
        sourceName = "Source";
      }
    } else {
      sourceLocation = elSourceURL.getTextTrim();
      Element elSourceName = item.getChild("source", agNS);
      if (elSourceName != null) {
        sourceName = elSourceName.getTextTrim();
      }
      Element elSourceTimestamp = item.getChild("timestamp", agNS);
      if (elSourceTimestamp != null) {
        sourceTimestamp = ParserUtils.getDate(elSourceTimestamp.getTextTrim());
      }
    }

    if (sourceLocation != null) {
      ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName,
        sourceLocation, sourceTimestamp);
      rssItem.setSource(itemSource);
    }
  }

  /**
   * Creates the channel image from the root's <code>image</code> child, if
   * there is one. Width and height that are not valid integers are logged
   * and left unset.
   *
   * @throws ParseException declared so that any such exception raised by the
   *         called project APIs propagates unchanged (their signatures are
   *         not visible in this file).
   */
  private static void readImage(ChannelBuilderIF cBuilder, Element root,
                                ChannelIF chnl, Namespace defNS)
    throws ParseException {

    Element image = root.getChild("image", defNS);
    if (image == null) {
      return;
    }

    ImageIF rssImage =
      cBuilder.createImage(image.getChildTextTrim("title", defNS),
                           ParserUtils.getURL(image.getChildTextTrim("url", defNS)),
                           ParserUtils.getURL(image.getChildTextTrim("link", defNS)));

    Element imgWidth = image.getChild("width", defNS);
    if (imgWidth != null) {
      try {
        rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
      } catch (NumberFormatException e) {
        logger.warn(e);
      }
    }

    Element imgHeight = image.getChild("height", defNS);
    if (imgHeight != null) {
      try {
        rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
      } catch (NumberFormatException e) {
        logger.warn(e);
      }
    }

    Element imgDescr = image.getChild("description", defNS);
    if (imgDescr != null) {
      rssImage.setDescription(imgDescr.getTextTrim());
    }

    chnl.setImage(rssImage);
  }

  /**
   * Creates the channel's text input from the root's <code>textinput</code>
   * child, if there is one. Missing sub-elements are passed to the builder
   * as <code>null</code>.
   *
   * @throws ParseException declared so that any such exception raised by the
   *         called project APIs propagates unchanged (their signatures are
   *         not visible in this file).
   */
  private static void readTextInput(ChannelBuilderIF cBuilder, Element root,
                                    ChannelIF chnl, Namespace defNS)
    throws ParseException {

    Element txtinp = root.getChild("textinput", defNS);
    if (txtinp == null) {
      return;
    }

    // getChildTextTrim yields null for an absent child, which is exactly
    // the value wanted for a missing title/description/name.
    String tiTitle = txtinp.getChildTextTrim("title", defNS);
    String tiDescr = txtinp.getChildTextTrim("description", defNS);
    String tiName = txtinp.getChildTextTrim("name", defNS);

    // The link is only converted when the element exists.
    URL tiLink = null;
    Element elLink = txtinp.getChild("link", defNS);
    if (elLink != null) {
      tiLink = ParserUtils.getURL(elLink.getTextTrim());
    }

    TextInputIF rssTextInput =
      cBuilder.createTextInput(tiTitle, tiDescr, tiName, tiLink);
    chnl.setTextInput(rssTextInput);
  }

  /**
   * Computes the time to live in minutes: the length of the update period
   * divided by the number of updates per period.
   *
   * @param updatePeriod    one of the <code>ChannelIF.UPDATE_*</code>
   *                        values; <code>null</code> or any other value is
   *                        treated as daily.
   * @param updateFrequency number of updates within one period; values
   *                        below 1 are treated as 1 so that a malformed
   *                        feed can neither cause a division by zero nor
   *                        yield a negative TTL.
   * @return the TTL in minutes (integer division).
   */
  private static int getTtl(final String updatePeriod, int updateFrequency) {
    int minutes;
    if (updatePeriod == null) {
      minutes = MINUTES_PER_DAY;
    } else if (updatePeriod.equals(ChannelIF.UPDATE_HOURLY)) {
      minutes = 60;
    } else if (updatePeriod.equals(ChannelIF.UPDATE_DAILY)) {
      minutes = MINUTES_PER_DAY;
    } else if (updatePeriod.equals(ChannelIF.UPDATE_WEEKLY)) {
      minutes = 168 * 60;
    } else if (updatePeriod.equals(ChannelIF.UPDATE_MONTHLY)) {
      minutes = 30 * MINUTES_PER_DAY;
    } else if (updatePeriod.equals(ChannelIF.UPDATE_YEARLY)) {
      minutes = 365 * MINUTES_PER_DAY;
    } else {
      minutes = MINUTES_PER_DAY;
    }
    if (updateFrequency < 1) {
      return minutes;
    }
    return minutes / updateFrequency;
  }
}