|                                                                                                              1
 26
 28  package de.nava.informa.parsers;
 29
 30  import java.net.URL
  ; 31  import java.util.Date
  ; 32  import java.util.Iterator
  ; 33  import java.util.List
  ; 34
 35  import org.apache.commons.logging.Log;
 36  import org.apache.commons.logging.LogFactory;
 37  import org.jdom.Element;
 38  import org.jdom.Namespace;
 39
 40  import de.nava.informa.core.ChannelBuilderIF;
 41  import de.nava.informa.core.ChannelFormat;
 42  import de.nava.informa.core.ChannelIF;
 43  import de.nava.informa.core.ItemIF;
 44  import de.nava.informa.core.ParseException;
 45  import de.nava.informa.utils.ParserUtils;
 46
 47
 54  class Atom_0_3_Parser {
 55
 56    private static Log logger = LogFactory.getLog(Atom_0_3_Parser.class);
 57
 58    static ChannelIF parse(ChannelBuilderIF cBuilder, Element channel)
 59      throws ParseException {
 60      if (cBuilder == null) {
 61        throw new RuntimeException
  ( 62          "Without builder no channel can " + "be created.");
 63      }
 64      Date
  dateParsed = new Date  (); 65      Namespace defNS = ParserUtils.getDefaultNS(channel);
 66      if (defNS == null) {
 67        defNS = Namespace.NO_NAMESPACE;
 68        logger.info("No default namespace found.");
 69      }
 70          Namespace dcNS = ParserUtils.getNamespace(channel, "dc");
 72      if (dcNS == null) {
 73        logger.debug("No namespace for dublin core found");
 74        dcNS = defNS;
 75      }
 76
 77      logger.debug("start parsing.");
 78
 79          String
  formatVersion = "0.3"; 81      if (channel.getAttribute("version") != null) {
 82        formatVersion = channel.getAttribute("version").getValue().trim();
 83        logger.debug("Atom version " + formatVersion + " specified in document.");
 84      } else {
 85        logger.info("No format version specified, using default.");
 86      }
 87
 88
 90          ChannelIF chnl =
 92        cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));
 93
 95      chnl.setFormat(ChannelFormat.ATOM_0_3);
 96
 97          String
  language = channel.getAttributeValue("lang",Namespace.XML_NAMESPACE); 99      if (language != null) {
 100       chnl.setLanguage( language );
 101     }
 102
 103         if (channel.getChild("description") != null) {
 105       chnl.setDescription(channel.getChildTextTrim("description", defNS));
 106     } else {
 107             chnl.setDescription(channel.getChildTextTrim("tagline", defNS));
 109     }
 110
 111         Element ttl = channel.getChild("ttl", dcNS);
 113     if (ttl != null) {
 114       String
  ttlString =  ttl.getTextTrim(); 115       if ( ttlString!= null) {
 116         chnl.setTtl( Integer.parseInt(ttlString));
 117       }
 118     }
 119
 120         Element modified = channel.getChild("modified", defNS);
 122     if (modified != null) {
 123       chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim()));
 124     }
 125
 126
 133
 134         Element author = channel.getChild("author", defNS);
 136     if (author != null) {
 137       chnl.setCreator(author.getChildTextTrim("name", defNS));
 138     }
 139
 140         Element generator = channel.getChild("generator", defNS);
 142     if (generator != null) {
 143       chnl.setGenerator(generator.getTextTrim());
 144     }
 145
 146         Element copyright = channel.getChild("copyright", defNS);
 148     if (copyright != null) {
 149       chnl.setCopyright(getValue(copyright));
 150     }
 151
 152             List
  links = channel.getChildren("link", defNS); 155     Iterator
  i = links.iterator(); 156     while (i.hasNext()) {
 157       Element linkElement = (Element) i.next();
 158             String
  rel = linkElement.getAttributeValue("rel"); 160       String
  href = linkElement.getAttributeValue("href"); 161       if (rel != null && href != null && rel.equals("alternate")) {
 162         URL
  linkURL = ParserUtils.getURL(href); 163         chnl.setSite(linkURL);
 164         break;
 165       }
 166
 167           }
 169
 170         List
  items = channel.getChildren("entry", defNS); 172     i = items.iterator();
 173     while (i.hasNext()) {
 174       Element item = (Element) i.next();
 175
 176                   Element elTitle = item.getChild("title", defNS);
 179       String
  strTitle = "<No Title>"; 180       if (elTitle != null) {
 181         strTitle = getValue( elTitle );
 182         logger.debug("Parsing title "+elTitle.getTextTrim()+"->"+strTitle);
 183       }
 184       if (logger.isDebugEnabled()) {
 185         logger.debug("Entry element found (" + strTitle + ").");
 186       }
 187                   Element elLink = item.getChild("link", defNS);
 190       String
  strLink = ""; 191       if (elLink != null) {
 192         strLink = elLink.getAttributeValue("href").trim();
 193       }
 194             logger.info("url read : "+strLink+" ,"+elLink.getAttributeValue("rel"));
 196
 197             Element elDesc = item.getChild("summary", defNS);
 199       String
  strDesc = ""; 200       if (elDesc != null) {
 201         strDesc = getValue(elDesc);       }
 203
 205             ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc,
 207                                            ParserUtils.getURL(strLink));
 208       curItem.setFound(dateParsed);
 209
 210             Element elIssued = item.getChild("issued", defNS);
 212       curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim()));
 213
 214             Element elSubject = item.getChild("subject", dcNS);
 216       if (elSubject != null) {
 217                 curItem.setSubject(elSubject.getTextTrim());
 219       }
 220     }
 221
 222         chnl.setLastUpdated(dateParsed);
 224
 225     return chnl;
 226   }
 227
 228   private static String
  getValue(Element elt) { 229     if (elt == null)
 230       return "";
 231     String
  typeElt = elt.getAttributeValue("type"); 232     String
  modeElt = elt.getAttributeValue("mode"); 233     String
  value; 234     String
  unescapedValue = elt.getTextTrim(); 235
 236     if( unescapedValue==null || unescapedValue.length()==0) {
 237             java.util.List
  listContent = elt.getContent(); 239       Iterator
  iter = listContent.iterator(); 240       while ( iter.hasNext() ) {
 241         org.jdom.Content content = (org.jdom.Content) iter.next();
 242         logger.debug("getValue : element read = "+ content);
 243         if (content instanceof Element) {
 244           unescapedValue = ((Element)content).getValue();
 245           logger.debug("elt found in type :"+unescapedValue);
 246         }
 247       }
 248     }
 249     value = unescapedValue;
 250     if ("base64".equals(modeElt)) {
 251       value = ParserUtils.decodeBase64(value);
 252     }
 253     if ("text/html".equals(typeElt) || "application/xhtml+xml".equals(typeElt)) {
 254       value = ParserUtils.unEscape(value);
 255     }
 256
 257     return value;
 258   }
 259
 260 }
 261
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |