package de.nava.informa.parsers;

import java.net.URL;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Element;
import org.jdom.Namespace;

import de.nava.informa.core.ChannelBuilderIF;
import de.nava.informa.core.ChannelFormat;
import de.nava.informa.core.ChannelIF;
import de.nava.informa.core.ItemIF;
import de.nava.informa.core.ParseException;
import de.nava.informa.utils.ParserUtils;

/**
 * Parser that builds a {@link ChannelIF} (and its items) from the root
 * element of an Atom 0.3 feed document.
 *
 * <p>The root element's children are read in the document's default
 * namespace; the <code>ttl</code> and <code>subject</code> elements are read
 * in the namespace bound to the <code>dc</code> prefix, falling back to the
 * default namespace when that prefix is not declared.</p>
 */
class Atom_0_3_Parser {

  private static final Log logger = LogFactory.getLog(Atom_0_3_Parser.class);

  /**
   * Creates a channel from the given feed root element and populates it with
   * one item per <code>entry</code> child.
   *
   * @param cBuilder builder used to create the channel and its items; must
   *                 not be <code>null</code>
   * @param channel  root element of the Atom 0.3 document
   * @return the populated channel, with its format set to
   *         {@link ChannelFormat#ATOM_0_3} and its last-updated date set to
   *         the time this method was invoked
   * @throws RuntimeException if <code>cBuilder</code> is <code>null</code>
   * @throws ParseException   declared for callers; propagated from the
   *                          builder / helper calls if they raise it
   */
  static ChannelIF parse(ChannelBuilderIF cBuilder, Element channel)
      throws ParseException {
    if (cBuilder == null) {
      throw new RuntimeException("Without builder no channel can be created.");
    }

    // One timestamp is shared by the channel and all items of this run.
    Date dateParsed = new Date();

    Namespace defNS = ParserUtils.getDefaultNS(channel);
    if (defNS == null) {
      defNS = Namespace.NO_NAMESPACE;
      logger.info("No default namespace found.");
    }

    Namespace dcNS = ParserUtils.getNamespace(channel, "dc");
    if (dcNS == null) {
      logger.debug("No namespace for dublin core found");
      dcNS = defNS;
    }

    logger.debug("start parsing.");

    // The version is only reported in the log; it does not alter parsing.
    String formatVersion = "0.3";
    if (channel.getAttribute("version") != null) {
      formatVersion = channel.getAttribute("version").getValue().trim();
      logger.debug("Atom version " + formatVersion + " specified in document.");
    } else {
      logger.info("No format version specified, using default.");
    }

    ChannelIF chnl =
        cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));
    chnl.setFormat(ChannelFormat.ATOM_0_3);

    String language = channel.getAttributeValue("lang", Namespace.XML_NAMESPACE);
    if (language != null) {
      chnl.setLanguage(language);
    }

    // Look the element up in the same namespace it is read from; the
    // un-namespaced lookup never matched in a namespaced feed.
    if (channel.getChild("description", defNS) != null) {
      chnl.setDescription(channel.getChildTextTrim("description", defNS));
    } else {
      chnl.setDescription(channel.getChildTextTrim("tagline", defNS));
    }

    Element ttl = channel.getChild("ttl", dcNS);
    if (ttl != null) {
      String ttlString = ttl.getTextTrim();
      if (ttlString != null && ttlString.length() > 0) {
        try {
          chnl.setTtl(Integer.parseInt(ttlString));
        } catch (NumberFormatException e) {
          // A malformed ttl should not abort parsing of the whole feed.
          logger.warn("Ignoring non-numeric ttl value '" + ttlString + "'.");
        }
      }
    }

    Element modified = channel.getChild("modified", defNS);
    if (modified != null) {
      chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim()));
    }

    Element author = channel.getChild("author", defNS);
    if (author != null) {
      chnl.setCreator(author.getChildTextTrim("name", defNS));
    }

    Element generator = channel.getChild("generator", defNS);
    if (generator != null) {
      chnl.setGenerator(generator.getTextTrim());
    }

    Element copyright = channel.getChild("copyright", defNS);
    if (copyright != null) {
      chnl.setCopyright(getValue(copyright));
    }

    // The first link with rel="alternate" and an href becomes the site URL.
    List links = channel.getChildren("link", defNS);
    Iterator i = links.iterator();
    while (i.hasNext()) {
      Element linkElement = (Element) i.next();
      String rel = linkElement.getAttributeValue("rel");
      String href = linkElement.getAttributeValue("href");
      if (rel != null && href != null && rel.equals("alternate")) {
        URL linkURL = ParserUtils.getURL(href);
        chnl.setSite(linkURL);
        break;
      }
    }

    List items = channel.getChildren("entry", defNS);
    i = items.iterator();
    while (i.hasNext()) {
      parseEntry(cBuilder, chnl, (Element) i.next(), defNS, dcNS, dateParsed);
    }

    chnl.setLastUpdated(dateParsed);

    return chnl;
  }

  /**
   * Creates one item for the given <code>entry</code> element via the
   * builder and fills in its found date, issue date and subject.
   *
   * <p>Missing <code>title</code>, <code>link</code>, <code>summary</code>,
   * <code>issued</code> or <code>subject</code> children are tolerated: the
   * title defaults to <code>"&lt;No Title&gt;"</code>, link and summary to
   * the empty string, and date / subject are simply left unset.</p>
   */
  private static void parseEntry(ChannelBuilderIF cBuilder, ChannelIF chnl,
      Element item, Namespace defNS, Namespace dcNS, Date dateParsed)
      throws ParseException {
    Element elTitle = item.getChild("title", defNS);
    String strTitle = "<No Title>";
    if (elTitle != null) {
      strTitle = getValue(elTitle);
      logger.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle);
    }
    if (logger.isDebugEnabled()) {
      logger.debug("Entry element found (" + strTitle + ").");
    }

    // Only the first link child of the entry is considered.
    Element elLink = item.getChild("link", defNS);
    String strLink = "";
    String strRel = null;
    if (elLink != null) {
      String href = elLink.getAttributeValue("href");
      if (href != null) {
        strLink = href.trim();
      }
      strRel = elLink.getAttributeValue("rel");
    }
    logger.info("url read : " + strLink + " ," + strRel);

    Element elDesc = item.getChild("summary", defNS);
    String strDesc = "";
    if (elDesc != null) {
      strDesc = getValue(elDesc);
    }

    // NOTE(review): relies on ParserUtils.getURL coping with an empty
    // string when the entry carries no usable link - confirm.
    ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc,
        ParserUtils.getURL(strLink));
    curItem.setFound(dateParsed);

    Element elIssued = item.getChild("issued", defNS);
    if (elIssued != null) {
      curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim()));
    }

    Element elSubject = item.getChild("subject", dcNS);
    if (elSubject != null) {
      curItem.setSubject(elSubject.getTextTrim());
    }
  }

  /**
   * Extracts the textual value of an Atom content-like element.
   *
   * <p>The element's trimmed text is used; when that is empty, the value of
   * the last child element (if any) is taken instead. The result is then
   * base64-decoded when <code>mode="base64"</code> and unescaped when
   * <code>type</code> is <code>text/html</code> or
   * <code>application/xhtml+xml</code>.</p>
   *
   * @param elt element to read; may be <code>null</code>
   * @return the extracted value, or the empty string if <code>elt</code> is
   *         <code>null</code>
   */
  private static String getValue(Element elt) {
    if (elt == null) {
      return "";
    }
    String typeElt = elt.getAttributeValue("type");
    String modeElt = elt.getAttributeValue("mode");
    String unescapedValue = elt.getTextTrim();

    if (unescapedValue == null || unescapedValue.length() == 0) {
      // No direct text: fall back to nested elements (e.g. inline XHTML).
      // Each element child overwrites the previous one, so the last wins.
      List listContent = elt.getContent();
      Iterator iter = listContent.iterator();
      while (iter.hasNext()) {
        org.jdom.Content content = (org.jdom.Content) iter.next();
        logger.debug("getValue : element read = " + content);
        if (content instanceof Element) {
          unescapedValue = ((Element) content).getValue();
          logger.debug("elt found in type :" + unescapedValue);
        }
      }
    }

    String value = unescapedValue;
    if ("base64".equals(modeElt)) {
      value = ParserUtils.decodeBase64(value);
    }
    if ("text/html".equals(typeElt) || "application/xhtml+xml".equals(typeElt)) {
      value = ParserUtils.unEscape(value);
    }

    return value;
  }

}