KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > nava > informa > parsers > Atom_0_3_Parser


1 //
2
// Informa -- RSS Library for Java
3
// Copyright (c) 2002 by Niko Schmuck
4
//
5
// Niko Schmuck
6
// http://sourceforge.net/projects/informa
7
// mailto:niko_schmuck@users.sourceforge.net
8
//
9
// This library is free software.
10
//
11
// You may redistribute it and/or modify it under the terms of the GNU
12
// Lesser General Public License as published by the Free Software Foundation.
13
//
14
// Version 2.1 of the license should be included with this distribution in
15
// the file LICENSE. If the license is not included with this distribution,
16
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
17
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
18
// MA 02139 USA.
19
//
20
// This library is distributed in the hope that it will be useful,
21
// but WITHOUT ANY WARRANTY; without even the implied warranty of
22
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23
// Lesser General Public License for more details.
24
//
25

26 // $Id: Atom_0_3_Parser.java,v 1.6 2004/07/29 12:23:22 pitosalas Exp $
27

28 package de.nava.informa.parsers;
29
30 import java.net.URL JavaDoc;
31 import java.util.Date JavaDoc;
32 import java.util.Iterator JavaDoc;
33 import java.util.List JavaDoc;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.jdom.Element;
38 import org.jdom.Namespace;
39
40 import de.nava.informa.core.ChannelBuilderIF;
41 import de.nava.informa.core.ChannelFormat;
42 import de.nava.informa.core.ChannelIF;
43 import de.nava.informa.core.ItemIF;
44 import de.nava.informa.core.ParseException;
45 import de.nava.informa.utils.ParserUtils;
46
47 /**
48  * Parser which reads in document instances according to the Atom 0.3
49  * specification and generates a news channel object. Currently the
50  * support for the atom syntax is not complete.
51  *
52  * @author Niko Schmuck
53  */

54 class Atom_0_3_Parser {
55
56   private static Log logger = LogFactory.getLog(Atom_0_3_Parser.class);
57
58   static ChannelIF parse(ChannelBuilderIF cBuilder, Element channel)
59     throws ParseException {
60     if (cBuilder == null) {
61       throw new RuntimeException JavaDoc(
62         "Without builder no channel can " + "be created.");
63     }
64     Date JavaDoc dateParsed = new Date JavaDoc();
65     Namespace defNS = ParserUtils.getDefaultNS(channel);
66     if (defNS == null) {
67       defNS = Namespace.NO_NAMESPACE;
68       logger.info("No default namespace found.");
69     }
70     // RSS 1.0 Dublin Core Module namespace
71
Namespace dcNS = ParserUtils.getNamespace(channel, "dc");
72     if (dcNS == null) {
73       logger.debug("No namespace for dublin core found");
74       dcNS = defNS;
75     }
76     
77     logger.debug("start parsing.");
78
79     // get version attribute
80
String JavaDoc formatVersion = "0.3";
81     if (channel.getAttribute("version") != null) {
82       formatVersion = channel.getAttribute("version").getValue().trim();
83       logger.debug("Atom version " + formatVersion + " specified in document.");
84     } else {
85       logger.info("No format version specified, using default.");
86     }
87
88     // --- read in channel information
89

90     // title element
91
ChannelIF chnl =
92       cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));
93     // TODO: support attributes: type, mode
94

95     chnl.setFormat(ChannelFormat.ATOM_0_3);
96
97     // language
98
String JavaDoc language = channel.getAttributeValue("lang",Namespace.XML_NAMESPACE);
99     if (language != null) {
100       chnl.setLanguage( language );
101     }
102         
103     // description element
104
if (channel.getChild("description") != null) {
105       chnl.setDescription(channel.getChildTextTrim("description", defNS));
106     } else {
107       // fallback
108
chnl.setDescription(channel.getChildTextTrim("tagline", defNS));
109     }
110
111     // ttl in dc namespace
112
Element ttl = channel.getChild("ttl", dcNS);
113     if (ttl != null) {
114       String JavaDoc ttlString = ttl.getTextTrim();
115       if ( ttlString!= null) {
116         chnl.setTtl( Integer.parseInt(ttlString));
117       }
118     }
119     
120     // lastbuild element : modified ?
121
Element modified = channel.getChild("modified", defNS);
122     if (modified != null) {
123       chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim()));
124     }
125   
126     // TODO : issued value
127
/*
128     if (modified != null) {
129       modified = channel.getChild("issued", defNS);
130       chnl.setLastBuildDate (ParserUtils.getDate(modified.getTextTrim()));
131     }
132     */

133     
134     // author element
135
Element author = channel.getChild("author", defNS);
136     if (author != null) {
137       chnl.setCreator(author.getChildTextTrim("name", defNS));
138     }
139
140     // generator element
141
Element generator = channel.getChild("generator", defNS);
142     if (generator != null) {
143       chnl.setGenerator(generator.getTextTrim());
144     }
145
146     // copyright element
147
Element copyright = channel.getChild("copyright", defNS);
148     if (copyright != null) {
149       chnl.setCopyright(getValue(copyright));
150     }
151     
152     // n link elements
153
// TODO : type attribut of link (text, application...)
154
List JavaDoc links = channel.getChildren("link", defNS);
155     Iterator JavaDoc i = links.iterator();
156     while (i.hasNext()) {
157       Element linkElement = (Element) i.next();
158       // use first 'alternate' link
159
String JavaDoc rel = linkElement.getAttributeValue("rel");
160       String JavaDoc href = linkElement.getAttributeValue("href");
161       if (rel != null && href != null && rel.equals("alternate")) {
162         URL JavaDoc linkURL = ParserUtils.getURL(href);
163         chnl.setSite(linkURL);
164         break;
165       }
166       
167       // TODO: further extraction of link information
168
}
169
170     // 1..n entry elements
171
List JavaDoc items = channel.getChildren("entry", defNS);
172     i = items.iterator();
173     while (i.hasNext()) {
174       Element item = (Element) i.next();
175
176       // get title element
177
// TODO : deal with type attribut
178
Element elTitle = item.getChild("title", defNS);
179       String JavaDoc strTitle = "<No Title>";
180       if (elTitle != null) {
181         strTitle = getValue( elTitle );
182         logger.debug("Parsing title "+elTitle.getTextTrim()+"->"+strTitle);
183       }
184       if (logger.isDebugEnabled()) {
185         logger.debug("Entry element found (" + strTitle + ").");
186       }
187       // TODO : deal with multiple links
188
// get link element
189
Element elLink = item.getChild("link", defNS);
190       String JavaDoc strLink = "";
191       if (elLink != null) {
192         strLink = elLink.getAttributeValue("href").trim();
193       }
194       // TODO dealing with elLink.getAttributeValue("rel")
195
logger.info("url read : "+strLink+" ,"+elLink.getAttributeValue("rel"));
196       
197       // get description element
198
Element elDesc = item.getChild("summary", defNS);
199       String JavaDoc strDesc = "";
200       if (elDesc != null) {
201         strDesc = getValue(elDesc); //.getTextTrim();
202
}
203       // TODO: where to store the content?
204

205       // generate new news item (link to article)
206
ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc,
207                                            ParserUtils.getURL(strLink));
208       curItem.setFound(dateParsed);
209
210       // get issued element (required)
211
Element elIssued = item.getChild("issued", defNS);
212       curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim()));
213
214       // get subject element
215
Element elSubject = item.getChild("subject", dcNS);
216       if (elSubject != null) {
217         // TODO: Mulitple subject elements not handled currently
218
curItem.setSubject(elSubject.getTextTrim());
219       }
220     }
221
222     // set to current date
223
chnl.setLastUpdated(dateParsed);
224
225     return chnl;
226   }
227
228   private static String JavaDoc getValue(Element elt) {
229     if (elt == null)
230       return "";
231     String JavaDoc typeElt = elt.getAttributeValue("type");
232     String JavaDoc modeElt = elt.getAttributeValue("mode");
233     String JavaDoc value;
234     String JavaDoc unescapedValue = elt.getTextTrim();
235     
236     if( unescapedValue==null || unescapedValue.length()==0) {
237       // maybe value in a div ?
238
java.util.List JavaDoc listContent = elt.getContent();
239       Iterator JavaDoc iter = listContent.iterator();
240       while ( iter.hasNext() ) {
241         org.jdom.Content content = (org.jdom.Content) iter.next();
242         logger.debug("getValue : element read = "+ content);
243         if (content instanceof Element) {
244           unescapedValue = ((Element)content).getValue();
245           logger.debug("elt found in type :"+unescapedValue);
246         }
247       }
248     }
249     value = unescapedValue;
250     if ("base64".equals(modeElt)) {
251       value = ParserUtils.decodeBase64(value);
252     }
253     if ("text/html".equals(typeElt) || "application/xhtml+xml".equals(typeElt)) {
254       value = ParserUtils.unEscape(value);
255     }
256   
257     return value;
258   }
259
260 }
261
Popular Tags