KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > nava > informa > utils > ParserUtils


//
// Informa -- RSS Library for Java
// Copyright (c) 2002 by Niko Schmuck
//
// Niko Schmuck
// http://sourceforge.net/projects/informa
// mailto:niko_schmuck@users.sourceforge.net
//
// This library is free software.
//
// You may redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation.
//
// Version 2.1 of the license should be included with this distribution in
// the file LICENSE. If the license is not included with this distribution,
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
// MA 02139 USA.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//

// $Id: ParserUtils.java,v 1.12 2004/06/28 19:33:44 niko_schmuck Exp $
27

28 package de.nava.informa.utils;
29
import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Element;
import org.jdom.Namespace;
41
42 /**
43  * Utility class providing convenience methods to (XML) parsing mechanisms.
44  *
45  * @author Niko Schmuck (niko@nava.de)
46  */

47 public final class ParserUtils {
48
49   private static Log logger = LogFactory.getLog(ParserUtils.class);
50
51   private ParserUtils() {
52   }
53
54   public static URL JavaDoc getURL(String JavaDoc toURL) {
55     URL JavaDoc result = null;
56     try {
57       if ((toURL != null) && (toURL.trim().length() > 0))
58           result = new URL JavaDoc(toURL);
59     } catch (java.net.MalformedURLException JavaDoc e) {
60       logger.warn("Invalid URL " + toURL + " given.");
61     }
62     return result;
63   }
64
65   public static Namespace getDefaultNS(Element element) {
66     return getNamespace(element, "");
67   }
68
69   public static Namespace getNamespace(Element element, String JavaDoc prefix) {
70     // Namespace ns = null;
71
// Iterator it = element.getAdditionalNamespaces().iterator();
72
// while (it.hasNext()) {
73
// Namespace curNS = (Namespace) it.next();
74
// if (curNS.getPrefix().equals(prefix)) {
75
// ns = curNS;
76
// break;
77
// }
78
// }
79
Namespace ns = (prefix == null) ? element.getNamespace("") : element
80         .getNamespace(prefix);
81     return ns;
82   }
83
84   private static SimpleDateFormat JavaDoc[] dateFormats = null;
85
86   static {
87     final String JavaDoc[] possibleDateFormats = { "EEE, dd MMM yyyy HH:mm:ss z", //RFC_822
88
"yyyy-MM-dd'T'HH:mm:ssZ", "yyyy-MM-dd'T'HH:mm:sszzzz",
89         "yyyy-MM-dd'T'HH:mm:ss z", "yyyy-MM-dd'T'HH:mm:ssz", //ISO_8601
90
"yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd'T'HHmmss.SSSz",
91
92         "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"};
93
94     dateFormats = new SimpleDateFormat JavaDoc[possibleDateFormats.length];
95     TimeZone JavaDoc gmtTZ = TimeZone.getTimeZone("GMT");
96     for (int i = 0; i < possibleDateFormats.length; i++) {
97       dateFormats[i] = new SimpleDateFormat JavaDoc(possibleDateFormats[i],
98           Locale.ENGLISH);
99       dateFormats[i].setTimeZone(gmtTZ);
100     }
101
102   }
103
104   // Mon, 07 Oct 2002 03:16:15 GMT
105
private static SimpleDateFormat JavaDoc dfA = new SimpleDateFormat JavaDoc(
106       "EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH);
107
108   // 2002-09-19T02:51:16+0200
109
private static SimpleDateFormat JavaDoc dfB = new SimpleDateFormat JavaDoc(
110       "yyyy-MM-dd'T'HH:mm:ssZ");
111
112   // 2002-09-19T02:51:16
113
private static SimpleDateFormat JavaDoc dfC = new SimpleDateFormat JavaDoc(
114       "yyyy-MM-dd'T'HH:mm:ss");
115
116   // 2002-09-19
117
private static SimpleDateFormat JavaDoc dfD = new SimpleDateFormat JavaDoc("yyyy-MM-dd");
118
119   public static Date JavaDoc getDate(String JavaDoc strdate) {
120     Date JavaDoc result = null;
121     strdate = strdate.trim();
122     if (strdate.length() > 10) {
123
124       // TODO deal with +4:00 (no zero before hour)
125
if ((strdate.substring(strdate.length() - 5).indexOf("+") == 0 || strdate
126           .substring(strdate.length() - 5).indexOf("-") == 0)
127           && strdate.substring(strdate.length() - 5).indexOf(":") == 2) {
128
129         String JavaDoc sign = strdate.substring(strdate.length() - 5,
130             strdate.length() - 4);
131
132         strdate = strdate.substring(0, strdate.length() - 5) + sign + "0"
133             + strdate.substring(strdate.length() - 4);
134         // logger.debug("CASE1 : new date " + strdate + " ? "
135
// + strdate.substring(0, strdate.length() - 5));
136

137       }
138
139       String JavaDoc dateEnd = strdate.substring(strdate.length() - 6);
140
141       // try to deal with -05:00 or +02:00 at end of date
142
// replace with -0500 or +0200
143
if ((dateEnd.indexOf("-") == 0 || dateEnd.indexOf("+") == 0)
144           && dateEnd.indexOf(":") == 3) {
145         // TODO deal with GMT-00:03
146
if ("GMT".equals(strdate.substring(strdate.length() - 9, strdate
147             .length() - 6))) {
148           logger.debug("General time zone with offset, no change ");
149         } else {
150           // continue treatment
151
String JavaDoc oldDate = strdate;
152           String JavaDoc newEnd = dateEnd.substring(0, 3) + dateEnd.substring(4);
153           strdate = oldDate.substring(0, oldDate.length() - 6) + newEnd;
154           // logger.debug("!!modifying string ->"+strdate);
155
}
156       }
157     }
158     int i = 0;
159     while (i < dateFormats.length) {
160       try {
161         result = dateFormats[i].parse(strdate);
162         // logger.debug("******Parsing Success "+strdate+"->"+result+" with
163
// "+dateFormats[i].toPattern());
164
break;
165       } catch (java.text.ParseException JavaDoc eA) {
166         logger.debug("parsing " + strdate + " ["
167             + dateFormats[i].toPattern() + "] without success, trying again.");
168         i++;
169       }
170     }
171
172     return result;
173   }
174
175   /**
176    * Tries different date formats to parse against the given string
177    * representation to retrieve a valid Date object.
178    */

179   public static Date JavaDoc getDateOLD(String JavaDoc strdate) {
180     Date JavaDoc result = null;
181
182     try {
183       result = dfA.parse(strdate);
184     } catch (java.text.ParseException JavaDoc eA) {
185       logger.warn("Error parsing date (A): " + eA.getMessage());
186       try {
187         result = dfB.parse(strdate);
188       } catch (java.text.ParseException JavaDoc eB) {
189         logger.warn("Error parsing date (B): " + eB.getMessage());
190         try {
191           result = dfC.parse(strdate);
192           // try to retrieve the timezone anyway
193
result = extractTimeZone(strdate, result);
194         } catch (java.text.ParseException JavaDoc eC) {
195           logger.warn("Error parsing date (C): " + eC.getMessage());
196           try {
197             result = dfD.parse(strdate);
198           } catch (java.text.ParseException JavaDoc eD) {
199             logger.warn("Error parsing date (D): " + eD.getMessage());
200             eD.printStackTrace();
201           }
202         }
203       }
204     }
205     if (logger.isDebugEnabled()) {
206       logger.debug("Parsing date '" + strdate + "' resulted in: " + result);
207     }
208     if (result == null) {
209       logger.warn("No appropiate date could be extracted from " + strdate);
210
211     }
212     return result;
213   }
214
215   private static Date JavaDoc extractTimeZone(String JavaDoc strdate, Date JavaDoc thedate) {
216     // try to extract -06:00
217
String JavaDoc tzSign = strdate.substring(strdate.length() - 6,
218         strdate.length() - 5);
219     String JavaDoc tzHour = strdate.substring(strdate.length() - 5,
220         strdate.length() - 3);
221     String JavaDoc tzMin = strdate.substring(strdate.length() - 2);
222     if (tzSign.equals("-") || tzSign.equals("+")) {
223       int h = Integer.parseInt(tzHour);
224       int m = Integer.parseInt(tzMin);
225       // NOTE: this is really plus, since perspective is from GMT
226
if (tzSign.equals("+")) {
227         h = -1 * h;
228         m = -1 * m;
229       }
230       Calendar JavaDoc cal = Calendar.getInstance();
231       cal.setTime(thedate);
232       cal.add(Calendar.HOUR_OF_DAY, h);
233       cal.add(Calendar.MINUTE, m);
234       // calculate according the used timezone
235
cal.add(Calendar.MILLISECOND, localTimeDiff(cal.getTimeZone(), thedate));
236       thedate = cal.getTime();
237     }
238     return thedate;
239   }
240
241   private static int localTimeDiff(TimeZone JavaDoc tz, Date JavaDoc date) {
242     if (tz.inDaylightTime(date)) {
243       int dstSavings = 0;
244       if (tz.useDaylightTime()) {
245         dstSavings = 3600000; // shortcut, JDK 1.4 allows cleaner impl
246
}
247       return tz.getRawOffset() + dstSavings;
248     }
249     return tz.getRawOffset();
250   }
251
252   public static String JavaDoc formatDate(Date JavaDoc aDate) {
253     return dfA.format(aDate);
254   }
255
256   public static String JavaDoc decodeBase64(String JavaDoc s) {
257     //use private class
258
return Base64Decoder.decode(s);
259   }
260
261   public static String JavaDoc unEscape(String JavaDoc s) {
262     String JavaDoc value = s;
263     value = value.replaceAll("&lt;", "<");
264     value = value.replaceAll("&gt;", ">");
265     value = value.replaceAll("&amp;", "&");
266     value = value.replaceAll("&quot;", "\"");
267     value = value.replaceAll("&apos;", "'");
268     return value;
269   }
270
271 }
Popular Tags