KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > quercus > lib > i18n > UnicodeModule


1 /*
2  * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3  *
4  * This file is part of Resin(R) Open Source
5  *
6  * Each copy or derived work must preserve the copyright notice and this
7  * notice unmodified.
8  *
9  * Resin Open Source is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * Resin Open Source is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17  * of NON-INFRINGEMENT. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with Resin Open Source; if not, write to the
22  *
23  * Free Software Foundation, Inc.
24  * 59 Temple Place, Suite 330
25  * Boston, MA 02111-1307 USA
26  *
27  * @author Scott Ferguson
28  */

29
30 package com.caucho.quercus.lib.i18n;
31
32 import com.caucho.quercus.UnimplementedException;
33 import com.caucho.quercus.annotation.Optional;
34 import com.caucho.quercus.env.*;
35 import com.caucho.quercus.module.AbstractQuercusModule;
36 import com.caucho.util.L10N;
37
38 import javax.mail.Header JavaDoc;
39 import javax.mail.MessagingException JavaDoc;
40 import javax.mail.internet.InternetHeaders JavaDoc;
41 import java.io.UnsupportedEncodingException JavaDoc;
42 import java.util.Enumeration JavaDoc;
43 import java.util.HashMap JavaDoc;
44 import java.util.Map JavaDoc;
45 import java.util.logging.Level JavaDoc;
46 import java.util.logging.Logger JavaDoc;
47 /**
48  * Unicode handling. Also includes iconv, etc.
49  */

50 public class UnicodeModule extends AbstractQuercusModule {
51   private static final HashMap JavaDoc<String JavaDoc,StringValue> _iniMap
52     = new HashMap JavaDoc<String JavaDoc,StringValue>();
53
54   private static final Logger JavaDoc log =
55                        Logger.getLogger(UnicodeModule.class.getName());
56   private static final L10N L = new L10N(UnicodeModule.class);
57
58   public static final int U_INVALID_STOP = 0;
59   public static final int U_INVALID_SKIP = 1;
60   public static final int U_INVALID_SUBSTITUTE = 2;
61   public static final int U_INVALID_ESCAPE = 3;
62
63   public static final String JavaDoc ICONV_IMPL = "QuercusIconv";
64   public static final String JavaDoc ICONV_VERSION = "1.0";
65   
66   public static final int ICONV_MIME_DECODE_STRICT = 1;
67   public static final int ICONV_MIME_DECODE_CONTINUE_ON_ERROR = 2;
68
69   /**
70    * Returns the default quercus.ini values.
71    */

72   public Map JavaDoc<String JavaDoc,StringValue> getDefaultIni()
73   {
74     return _iniMap;
75   }
76
77   public static BooleanValue unicode_semantics(Env env)
78   {
79     return BooleanValue.TRUE;
80   }
81
82   public static UnicodeValue unicode_decode(Env env,
83                               BinaryValue str,
84                               String JavaDoc encoding)
85   {
86     return str.toUnicodeValue(env, encoding);
87   }
88
89   public static BinaryValue unicode_encode(Env env,
90                               UnicodeValue str,
91                               String JavaDoc encoding)
92   {
93     return str.toBinaryValue(env, encoding);
94   }
95
96   /**
97    * Returns the current encoding.
98    *
99    * @param env
100    * @param type setting to return
101    * @return iconv environment settings
102    */

103   public static Value iconv_get_encoding(Env env,
104                        @Optional("all") String JavaDoc type)
105   {
106     type = type.toLowerCase();
107
108     if ("all".equals(type)) {
109       ArrayValue array = new ArrayValueImpl();
110       array.put("input_encoding",
111           env.getIniString("iconv.input_encoding"));
112       array.put("output_encoding",
113           env.getIniString("iconv.output_encoding"));
114       array.put("internal_encoding",
115           env.getIniString("iconv.internal_encoding"));
116       return array;
117     }
118
119     if ("input_encoding".equals(type))
120       return new StringValueImpl(env.getIniString("iconv.input_encoding"));
121     else if ("output_encoding".equals(type))
122       return new StringValueImpl(env.getIniString("iconv.output_encoding"));
123     else if ("internal_encoding".equals(type))
124       return new StringValueImpl(env.getIniString("iconv.internal_encoding"));
125
126     return BooleanValue.FALSE;
127   }
128
129   /**
130    * Decodes all the headers and place them in an array.
131    * Use iconv.internal_encoding.
132    * XXX: mode, line-length, line-break-chars
133    *
134    * @param env
135    * @param encoded_headers
136    * @param mode controls error recovery
137    * @param charset
138    */

139   public static Value iconv_mime_decode_headers(Env env,
140                        StringValue encoded_headers,
141                        @Optional() int mode,
142                        @Optional() String JavaDoc charset)
143   {
144     if (charset.length() == 0)
145       charset = env.getIniString("iconv.internal_encoding");
146
147     try {
148       return decodeMimeHeaders(env, encoded_headers, charset);
149     }
150     catch (UnsupportedEncodingException JavaDoc e) {
151       log.log(Level.FINE, e.getMessage(), e);
152       env.warning(L.l(e.getMessage()));
153     }
154     catch (MessagingException JavaDoc e) {
155       log.log(Level.FINE, e.getMessage(), e);
156       env.warning(L.l(e.getMessage()));
157     }
158
159     return BooleanValue.FALSE;
160   }
161
162   private static Value decodeMimeHeaders(Env env,
163                               StringValue encodedHeaders,
164                               String JavaDoc charset)
165     throws MessagingException JavaDoc, UnsupportedEncodingException JavaDoc
166   {
167     ArrayValue headers = new ArrayValueImpl();
168
169     Enumeration JavaDoc<Header JavaDoc> enumeration
170         = new InternetHeaders JavaDoc(encodedHeaders.toInputStream()).getAllHeaders();
171
172     while (enumeration.hasMoreElements()) {
173       Header JavaDoc header = enumeration.nextElement();
174
175       StringValue name
176           = IconvUtility.decodeMime(env, header.getName(), charset);
177       StringValue val
178           = IconvUtility.decodeMime(env, header.getValue(), charset);
179
180       Value headerName;
181       if ((headerName = headers.containsKey(name)) == null) {
182         headers.put(name, val);
183         continue;
184       }
185
186       ArrayValue inner;
187       if (headerName.isArray()) {
188         inner = headerName.toArrayValue(env);
189       }
190       else {
191         inner = new ArrayValueImpl();
192         inner.put(headerName);
193       }
194
195       inner.put(val);
196       headers.put(name, inner);
197     }
198
199     return headers;
200   }
201
202   /**
203    * Uses iconv.internal_encoding.
204    *
205    * XXX: mode ignored
206    *
207    * @param env
208    * @param encoded_header
209    * @param mode controls error recovery
210    * @param charset to encode resultant
211    */

212   public static Value iconv_mime_decode(Env env,
213                               StringValue encoded_header,
214                               @Optional("1") int mode,
215                               @Optional("") String JavaDoc charset)
216   {
217     if (charset.length() == 0)
218       charset = env.getIniString("iconv.internal_encoding");
219
220     try {
221       Enumeration JavaDoc<String JavaDoc> enumeration = new InternetHeaders JavaDoc(
222           encoded_header.toInputStream()).getAllHeaderLines();
223
224       if (! enumeration.hasMoreElements()) {
225         env.warning(L.l("Error parsing header."));
226         return BooleanValue.FALSE;
227       }
228
229       return IconvUtility.decodeMime(env, enumeration.nextElement(), charset);
230
231     }
232     catch (MessagingException JavaDoc e) {
233       log.log(Level.FINE, e.getMessage(), e);
234       env.warning(L.l(e.getMessage()));
235     }
236     catch (UnsupportedEncodingException JavaDoc e) {
237       log.log(Level.FINE, e.getMessage(), e);
238       env.warning(L.l(e.getMessage()));
239     }
240
241     return BooleanValue.FALSE;
242   }
243
244   /**
245    * Encodes a MIME header.
246    *
247    * XXX: preferences
248    *
249    * @param field_name header field name
250    * @param field_value header field value
251    * @param preferences
252    */

253   public static Value iconv_mime_encode(Env env,
254                               StringValue field_name,
255                               StringValue field_value,
256                               @Optional() ArrayValue preferences)
257   {
258     try {
259       String JavaDoc scheme = "B";
260       String JavaDoc lineBreakChars = "\r\n";
261       String JavaDoc inCharset = env.getIniString("iconv.internal_encoding");
262       String JavaDoc outCharset = inCharset;
263       int lineLength = 76;
264
265       if (preferences != null) {
266         Value tmp = new StringValueImpl("scheme");
267         if ((tmp = preferences.get(tmp)).isset())
268           scheme = tmp.toString();
269
270         tmp = new StringValueImpl("line-break-chars");
271         if ((tmp = preferences.get(tmp)).isset())
272           lineBreakChars = tmp.toString();
273
274         tmp = new StringValueImpl("input-charset");
275         if ((tmp = preferences.get(tmp)).isset())
276           inCharset = tmp.toString();
277
278         tmp = new StringValueImpl("output-charset");
279         if ((tmp = preferences.get(tmp)).isset())
280           outCharset = tmp.toString();
281
282         tmp = new StringValueImpl("line-length");
283         if ((tmp = preferences.get(tmp)).isset()) {
284         if (tmp.isLongConvertible())
285           lineLength = (int)tmp.toLong();
286         }
287       }
288
289       return IconvUtility.encodeMime(env,
290                                      field_name,
291                                      field_value,
292                                      inCharset,
293                                      outCharset,
294                                      scheme,
295                                      lineBreakChars,
296                                      lineLength);
297
298     }
299     catch (UnsupportedEncodingException JavaDoc e) {
300       log.log(Level.FINE, e.getMessage(), e);
301       env.warning(L.l(e.getMessage()));
302
303       return BooleanValue.FALSE;
304     }
305   }
306
307   /**
308    * Sets the current encoding.
309    * @param env
310    * @param type
311    * @param charset
312    */

313   public static BooleanValue iconv_set_encoding(Env env,
314                        String JavaDoc type,
315                        StringValue charset)
316   {
317     type = type.toLowerCase();
318
319     if ("input_encoding".equals(type)) {
320       env.setIni("iconv.input_encoding", charset);
321       return BooleanValue.TRUE;
322     }
323     else if ("output_encoding".equals(type)) {
324       env.setIni("iconv.output_encoding", charset);
325       return BooleanValue.TRUE;
326     }
327     else if ("internal_encoding".equals(type)) {
328       env.setIni("iconv.internal_encoding", charset);
329       return BooleanValue.TRUE;
330     }
331
332     return BooleanValue.FALSE;
333   }
334
335   /**
336    * Returns the length of the decoded string.
337    * Uses iconv.internal_encoding.
338    *
339    * @param env
340    * @param str
341    * @param charset
342    */

343   public static Value iconv_strlen(Env env,
344                        StringValue str,
345                        @Optional("") String JavaDoc charset)
346   {
347     if (charset.length() == 0 )
348       charset = env.getIniString("iconv.internal_encoding");
349
350     return LongValue.create(str.toUnicodeValue(env, charset).length());
351   }
352
353   /**
354    * Returns the first occurence of the substring in the haystack.
355    * Uses iconv.internal_encoding.
356    *
357    * @param env
358    * @param haystack
359    * @param needle
360    * @param offset
361    * @param charset
362    * @return first occurence of needle in haystack, FALSE otherwise
363    */

364   public static Value iconv_strpos(Env env,
365                        StringValue haystack,
366                        StringValue needle,
367                        @Optional("0") int offset,
368                        @Optional("") String JavaDoc charset)
369   {
370     if (charset.length() == 0)
371       charset = env.getIniString("iconv.internal_encoding");
372
373     haystack = haystack.toUnicodeValue(env, charset);
374     needle = needle.toUnicodeValue(env, charset);
375
376     int index = haystack.indexOf(needle, offset);
377
378     if (index < 0)
379       return BooleanValue.FALSE;
380
381     return LongValue.create(index);
382   }
383
384   /**
385    * Returns the last occurence of the substring in the haystack.
386    * Uses iconv.internal_encoding.
387    *
388    * @param env
389    * @param haystack
390    * @param needle
391    * @param charset encoding of StringValue arguments
392    *
393    * @return last occurence of needle in haystack, FALSE otherwise
394    */

395   public static Value iconv_strrpos(Env env,
396                        StringValue haystack,
397                        StringValue needle,
398                        @Optional("") String JavaDoc charset)
399   {
400     if (charset.length() == 0)
401       charset = env.getIniString("iconv.internal_encoding");
402
403     haystack = haystack.toUnicodeValue(env, charset);
404     needle = needle.toUnicodeValue(env, charset);
405
406     int index = haystack.lastIndexOf(needle);
407
408     if (index < 0)
409       return BooleanValue.FALSE;
410
411     return LongValue.create(index);
412   }
413
414   /**
415    * Uses iconv.internal_encoding.
416    *
417    * @param env
418    * @param str encoded string
419    * @param offset of str after decoding
420    * @param length of str after decoding
421    * @param charset encoding of StringValue argument
422    * @return substring of argument string.
423    *
424    */

425   public static Value iconv_substr(Env env,
426                        StringValue str,
427                        int offset,
428                        @Optional("7fffffff") int length,
429                        @Optional("") String JavaDoc charset)
430   {
431     if (charset.length() == 0)
432       charset = env.getIniString("iconv.internal_encoding");
433
434     str = str.toUnicodeValue(env, charset);
435
436     int tail;
437     int strlen = str.length();
438
439     // Imitating PHP5 behavior
440
if (offset < 0)
441       offset = strlen + offset;
442
443     if (length < 0)
444       tail = strlen + length;
445     else if (length > strlen - offset)
446       tail = strlen;
447     else
448       tail = offset + length;
449
450     if (offset < 0 || tail < offset)
451       return StringValue.EMPTY;
452
453     str = str.substring(offset, tail);
454
455     return str.toBinaryValue(env, charset);
456   }
457
458   /**
459    * Returns encoded string from decoded argument string.
460    *
461    * @param env
462    * @param in_charset charset to decode from
463    * @param out_charset charset to decode to
464    * @param str to decode and encode
465    */

466   public static Value iconv(Env env,
467                        String JavaDoc in_charset,
468                        String JavaDoc out_charset,
469                        StringValue str)
470   {
471     try {
472       return IconvUtility.decodeEncode(str, in_charset, out_charset);
473     }
474     catch (UnsupportedEncodingException JavaDoc e) {
475       log.log(Level.FINE, e.getMessage(), e);
476       env.warning(L.l("error converting {1} to {2}", in_charset, out_charset));
477
478       return BooleanValue.FALSE;
479     }
480   }
481
482   public static StringValue ob_iconv_handler(
483                        StringValue contents,
484                        int status)
485   {
486     throw new UnimplementedException("ob_iconv_handler");
487   }
488
489   static {
490     addIni(_iniMap, "unicode.fallback_encoding", "ISO-8859-1", PHP_INI_ALL);
491     addIni(_iniMap, "unicode.from_error_mode", "2", PHP_INI_ALL);
492     addIni(_iniMap, "unicode.from_error_subst_char", "3f", PHP_INI_ALL);
493     addIni(_iniMap, "unicode.http_input_encoding", null, PHP_INI_ALL);
494     addIni(_iniMap, "unicode.output_encoding", null, PHP_INI_ALL);
495     addIni(_iniMap, "unicode.runtime_encoding", null, PHP_INI_ALL);
496     addIni(_iniMap, "unicode.script_encoding", null, PHP_INI_ALL);
497     addIni(_iniMap, "unicode.semantics", "on", PHP_INI_ALL);
498
499     addIni(_iniMap, "iconv.input_encoding", "ISO-8859-1", PHP_INI_ALL);
500     addIni(_iniMap, "iconv.output_encoding", "ISO-8859-1", PHP_INI_ALL);
501     addIni(_iniMap, "iconv.internal_encoding", "ISO-8859-1", PHP_INI_ALL);
502   }
503 }
504
505 // XXX: "//TRANSLIT" and "//IGNORE" charset suffixes
506
Popular Tags