// KickJava   Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > com > caucho > vfs > Encoding


/*
 * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
 *
 * This file is part of Resin(R) Open Source
 *
 * Each copy or derived work must preserve the copyright notice and this
 * notice unmodified.
 *
 * Resin Open Source is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Resin Open Source is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
 * of NON-INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Resin Open Source; if not, write to the
 *
 * Free Software Foundation, Inc.
 * 59 Temple Place, Suite 330
 * Boston, MA 02111-1307 USA
 *
 * @author Scott Ferguson
 */

package com.caucho.vfs;

import com.caucho.util.CharBuffer;
import com.caucho.vfs.i18n.EncodingReader;
import com.caucho.vfs.i18n.EncodingWriter;
import com.caucho.vfs.i18n.ISO8859_1Writer;
import com.caucho.vfs.i18n.JDKReader;
import com.caucho.vfs.i18n.JDKWriter;

import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Locale;

46 /**
47  * Converts between the mime encoding names and Java encoding names.
48  */

49 public class Encoding {
50   static HashMap JavaDoc<String JavaDoc,String JavaDoc> _javaName;
51   static Hashtable JavaDoc<String JavaDoc,String JavaDoc> _mimeName;
52   static HashMap JavaDoc<String JavaDoc,String JavaDoc> _localeName;
53
54   // map from an encoding name to its EncodingReader factory.
55
static final HashMap JavaDoc<String JavaDoc,EncodingReader> _readEncodingFactories =
56     new HashMap JavaDoc<String JavaDoc,EncodingReader>();
57   
58   // map from an encoding name to its EncodingWriter factory.
59
static final HashMap JavaDoc<String JavaDoc,EncodingWriter> _writeEncodingFactories =
60     new HashMap JavaDoc<String JavaDoc,EncodingWriter>();
61
62   static final EncodingWriter _latin1Writer = new ISO8859_1Writer();
63
64   /**
65    * Can't create an instance of the encoding class.
66    */

67   private Encoding() {}
68
69   /**
70    * Returns the canonical mime name for the given character encoding.
71    *
72    * @param encoding character encoding name, possibly an alias
73    *
74    * @return canonical mime name for the encoding.
75    */

76   public static String JavaDoc getMimeName(String JavaDoc encoding)
77   {
78     if (encoding == null)
79       return null;
80
81     String JavaDoc value = _mimeName.get(encoding);
82     if (value != null)
83       return value;
84
85     String JavaDoc upper = normalize(encoding);
86
87     String JavaDoc lookup = _mimeName.get(upper);
88
89     value = lookup == null ? upper : lookup;
90
91     _mimeName.put(encoding, value);
92
93     return value;
94   }
95
96   /**
97    * Returns the canonical mime name for the given locale.
98    *
99    * @param locale locale to use.
100    *
101    * @return canonical mime name for the encoding.
102    */

103   public static String JavaDoc getMimeName(Locale JavaDoc locale)
104   {
105     if (locale == null)
106       return "ISO-8859-1";
107
108     String JavaDoc mimeName = _localeName.get(locale.toString());
109     if (mimeName == null)
110       mimeName = _localeName.get(locale.getLanguage());
111
112     if (mimeName == null)
113       return "ISO-8859-1";
114     else
115       return mimeName;
116   }
117
118   /**
119    * Returns a Reader to translate bytes to characters. If a specialized
120    * reader exists in com.caucho.vfs.i18n, use it.
121    *
122    * @param is the input stream.
123    * @param encoding the encoding name.
124    *
125    * @return a reader for the translation
126    */

127   public static Reader getReadEncoding(InputStream JavaDoc is, String JavaDoc encoding)
128     throws UnsupportedEncodingException JavaDoc
129   {
130     EncodingReader factory = null;
131     
132     synchronized (_readEncodingFactories) {
133       factory = _readEncodingFactories.get(encoding);
134
135       if (factory == null) {
136         try {
137           String JavaDoc javaEncoding = Encoding.getJavaName(encoding);
138
139           if (javaEncoding == null)
140             javaEncoding = "ISO8859_1";
141
142           String JavaDoc className = "com.caucho.vfs.i18n." + javaEncoding + "Reader";
143         
144           Class JavaDoc cl = Class.forName(className);
145
146           factory = (EncodingReader) cl.newInstance();
147           factory.setJavaEncoding(javaEncoding);
148         } catch (Throwable JavaDoc e) {
149         }
150
151         if (factory == null) {
152           String JavaDoc javaEncoding = Encoding.getJavaName(encoding);
153
154           if (javaEncoding == null)
155             javaEncoding = "ISO8859_1";
156           
157           factory = new JDKReader();
158           factory.setJavaEncoding(javaEncoding);
159         }
160
161         _readEncodingFactories.put(encoding, factory);
162       }
163     }
164
165     return factory.create(is, factory.getJavaEncoding());
166   }
167
168   /**
169    * Returns an EncodingWriter to translate characters to bytes.
170    *
171    * @param encoding the encoding name.
172    *
173    * @return a writer for the translation
174    */

175   public static EncodingWriter getWriteEncoding(String JavaDoc encoding)
176   {
177     EncodingWriter factory = _writeEncodingFactories.get(encoding);
178
179     if (factory != null)
180       return factory.create();
181
182     synchronized (_writeEncodingFactories) {
183       factory = _writeEncodingFactories.get(encoding);
184
185       if (factory == null) {
186         try {
187           String JavaDoc javaEncoding = Encoding.getJavaName(encoding);
188
189           if (javaEncoding == null)
190             javaEncoding = "ISO8859_1";
191
192           String JavaDoc className = "com.caucho.vfs.i18n." + javaEncoding + "Writer";
193         
194           Class JavaDoc cl = Class.forName(className);
195
196           factory = (EncodingWriter) cl.newInstance();
197       factory.setJavaEncoding(javaEncoding);
198         } catch (Throwable JavaDoc e) {
199         }
200
201         if (factory == null) {
202           factory = new JDKWriter();
203           String JavaDoc javaEncoding = Encoding.getJavaName(encoding);
204
205           if (javaEncoding == null)
206             javaEncoding = "ISO8859_1";
207           factory.setJavaEncoding(javaEncoding);
208         }
209
210         _writeEncodingFactories.put(encoding, factory);
211       }
212     }
213
214     // return factory.create(factory.getJavaEncoding());
215
// charset uses the original encoding, not the java encoding
216
return factory.create(encoding);
217   }
218
219   /**
220    * Returns the latin 1 writer.
221    */

222   public static EncodingWriter getLatin1Writer()
223   {
224     return _latin1Writer;
225   }
226
227   /**
228    * Returns the Java name for the given encoding.
229    *
230    * @param encoding character encoding name
231    *
232    * @return Java encoding name
233    */

234   public static String JavaDoc getJavaName(String JavaDoc encoding)
235   {
236     if (encoding == null)
237       return null;
238
239     String JavaDoc upper = normalize(encoding);
240
241     String JavaDoc javaName = null;
242     
243     javaName = _javaName.get(upper);
244     if (javaName != null)
245       return javaName;
246     
247     String JavaDoc lookup = _mimeName.get(upper);
248
249     if (lookup != null)
250       javaName = _javaName.get(lookup);
251
252     return javaName == null ? upper : javaName;
253   }
254
255   /**
256    * Returns the Java name for the given locale.
257    *
258    * @param locale the locale to use
259    *
260    * @return Java encoding name
261    */

262   public static String JavaDoc getJavaName(Locale JavaDoc locale)
263   {
264     if (locale == null)
265       return null;
266
267     return getJavaName(getMimeName(locale));
268   }
269
270   /**
271    * Normalize the user's encoding name to avoid case issues.
272    */

273   private static String JavaDoc normalize(String JavaDoc name)
274   {
275     CharBuffer cb = CharBuffer.allocate();
276
277     int len = name.length();
278     for (int i = 0; i < len; i++) {
279       char ch = name.charAt(i);
280       
281       if (Character.isLowerCase(ch))
282         cb.append(Character.toUpperCase(ch));
283       else if (ch == '_')
284         cb.append('-');
285       else
286         cb.append(ch);
287     }
288
289     return cb.close();
290   }
291       
292
293   static {
294     _javaName = new HashMap JavaDoc<String JavaDoc,String JavaDoc>();
295     _mimeName = new Hashtable JavaDoc<String JavaDoc,String JavaDoc>();
296     _localeName = new HashMap JavaDoc<String JavaDoc,String JavaDoc>();
297
298     _mimeName.put("ANSI-X3.4-1968", "US-ASCII");
299     _mimeName.put("ISO-IR-6", "US-ASCII");
300     _mimeName.put("ISO-646.IRV:1991", "US-ASCII");
301     _mimeName.put("ASCII", "US-ASCII");
302     _mimeName.put("ISO646-US", "US-ASCII");
303     _mimeName.put("US-ASCII", "US-ASCII");
304     _mimeName.put("us", "US-ASCII");
305     _mimeName.put("IBM367", "US-ASCII");
306     _mimeName.put("CP367", "US-ASCII");
307     _mimeName.put("CSASCII", "US-ASCII");
308     _javaName.put("US-ASCII", "ISO8859_1");
309
310     _mimeName.put("ISO-2022-KR", "ISO-2022-KR");
311     _mimeName.put("CSISO2022KR", "ISO-2022-KR");
312     _mimeName.put("ISO2022-KR", "ISO-2022-KR");
313     _javaName.put("ISO-2022-KR", "ISO2022_KR");
314
315     _mimeName.put("EUC-KR", "EUC-KR");
316     _mimeName.put("CSEUCKR", "EUC-KR");
317     _javaName.put("EUC-KR", "EUC_KR");
318
319     _mimeName.put("ISO-2022-JP", "ISO-2022-JP");
320     _mimeName.put("CSISO2022JP", "ISO-2022-JP");
321     _mimeName.put("ISO2022-JP", "ISO-2022-JP");
322     _javaName.put("ISO-2022-JP", "ISO2022JP");
323
324     _mimeName.put("ISO-2022-JP-2", "ISO-2022-JP-2");
325     _mimeName.put("CSISO2022JP2", "ISO-2022-JP-2");
326     _mimeName.put("ISO2022-JP2", "ISO-2022-JP-2");
327     _javaName.put("ISO-2022-JP-2", "ISO2022_JP2");
328
329     _mimeName.put("ISO_8859-1:1987", "ISO-8859-1");
330     _mimeName.put("ISO-IR-100", "ISO-8859-1");
331     _mimeName.put("ISO-8859-1", "ISO-8859-1");
332     _mimeName.put("LATIN1", "ISO-8859-1");
333     _mimeName.put("LATIN-1", "ISO-8859-1");
334     _mimeName.put("L1", "ISO-8859-1");
335     _mimeName.put("IBM819", "ISO-8859-1");
336     _mimeName.put("CP819", "ISO-8859-1");
337     _mimeName.put("CSISOLATIN1", "ISO-8859-1");
338     _mimeName.put("ISO8859-1", "ISO-8859-1");
339     _mimeName.put("8859-1", "ISO-8859-1");
340     _mimeName.put("8859_1", "ISO-8859-1");
341     _javaName.put("ISO-8859-1", "ISO8859_1");
342
343     _mimeName.put("ISO-8859-2:1987", "ISO-8859-2");
344     _mimeName.put("ISO-IR-101", "ISO-8859-2");
345     _mimeName.put("ISO-8859-2", "ISO-8859-2");
346     _mimeName.put("LATIN2", "ISO-8859-2");
347     _mimeName.put("LATIN-2", "ISO-8859-2");
348     _mimeName.put("L2", "ISO-8859-2");
349     _mimeName.put("CSISOLATIN2", "ISO-8859-2");
350     _mimeName.put("ISO8859-2", "ISO-8859-2");
351     _javaName.put("ISO-8859-2", "ISO8859_2");
352
353     _mimeName.put("ISO-8859-3:1988", "ISO-8859-3");
354     _mimeName.put("ISO-IR-109", "ISO-8859-3");
355     _mimeName.put("ISO-8859-3", "ISO-8859-3");
356     _mimeName.put("ISO-8859-3", "ISO-8859-3");
357     _mimeName.put("LATIN3", "ISO-8859-3");
358     _mimeName.put("LATIN-3", "ISO-8859-3");
359     _mimeName.put("L3", "ISO-8859-3");
360     _mimeName.put("CSISOLATIN3", "ISO-8859-3");
361     _mimeName.put("ISO8859-3", "ISO-8859-3");
362     _javaName.put("ISO-8859-3", "ISO8859_3");
363
364     _mimeName.put("ISO-8859-4:1988", "ISO-8859-4");
365     _mimeName.put("ISO-IR-110", "ISO-8859-4");
366     _mimeName.put("ISO-8859-4", "ISO-8859-4");
367     _mimeName.put("ISO-8859-4", "ISO-8859-4");
368     _mimeName.put("LATIN4", "ISO-8859-4");
369     _mimeName.put("LATIN-4", "ISO-8859-4");
370     _mimeName.put("L4", "ISO-8859-4");
371     _mimeName.put("CSISOLATIN4", "ISO-8859-4");
372     _mimeName.put("ISO8859-4", "ISO-8859-4");
373     _javaName.put("ISO-8859-4", "ISO8859_4");
374
375     _mimeName.put("ISO-8859-5:1988", "ISO-8859-5");
376     _mimeName.put("ISO-IR-144", "ISO-8859-5");
377     _mimeName.put("ISO-8859-5", "ISO-8859-5");
378     _mimeName.put("ISO-8859-5", "ISO-8859-5");
379     _mimeName.put("CYRILLIC", "ISO-8859-5");
380     _mimeName.put("CSISOLATINCYRILLIC", "ISO-8859-5");
381     _mimeName.put("ISO8859-5", "ISO-8859-5");
382     _javaName.put("ISO-8859-5", "ISO8859_5");
383
384     _mimeName.put("ISO-8859-6:1987", "ISO-8859-6");
385     _mimeName.put("ISO-IR-127", "ISO-8859-6");
386     _mimeName.put("ISO-8859-6", "ISO-8859-6");
387     _mimeName.put("ISO-8859-6", "ISO-8859-6");
388     _mimeName.put("ECMA-114", "ISO-8859-6");
389     _mimeName.put("ASMO-708", "ISO-8859-6");
390     _mimeName.put("ARABIC", "ISO-8859-6");
391     _mimeName.put("CSISOLATINARABIC", "ISO-8859-6");
392     _mimeName.put("ISO8859-6", "ISO-8859-6");
393     _javaName.put("ISO-8859-6", "ISO8859_6");
394
395     _mimeName.put("ISO-8859-7:1987", "ISO-8859-7");
396     _mimeName.put("ISO-IR-126", "ISO-8859-7");
397     _mimeName.put("ISO-8859-7", "ISO-8859-7");
398     _mimeName.put("ISO-8859-7", "ISO-8859-7");
399     _mimeName.put("ELOT-928", "ISO-8859-7");
400     _mimeName.put("ECMA-118", "ISO-8859-7");
401     _mimeName.put("GREEK", "ISO-8859-7");
402     _mimeName.put("GREEK8", "ISO-8859-7");
403     _mimeName.put("CSISOLATINGREEN", "ISO-8859-7");
404     _mimeName.put("ISO8859-7", "ISO-8859-7");
405     _javaName.put("ISO-8859-7", "ISO8859_7");
406
407     _mimeName.put("ISO-8859-8:1988", "ISO-8859-8");
408     _mimeName.put("ISO-IR-138", "ISO-8859-8");
409     _mimeName.put("ISO-8859-8", "ISO-8859-8");
410     _mimeName.put("ISO-8859-8", "ISO-8859-8");
411     _mimeName.put("HEBREW", "ISO-8859-8");
412     _mimeName.put("CSISOLATINHEBREW", "ISO-8859-8");
413     _mimeName.put("ISO8859-8", "ISO-8859-8");
414     _javaName.put("ISO-8859-8", "ISO8859_8");
415
416     _mimeName.put("ISO-8859-9:1989", "ISO-8859-9");
417     _mimeName.put("ISO-IR-148", "ISO-8859-9");
418     _mimeName.put("ISO-8859-9", "ISO-8859-9");
419     _mimeName.put("ISO-8859-9", "ISO-8859-9");
420     _mimeName.put("LATIN5", "ISO-8859-9");
421     _mimeName.put("LATIN-5", "ISO-8859-9");
422     _mimeName.put("L5", "ISO-8859-9");
423     _mimeName.put("CSISOLATIN5", "ISO-8859-9");
424     _mimeName.put("ISO8859-9", "ISO-8859-9");
425     _javaName.put("ISO-8859-9", "ISO8859_9");
426     
427     _mimeName.put("ISO_8859-10:1992", "ISO-8859-10");
428     _mimeName.put("iso-ir-157", "ISO-8859-10");
429     _mimeName.put("I6", "ISO-8859-10");
430     _mimeName.put("cslSOLatin6", "ISO-8859-10");
431     _mimeName.put("latin6", "ISO-8859-10");
432     _javaName.put("ISO-8859-10", "ISO8859_10");
433
434     _mimeName.put("UTF-7", "UTF-7");
435     _mimeName.put("UTF7", "UTF-7");
436     _javaName.put("UTF-7", "UTF7");
437
438     _mimeName.put("UTF-8", "UTF-8");
439     _mimeName.put("UTF8", "UTF-8");
440     _javaName.put("UTF-8", "UTF8");
441
442     _mimeName.put("UTF-16", "UTF-16");
443     _mimeName.put("UTF16", "UTF-16");
444     _javaName.put("UTF-16", "UTF16");
445
446     _mimeName.put("UTF-16-REV", "UTF-16-REV");
447     _mimeName.put("UTF16-REV", "UTF-16-REV");
448     _javaName.put("UTF-16-REV", "UTF16_REV");
449
450     _mimeName.put("JIS-ENCODING", "JIS_Encoding");
451     _mimeName.put("JIS-ENCODING", "JIS_Encoding");
452     _mimeName.put("CSJISENCODING", "JIS_Encoding");
453     _javaName.put("JIS_Encoding", "JIS_ENCODING");
454
455     _mimeName.put("SHIFT-JIS", "Shift_JIS");
456     _mimeName.put("SHIFT_JIS", "Shift_JIS");
457     _mimeName.put("CSSHIFTJIS", "Shift_JIS");
458     _mimeName.put("SJIS", "Shift_JIS");
459     _javaName.put("Shift_JIS", "SJIS");
460
461     _mimeName.put("EUC-JP", "EUC-JP");
462     _mimeName.put("EUC-JP", "EUC-JP");
463     _mimeName.put("EUCJP", "EUC-JP");
464     _mimeName.put("EUC-JP-LINUX", "EUC-JP");
465     _javaName.put("EUC-JP", "EUC_JP");
466
467     _mimeName.put("GB2312", "GB2312");
468     _mimeName.put("CSGB2312", "GB2312");
469     _javaName.put("GB2312", "GB2312");
470     
471     _mimeName.put("GBK", "GBK");
472     _javaName.put("GBK", "GBK");
473
474     _mimeName.put("BIG5", "Big5");
475     _mimeName.put("BIG-5", "Big5");
476     _mimeName.put("CSBIG5", "Big5");
477     _javaName.put("Big5", "BIG5");
478
479     _mimeName.put("KOI8-R", "KOI8-R");
480     _mimeName.put("KOI-8-R", "KOI8-R");
481     _mimeName.put("KOI8-R", "KOI8-R");
482     _javaName.put("KOI8-R", "KOI8-R");
483     
484     _mimeName.put("MS950", "ms950");
485     _javaName.put("ms950", "MS950");
486     
487     _javaName.put("JAVA", "JAVA");
488     
489     _mimeName.put("windows-hack", "ISO-8859-1");
490     _mimeName.put("WINDOWS-HACK", "ISO-8859-1");
491     _javaName.put("WINDOWS-HACK", "WindowsHack");
492     
493     _mimeName.put("MACROMAN", "MacRoman");
494     _javaName.put("MacRoman", "MacRoman");
495     
496     _mimeName.put("KS_C_5601-1987", "ks_c_5601-1987");
497     _javaName.put("ks_c_5601-1987", "Cp949");
498     
499     _javaName.put("IBM500", "Cp500");
500
501     String JavaDoc []cp = new String JavaDoc[] {
502       "037", "1006", "1025", "1026", "1046", "1097",
503       "1098", "1112", "1122", "1123", "1124", "1250",
504       "1251", "1252", "1253", "1254", "1255", "1256",
505       "1257", "1258", "1381", "273", "277", "278", "280", "284",
506       "285", "297", "33722", "420", "424", "437", "500", "737",
507       "775", "838", "850", "852", "855", "857", "860", "861", "862",
508       "863", "864", "865", "866", "868", "869", "870", "871", "874",
509       "875", "918", "921", "922", "930", "933", "935", "937", "939",
510       "942", "948", "949", "964", "970"
511     };
512
513     for (int i = 0; i < cp.length; i++) {
514       _mimeName.put("CP" + cp[i], "windows-" + cp[i]);
515       _mimeName.put("WINDOWS-" + cp[i], "windows-" + cp[i]);
516       _javaName.put("windows-" + cp[i], "Cp" + cp[i]);
517     }
518     
519     // from http://www.w3c.org/International/O-charset-lang.html
520
_localeName = new HashMap JavaDoc<String JavaDoc,String JavaDoc>();
521     _localeName.put("af", "ISO-8859-1");
522     _localeName.put("sq", "ISO-8859-1");
523     _localeName.put("ar", "ISO-8859-6");
524     _localeName.put("eu", "ISO-8859-1");
525     _localeName.put("bg", "ISO-8859-5");
526     _localeName.put("be", "ISO-8859-5");
527     _localeName.put("ca", "ISO-8859-1");
528     _localeName.put("hr", "ISO-8859-2");
529     _localeName.put("cs", "ISO-8859-2");
530     _localeName.put("da", "ISO-8859-1");
531     _localeName.put("nl", "ISO-8859-1");
532     _localeName.put("en", "ISO-8859-1");
533     _localeName.put("eo", "ISO-8859-3");
534     _localeName.put("et", "ISO-8859-10");
535     _localeName.put("fo", "ISO-8859-1");
536     _localeName.put("fi", "ISO-8859-1");
537     _localeName.put("fr", "ISO-8859-1");
538     _localeName.put("gl", "ISO-8859-1");
539     _localeName.put("de", "ISO-8859-1");
540     _localeName.put("el", "ISO-8859-7");
541     _localeName.put("iw", "ISO-8859-8");
542     _localeName.put("hu", "ISO-8859-2");
543     _localeName.put("is", "ISO-8859-1");
544     _localeName.put("ga", "ISO-8859-1");
545     _localeName.put("it", "ISO-8859-1");
546     _localeName.put("ja", "Shift_JIS");
547     _localeName.put("lv", "ISO-8859-10");
548     _localeName.put("lt", "ISO-8859-10");
549     _localeName.put("mk", "ISO-8859-5");
550     _localeName.put("mt", "ISO-8859-3");
551     _localeName.put("no", "ISO-8859-1");
552     _localeName.put("pl", "ISO-8859-2");
553     _localeName.put("pt", "ISO-8859-1");
554     _localeName.put("ro", "ISO-8859-2");
555     // _localeName.put("ru", "KOI8-R");
556
_localeName.put("ru", "ISO-8859-5");
557     _localeName.put("gd", "ISO-8859-1");
558     _localeName.put("sr", "ISO-8859-5");
559     _localeName.put("sk", "ISO-8859-2");
560     _localeName.put("sl", "ISO-8859-2");
561     _localeName.put("es", "ISO-8859-1");
562     _localeName.put("sv", "ISO-8859-1");
563     _localeName.put("tr", "ISO-8859-9");
564     _localeName.put("uk", "ISO-8859-5");
565
566     _localeName.put("ko", "EUC-KR");
567     _localeName.put("zh", "GB2312");
568     _localeName.put("zh_TW", "Big5");
569   }
570 }
// Popular Tags