KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > readers > MIME2Java


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.readers;
59
60 import java.util.Hashtable JavaDoc;
61
62 /**
63  * MIME2Java is a convenience class which handles conversions between MIME charset names
64  * and Java encoding names.
65  * <p>The supported XML encodings are the intersection of XML-supported code sets and those
66  * supported in JDK 1.1.
67  * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
68  * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
69  * <p>Java encoding names are used on <var>encoding</var> parameters to
70  * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
71  * <P>
72  * <TABLE BORDER="0" WIDTH="100%">
73  * <TR>
74  * <TD WIDTH="33%">
75  * <P ALIGN="CENTER"><B>Common Name</B>
76  * </TD>
77  * <TD WIDTH="15%">
78  * <P ALIGN="CENTER"><B>Use this name in XML files</B>
79  * </TD>
80  * <TD WIDTH="12%">
81  * <P ALIGN="CENTER"><B>Name Type</B>
82  * </TD>
83  * <TD WIDTH="31%">
84  * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B>
85  * </TD>
86  * </TR>
87  * <TR>
88  * <TD WIDTH="33%">8 bit Unicode</TD>
89  * <TD WIDTH="15%">
90  * <P ALIGN="CENTER">UTF-8
91  * </TD>
92  * <TD WIDTH="12%">
93  * <P ALIGN="CENTER">IANA
94  * </TD>
95  * <TD WIDTH="31%">
96  * <P ALIGN="CENTER">UTF8
97  * </TD>
98  * </TR>
99  * <TR>
100  * <TD WIDTH="33%">ISO Latin 1</TD>
101  * <TD WIDTH="15%">
102  * <P ALIGN="CENTER">ISO-8859-1
103  * </TD>
104  * <TD WIDTH="12%">
105  * <P ALIGN="CENTER">MIME
106  * </TD>
107  * <TD WIDTH="31%">
108  * <P ALIGN="CENTER">ISO-8859-1
109  * </TD>
110  * </TR>
111  * <TR>
112  * <TD WIDTH="33%">ISO Latin 2</TD>
113  * <TD WIDTH="15%">
114  * <P ALIGN="CENTER">ISO-8859-2
115  * </TD>
116  * <TD WIDTH="12%">
117  * <P ALIGN="CENTER">MIME
118  * </TD>
119  * <TD WIDTH="31%">
120  * <P ALIGN="CENTER">ISO-8859-2
121  * </TD>
122  * </TR>
123  * <TR>
124  * <TD WIDTH="33%">ISO Latin 3</TD>
125  * <TD WIDTH="15%">
126  * <P ALIGN="CENTER">ISO-8859-3
127  * </TD>
128  * <TD WIDTH="12%">
129  * <P ALIGN="CENTER">MIME
130  * </TD>
131  * <TD WIDTH="31%">
132  * <P ALIGN="CENTER">ISO-8859-3
133  * </TD>
134  * </TR>
135  * <TR>
136  * <TD WIDTH="33%">ISO Latin 4</TD>
137  * <TD WIDTH="15%">
138  * <P ALIGN="CENTER">ISO-8859-4
139  * </TD>
140  * <TD WIDTH="12%">
141  * <P ALIGN="CENTER">MIME
142  * </TD>
143  * <TD WIDTH="31%">
144  * <P ALIGN="CENTER">ISO-8859-4
145  * </TD>
146  * </TR>
147  * <TR>
148  * <TD WIDTH="33%">ISO Latin Cyrillic</TD>
149  * <TD WIDTH="15%">
150  * <P ALIGN="CENTER">ISO-8859-5
151  * </TD>
152  * <TD WIDTH="12%">
153  * <P ALIGN="CENTER">MIME
154  * </TD>
155  * <TD WIDTH="31%">
156  * <P ALIGN="CENTER">ISO-8859-5
157  * </TD>
158  * </TR>
159  * <TR>
160  * <TD WIDTH="33%">ISO Latin Arabic</TD>
161  * <TD WIDTH="15%">
162  * <P ALIGN="CENTER">ISO-8859-6
163  * </TD>
164  * <TD WIDTH="12%">
165  * <P ALIGN="CENTER">MIME
166  * </TD>
167  * <TD WIDTH="31%">
168  * <P ALIGN="CENTER">ISO-8859-6
169  * </TD>
170  * </TR>
171  * <TR>
172  * <TD WIDTH="33%">ISO Latin Greek</TD>
173  * <TD WIDTH="15%">
174  * <P ALIGN="CENTER">ISO-8859-7
175  * </TD>
176  * <TD WIDTH="12%">
177  * <P ALIGN="CENTER">MIME
178  * </TD>
179  * <TD WIDTH="31%">
180  * <P ALIGN="CENTER">ISO-8859-7
181  * </TD>
182  * </TR>
183  * <TR>
184  * <TD WIDTH="33%">ISO Latin Hebrew</TD>
185  * <TD WIDTH="15%">
186  * <P ALIGN="CENTER">ISO-8859-8
187  * </TD>
188  * <TD WIDTH="12%">
189  * <P ALIGN="CENTER">MIME
190  * </TD>
191  * <TD WIDTH="31%">
192  * <P ALIGN="CENTER">ISO-8859-8
193  * </TD>
194  * </TR>
195  * <TR>
196  * <TD WIDTH="33%">ISO Latin 5</TD>
197  * <TD WIDTH="15%">
198  * <P ALIGN="CENTER">ISO-8859-9
199  * </TD>
200  * <TD WIDTH="12%">
201  * <P ALIGN="CENTER">MIME
202  * </TD>
203  * <TD WIDTH="31%">
204  * <P ALIGN="CENTER">ISO-8859-9
205  * </TD>
206  * </TR>
207  * <TR>
208  * <TD WIDTH="33%">EBCDIC: US</TD>
209  * <TD WIDTH="15%">
210  * <P ALIGN="CENTER">ebcdic-cp-us
211  * </TD>
212  * <TD WIDTH="12%">
213  * <P ALIGN="CENTER">IANA
214  * </TD>
215  * <TD WIDTH="31%">
216  * <P ALIGN="CENTER">cp037
217  * </TD>
218  * </TR>
219  * <TR>
220  * <TD WIDTH="33%">EBCDIC: Canada</TD>
221  * <TD WIDTH="15%">
222  * <P ALIGN="CENTER">ebcdic-cp-ca
223  * </TD>
224  * <TD WIDTH="12%">
225  * <P ALIGN="CENTER">IANA
226  * </TD>
227  * <TD WIDTH="31%">
228  * <P ALIGN="CENTER">cp037
229  * </TD>
230  * </TR>
231  * <TR>
232  * <TD WIDTH="33%">EBCDIC: Netherlands</TD>
233  * <TD WIDTH="15%">
234  * <P ALIGN="CENTER">ebcdic-cp-nl
235  * </TD>
236  * <TD WIDTH="12%">
237  * <P ALIGN="CENTER">IANA
238  * </TD>
239  * <TD WIDTH="31%">
240  * <P ALIGN="CENTER">cp037
241  * </TD>
242  * </TR>
243  * <TR>
244  * <TD WIDTH="33%">EBCDIC: Denmark</TD>
245  * <TD WIDTH="15%">
246  * <P ALIGN="CENTER">ebcdic-cp-dk
247  * </TD>
248  * <TD WIDTH="12%">
249  * <P ALIGN="CENTER">IANA
250  * </TD>
251  * <TD WIDTH="31%">
252  * <P ALIGN="CENTER">cp277
253  * </TD>
254  * </TR>
255  * <TR>
256  * <TD WIDTH="33%">EBCDIC: Norway</TD>
257  * <TD WIDTH="15%">
258  * <P ALIGN="CENTER">ebcdic-cp-no
259  * </TD>
260  * <TD WIDTH="12%">
261  * <P ALIGN="CENTER">IANA
262  * </TD>
263  * <TD WIDTH="31%">
264  * <P ALIGN="CENTER">cp277
265  * </TD>
266  * </TR>
267  * <TR>
268  * <TD WIDTH="33%">EBCDIC: Finland</TD>
269  * <TD WIDTH="15%">
270  * <P ALIGN="CENTER">ebcdic-cp-fi
271  * </TD>
272  * <TD WIDTH="12%">
273  * <P ALIGN="CENTER">IANA
274  * </TD>
275  * <TD WIDTH="31%">
276  * <P ALIGN="CENTER">cp278
277  * </TD>
278  * </TR>
279  * <TR>
280  * <TD WIDTH="33%">EBCDIC: Sweden</TD>
281  * <TD WIDTH="15%">
282  * <P ALIGN="CENTER">ebcdic-cp-se
283  * </TD>
284  * <TD WIDTH="12%">
285  * <P ALIGN="CENTER">IANA
286  * </TD>
287  * <TD WIDTH="31%">
288  * <P ALIGN="CENTER">cp278
289  * </TD>
290  * </TR>
291  * <TR>
292  * <TD WIDTH="33%">EBCDIC: Italy</TD>
293  * <TD WIDTH="15%">
294  * <P ALIGN="CENTER">ebcdic-cp-it
295  * </TD>
296  * <TD WIDTH="12%">
297  * <P ALIGN="CENTER">IANA
298  * </TD>
299  * <TD WIDTH="31%">
300  * <P ALIGN="CENTER">cp280
301  * </TD>
302  * </TR>
303  * <TR>
304  * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD>
305  * <TD WIDTH="15%">
306  * <P ALIGN="CENTER">ebcdic-cp-es
307  * </TD>
308  * <TD WIDTH="12%">
309  * <P ALIGN="CENTER">IANA
310  * </TD>
311  * <TD WIDTH="31%">
312  * <P ALIGN="CENTER">cp284
313  * </TD>
314  * </TR>
315  * <TR>
316  * <TD WIDTH="33%">EBCDIC: Great Britain</TD>
317  * <TD WIDTH="15%">
318  * <P ALIGN="CENTER">ebcdic-cp-gb
319  * </TD>
320  * <TD WIDTH="12%">
321  * <P ALIGN="CENTER">IANA
322  * </TD>
323  * <TD WIDTH="31%">
324  * <P ALIGN="CENTER">cp285
325  * </TD>
326  * </TR>
327  * <TR>
328  * <TD WIDTH="33%">EBCDIC: France</TD>
329  * <TD WIDTH="15%">
330  * <P ALIGN="CENTER">ebcdic-cp-fr
331  * </TD>
332  * <TD WIDTH="12%">
333  * <P ALIGN="CENTER">IANA
334  * </TD>
335  * <TD WIDTH="31%">
336  * <P ALIGN="CENTER">cp297
337  * </TD>
338  * </TR>
339  * <TR>
340  * <TD WIDTH="33%">EBCDIC: Arabic</TD>
341  * <TD WIDTH="15%">
342  * <P ALIGN="CENTER">ebcdic-cp-ar1
343  * </TD>
344  * <TD WIDTH="12%">
345  * <P ALIGN="CENTER">IANA
346  * </TD>
347  * <TD WIDTH="31%">
348  * <P ALIGN="CENTER">cp420
349  * </TD>
350  * </TR>
351  * <TR>
352  * <TD WIDTH="33%">EBCDIC: Hebrew</TD>
353  * <TD WIDTH="15%">
354  * <P ALIGN="CENTER">ebcdic-cp-he
355  * </TD>
356  * <TD WIDTH="12%">
357  * <P ALIGN="CENTER">IANA
358  * </TD>
359  * <TD WIDTH="31%">
360  * <P ALIGN="CENTER">cp424
361  * </TD>
362  * </TR>
363  * <TR>
364  * <TD WIDTH="33%">EBCDIC: Switzerland</TD>
365  * <TD WIDTH="15%">
366  * <P ALIGN="CENTER">ebcdic-cp-ch
367  * </TD>
368  * <TD WIDTH="12%">
369  * <P ALIGN="CENTER">IANA
370  * </TD>
371  * <TD WIDTH="31%">
372  * <P ALIGN="CENTER">cp500
373  * </TD>
374  * </TR>
375  * <TR>
376  * <TD WIDTH="33%">EBCDIC: Roece</TD>
377  * <TD WIDTH="15%">
378  * <P ALIGN="CENTER">ebcdic-cp-roece
379  * </TD>
380  * <TD WIDTH="12%">
381  * <P ALIGN="CENTER">IANA
382  * </TD>
383  * <TD WIDTH="31%">
384  * <P ALIGN="CENTER">cp870
385  * </TD>
386  * </TR>
387  * <TR>
388  * <TD WIDTH="33%">EBCDIC: Yugoslavia</TD>
389  * <TD WIDTH="15%">
390  * <P ALIGN="CENTER">ebcdic-cp-yu
391  * </TD>
392  * <TD WIDTH="12%">
393  * <P ALIGN="CENTER">IANA
394  * </TD>
395  * <TD WIDTH="31%">
396  * <P ALIGN="CENTER">cp870
397  * </TD>
398  * </TR>
399  * <TR>
400  * <TD WIDTH="33%">EBCDIC: Iceland</TD>
401  * <TD WIDTH="15%">
402  * <P ALIGN="CENTER">ebcdic-cp-is
403  * </TD>
404  * <TD WIDTH="12%">
405  * <P ALIGN="CENTER">IANA
406  * </TD>
407  * <TD WIDTH="31%">
408  * <P ALIGN="CENTER">cp871
409  * </TD>
410  * </TR>
411  * <TR>
412  * <TD WIDTH="33%">EBCDIC: Urdu</TD>
413  * <TD WIDTH="15%">
414  * <P ALIGN="CENTER">ebcdic-cp-ar2
415  * </TD>
416  * <TD WIDTH="12%">
417  * <P ALIGN="CENTER">IANA
418  * </TD>
419  * <TD WIDTH="31%">
420  * <P ALIGN="CENTER">cp918
421  * </TD>
422  * </TR>
423  * <TR>
424  * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD>
425  * <TD WIDTH="15%">
426  * <P ALIGN="CENTER">gb2312
427  * </TD>
428  * <TD WIDTH="12%">
429  * <P ALIGN="CENTER">MIME
430  * </TD>
431  * <TD WIDTH="31%">
432  * <P ALIGN="CENTER">GB2312
433  * </TD>
434  * </TR>
435  * <TR>
436  * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD>
437  * <TD WIDTH="15%">
438  * <P ALIGN="CENTER">euc-jp
439  * </TD>
440  * <TD WIDTH="12%">
441  * <P ALIGN="CENTER">MIME
442  * </TD>
443  * <TD WIDTH="31%">
444  * <P ALIGN="CENTER">EUC_JP
445  * </TD>
446  * </TR>
447  * <TR>
448  * <TD WIDTH="33%">Japanese: ISO-2022-jp</TD>
449  * <TD WIDTH="15%">
450  * <P ALIGN="CENTER">ISO-2022-jp
451  * </TD>
452  * <TD WIDTH="12%">
453  * <P ALIGN="CENTER">MIME
454  * </TD>
455  * <TD WIDTH="31%">
456  * <P ALIGN="CENTER">ISO2022JP
457  * </TD>
458  * </TR>
459  * <TR>
460  * <TD WIDTH="33%">Japanese: Shift JIS</TD>
461  * <TD WIDTH="15%">
462  * <P ALIGN="CENTER">Shift_JIS
463  * </TD>
464  * <TD WIDTH="12%">
465  * <P ALIGN="CENTER">MIME
466  * </TD>
467  * <TD WIDTH="31%">
468  * <P ALIGN="CENTER">SJIS
469  * </TD>
470  * </TR>
471  * <TR>
472  * <TD WIDTH="33%">Japanese Windows: An extension of Shift JIS</TD>
473  * <TD WIDTH="15%">
474  * <P ALIGN="CENTER">Windows-31J
475  * </TD>
476  * <TD WIDTH="12%">
477  * <P ALIGN="CENTER">MIME
478  * </TD>
479  * <TD WIDTH="31%">
480  * <P ALIGN="CENTER">MS932 (since JDK 1.2)
481  * </TD>
482  * </TR>
483  * <TR>
484  * <TD WIDTH="33%">Chinese: Big5</TD>
485  * <TD WIDTH="15%">
486  * <P ALIGN="CENTER">Big5
487  * </TD>
488  * <TD WIDTH="12%">
489  * <P ALIGN="CENTER">MIME
490  * </TD>
491  * <TD WIDTH="31%">
492  * <P ALIGN="CENTER">Big5
493  * </TD>
494  * </TR>
495  * <TR>
496  * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD>
497  * <TD WIDTH="15%">
498  * <P ALIGN="CENTER">euc-kr
499  * </TD>
500  * <TD WIDTH="12%">
501  * <P ALIGN="CENTER">MIME
502  * </TD>
503  * <TD WIDTH="31%">
504  * <P ALIGN="CENTER">EUC_KR
505  * </TD>
506  * </TR>
507  * <TR>
508  * <TD WIDTH="33%">Cyrillic</TD>
509  * <TD WIDTH="15%">
510  * <P ALIGN="CENTER">koi8-r
511  * </TD>
512  * <TD WIDTH="12%">
513  * <P ALIGN="CENTER">MIME
514  * </TD>
515  * <TD WIDTH="31%">
516  * <P ALIGN="CENTER">koi8-r
517  * </TD>
518  * </TR>
519  * </TABLE>
520  *
521  * @version $Id: MIME2Java.java,v 1.2 2005/01/26 08:28:44 jkjome Exp $
522  * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
523  */

public class MIME2Java {

    // Both tables are consulted with an upper-cased copy of the caller's name
    // (see lookup()), so every KEY below must be written in upper case or it
    // can never be matched.

    /** Upper-case MIME charset name (preferred name or alias) to Java encoding name. */
    private static final Hashtable s_enchash = new Hashtable();

    /** Upper-case Java encoding name to the preferred MIME charset name. */
    private static final Hashtable s_revhash = new Hashtable();

    static {
        // <MIME name or alias (uppercase)>, <Java encoding name>
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII", "ASCII");
        s_enchash.put("ISO-IR-6", "ASCII");
        s_enchash.put("ANSI_X3.4-1986", "ASCII");
        s_enchash.put("ISO_646.IRV:1991", "ASCII");
        s_enchash.put("ASCII", "ASCII");
        s_enchash.put("ISO646-US", "ASCII");
        s_enchash.put("US", "ASCII");
        s_enchash.put("IBM367", "ASCII");
        s_enchash.put("CP367", "ASCII");
        s_enchash.put("ISO-8859-1", "ISO8859_1");
        s_enchash.put("ISO-IR-100", "ISO8859_1");
        s_enchash.put("ISO_8859-1", "ISO8859_1");
        s_enchash.put("LATIN1", "ISO8859_1");
        s_enchash.put("L1", "ISO8859_1");
        s_enchash.put("IBM819", "ISO8859_1");
        s_enchash.put("CP819", "ISO8859_1");
        s_enchash.put("ISO-8859-2", "ISO8859_2");
        s_enchash.put("ISO-IR-101", "ISO8859_2");
        s_enchash.put("ISO_8859-2", "ISO8859_2");
        s_enchash.put("LATIN2", "ISO8859_2");
        s_enchash.put("L2", "ISO8859_2");
        s_enchash.put("ISO-8859-3", "ISO8859_3");
        s_enchash.put("ISO-IR-109", "ISO8859_3");
        s_enchash.put("ISO_8859-3", "ISO8859_3");
        s_enchash.put("LATIN3", "ISO8859_3");
        s_enchash.put("L3", "ISO8859_3");
        s_enchash.put("ISO-8859-4", "ISO8859_4");
        s_enchash.put("ISO-IR-110", "ISO8859_4");
        s_enchash.put("ISO_8859-4", "ISO8859_4");
        s_enchash.put("LATIN4", "ISO8859_4");
        s_enchash.put("L4", "ISO8859_4");
        s_enchash.put("ISO-8859-5", "ISO8859_5");
        s_enchash.put("ISO-IR-144", "ISO8859_5");
        s_enchash.put("ISO_8859-5", "ISO8859_5");
        s_enchash.put("CYRILLIC", "ISO8859_5");
        s_enchash.put("ISO-8859-6", "ISO8859_6");
        s_enchash.put("ISO-IR-127", "ISO8859_6");
        s_enchash.put("ISO_8859-6", "ISO8859_6");
        s_enchash.put("ECMA-114", "ISO8859_6");
        s_enchash.put("ASMO-708", "ISO8859_6");
        s_enchash.put("ARABIC", "ISO8859_6");
        s_enchash.put("ISO-8859-7", "ISO8859_7");
        s_enchash.put("ISO-IR-126", "ISO8859_7");
        s_enchash.put("ISO_8859-7", "ISO8859_7");
        s_enchash.put("ELOT_928", "ISO8859_7");
        s_enchash.put("ECMA-118", "ISO8859_7");
        s_enchash.put("GREEK", "ISO8859_7");
        s_enchash.put("GREEK8", "ISO8859_7");
        s_enchash.put("ISO-8859-8", "ISO8859_8");
        s_enchash.put("ISO-IR-138", "ISO8859_8");
        s_enchash.put("ISO_8859-8", "ISO8859_8");
        s_enchash.put("HEBREW", "ISO8859_8");
        s_enchash.put("ISO-8859-9", "ISO8859_9");
        s_enchash.put("ISO-IR-148", "ISO8859_9");
        s_enchash.put("ISO_8859-9", "ISO8859_9");
        s_enchash.put("LATIN5", "ISO8859_9");
        s_enchash.put("L5", "ISO8859_9");
        s_enchash.put("ISO-2022-JP", "ISO2022JP");
        s_enchash.put("SHIFT_JIS", "SJIS");
        // Key must be upper case: the mixed-case spelling "MS_Kanji" was
        // unreachable because lookups upper-case their argument.
        s_enchash.put("MS_KANJI", "SJIS");

        // MS932 is suitable for Windows-31J,
        // but JDK 1.1.x does not support MS932.
        String version = System.getProperty("java.version");
        if (version.equals("1.1") || version.startsWith("1.1.")) {
            s_enchash.put("WINDOWS-31J", "SJIS");
        } else {
            s_enchash.put("WINDOWS-31J", "MS932");
        }
        s_enchash.put("EUC-JP", "EUC_JP");
        s_enchash.put("GB2312", "GB2312");
        s_enchash.put("BIG5", "Big5");
        s_enchash.put("EUC-KR", "EUC_KR");
        s_enchash.put("ISO-2022-KR", "ISO2022KR");
        s_enchash.put("KOI8-R", "KOI8_R");
        s_enchash.put("ISO8859_1", "8859_1");

        s_enchash.put("EBCDIC-CP-US", "CP037");
        s_enchash.put("EBCDIC-CP-CA", "CP037");
        s_enchash.put("EBCDIC-CP-NL", "CP037");
        s_enchash.put("EBCDIC-CP-WT", "CP037");
        s_enchash.put("EBCDIC-CP-DK", "CP277");
        s_enchash.put("EBCDIC-CP-NO", "CP277");
        s_enchash.put("EBCDIC-CP-FI", "CP278");
        s_enchash.put("EBCDIC-CP-SE", "CP278");
        s_enchash.put("EBCDIC-CP-IT", "CP280");
        s_enchash.put("EBCDIC-CP-ES", "CP284");
        s_enchash.put("EBCDIC-CP-GB", "CP285");
        s_enchash.put("EBCDIC-CP-FR", "CP297");
        s_enchash.put("EBCDIC-CP-AR1", "CP420");
        s_enchash.put("EBCDIC-CP-HE", "CP424");
        s_enchash.put("EBCDIC-CP-CH", "CP500");
        s_enchash.put("EBCDIC-CP-BE", "CP500");
        s_enchash.put("CP-AR", "CP868");
        s_enchash.put("CP-GR", "CP869");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU", "CP870");
        s_enchash.put("EBCDIC-CP-IS", "CP871");
        s_enchash.put("EBCDIC-CP-AR2", "CP918");

        // Add support for Cp1252 and its friends
        s_enchash.put("WINDOWS-1250", "Cp1250");
        s_enchash.put("WINDOWS-1251", "Cp1251");
        s_enchash.put("WINDOWS-1252", "Cp1252");
        s_enchash.put("WINDOWS-1253", "Cp1253");
        s_enchash.put("WINDOWS-1254", "Cp1254");
        s_enchash.put("WINDOWS-1255", "Cp1255");
        s_enchash.put("WINDOWS-1256", "Cp1256");
        s_enchash.put("WINDOWS-1257", "Cp1257");
        s_enchash.put("WINDOWS-1258", "Cp1258");
        s_enchash.put("TIS-620", "TIS620");
        // j:CNS11643 -> EUC-TW?
        s_enchash.put("ISO-2022-CN", "ISO2022CN");
        s_enchash.put("X0201", "JIS0201");
        s_enchash.put("X0208", "JIS0208");
        s_enchash.put("X0212", "JIS0212");
        s_enchash.put("ISO-IR-159", "JIS0212");

        // <Java encoding name (uppercase)>, <preferred MIME name>
        //
        // Exactly one entry per Java name. A Hashtable keeps only the last
        // value put for a key, so listing every alias here (as an earlier
        // revision did) made reverse() answer with an obscure alias such as
        // "CP819" instead of the preferred "ISO-8859-1". The name kept for
        // each encoding is the one listed first for it in s_enchash above.
        s_revhash.put("UTF8", "UTF-8");
        s_revhash.put("ASCII", "US-ASCII");
        s_revhash.put("ISO8859_1", "ISO-8859-1");
        s_revhash.put("ISO8859_2", "ISO-8859-2");
        s_revhash.put("ISO8859_3", "ISO-8859-3");
        s_revhash.put("ISO8859_4", "ISO-8859-4");
        s_revhash.put("ISO8859_5", "ISO-8859-5");
        s_revhash.put("ISO8859_6", "ISO-8859-6");
        s_revhash.put("ISO8859_7", "ISO-8859-7");
        s_revhash.put("ISO8859_8", "ISO-8859-8");
        s_revhash.put("ISO8859_9", "ISO-8859-9");
        s_revhash.put("ISO2022JP", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("MS932", "WINDOWS-31J");
        s_revhash.put("EUC_JP", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("EUC_KR", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        s_revhash.put("CP037", "EBCDIC-CP-US");
        s_revhash.put("CP277", "EBCDIC-CP-DK");
        s_revhash.put("CP278", "EBCDIC-CP-FI");
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP868", "CP-AR");
        s_revhash.put("CP869", "CP-GR");
        s_revhash.put("CP870", "EBCDIC-CP-ROECE");
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");

        // Cp1252 and friends: the keys are upper case here because reverse()
        // upper-cases its argument, even though convert() hands these names
        // out in mixed case ("Cp1252").
        s_revhash.put("CP1250", "WINDOWS-1250");
        s_revhash.put("CP1251", "WINDOWS-1251");
        s_revhash.put("CP1252", "WINDOWS-1252");
        s_revhash.put("CP1253", "WINDOWS-1253");
        s_revhash.put("CP1254", "WINDOWS-1254");
        s_revhash.put("CP1255", "WINDOWS-1255");
        s_revhash.put("CP1256", "WINDOWS-1256");
        s_revhash.put("CP1257", "WINDOWS-1257");
        s_revhash.put("CP1258", "WINDOWS-1258");
        s_revhash.put("TIS620", "TIS-620");
        s_revhash.put("ISO2022CN", "ISO-2022-CN");
        s_revhash.put("JIS0201", "X0201");
        s_revhash.put("JIS0208", "X0208");
        s_revhash.put("JIS0212", "X0212");
    }

    /** Not instantiable: this class only exposes static lookups. */
    private MIME2Java() {
    }

    /**
     * Looks a name up in one of the tables, ignoring case.
     * The name is upper-cased with a fixed locale rather than the platform
     * default, because the default-locale rules can change ASCII letters
     * (under a Turkish locale "i" becomes a dotted capital I) and the
     * upper-case ASCII keys would then never match.
     *
     * @param table table whose keys are upper-case names
     * @param name  name to look up; may be <var>null</var>
     * @return the mapped name, or <var>null</var> if <var>name</var> is
     *         <var>null</var> or has no entry
     */
    private static String lookup(Hashtable table, String name) {
        if (name == null) {
            return null;
        }
        return (String) table.get(name.toUpperCase(java.util.Locale.ENGLISH));
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * @param mimeCharsetName Case insensitive MIME charset name or alias, for example
     * <code>UTF-8, US-ASCII, ISO-8859-1 ... ISO-8859-9, ISO-2022-JP, Shift_JIS, Windows-31J,
     * EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R, EBCDIC-CP-US, WINDOWS-1252</code>.
     * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     * is <var>null</var> or unknown.
     * @see #reverse
     */
    public static String convert(String mimeCharsetName) {
        return lookup(s_enchash, mimeCharsetName);
    }

    /**
     * Convert a Java encoding name to its preferred MIME charset name.
     * @param encoding Case insensitive Java encoding name, for example <code>UTF8, ASCII,
     * ISO8859_1 ... ISO8859_9, ISO2022JP, SJIS, MS932, EUC_JP, GB2312, BIG5, EUC_KR,
     * ISO2022KR, KOI8_R, CP037, CP1252</code>.
     * @return MIME charset name, or <var>null</var> if <var>encoding</var> is
     * <var>null</var> or unknown.
     * @see #convert
     */
    public static String reverse(String encoding) {
        return lookup(s_revhash, encoding);
    }
}
806
Popular Tags