KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > charcode > CharacterSetFactory


1 package net.sf.saxon.charcode;
2 import net.sf.saxon.Controller;
3 import net.sf.saxon.trans.DynamicError;
4 import net.sf.saxon.trans.XPathException;
5
6 import javax.xml.transform.OutputKeys JavaDoc;
7 import java.nio.charset.Charset JavaDoc;
8 import java.nio.charset.IllegalCharsetNameException JavaDoc;
9 import java.nio.charset.UnsupportedCharsetException JavaDoc;
10 import java.util.Iterator JavaDoc;
11 import java.util.Properties JavaDoc;
12
13 /**
14 * This class creates a CharacterSet object for a given named encoding.
15 */

16
17
18 public class CharacterSetFactory {
19
20     /**
21      * Class is never instantiated
22      */

23     private CharacterSetFactory() {
24     }
25
26     /**
27     * Make a CharacterSet appropriate to the encoding
28     */

29
30     public static CharacterSet getCharacterSet(Properties JavaDoc details, Controller controller)
31     throws XPathException {
32
33         String JavaDoc encoding = details.getProperty(OutputKeys.ENCODING);
34         if (encoding==null) encoding = "UTF8";
35         if (encoding.equalsIgnoreCase("UTF-8")) encoding = "UTF8"; // needed for Microsoft Java VM
36

37         CharacterSet charSet = makeCharacterSet(encoding, controller);
38         if (charSet==null) {
39             charSet = ASCIICharacterSet.getInstance();
40         }
41         return charSet;
42     }
43
44     private static CharacterSet makeCharacterSet(String JavaDoc encoding, Controller controller)
45     throws XPathException {
46         if (encoding.equalsIgnoreCase("UTF8")) {
47             return UnicodeCharacterSet.getInstance();
48         } else if (encoding.equalsIgnoreCase("ASCII")) {
49             return ASCIICharacterSet.getInstance();
50         } else if (encoding.equalsIgnoreCase("US-ASCII")) {
51             return ASCIICharacterSet.getInstance();
52         } else if (encoding.equalsIgnoreCase("iso-646")) {
53             return ASCIICharacterSet.getInstance();
54         } else if (encoding.equalsIgnoreCase("iso646")) {
55             return ASCIICharacterSet.getInstance();
56         } else if (encoding.equalsIgnoreCase("iso-8859-1")) {
57             return Latin1CharacterSet.getInstance();
58         } else if (encoding.equalsIgnoreCase("ISO8859_1")) {
59             return Latin1CharacterSet.getInstance();
60         } else if (encoding.equalsIgnoreCase("iso-8859-2")) {
61             return Latin2CharacterSet.getInstance();
62         } else if (encoding.equalsIgnoreCase("ISO8859_2")) {
63             return Latin2CharacterSet.getInstance();
64         } else if (encoding.equalsIgnoreCase("UTF-8")) {
65             return UnicodeCharacterSet.getInstance();
66         } else if (encoding.equalsIgnoreCase("UTF-16")) {
67             return UnicodeCharacterSet.getInstance();
68         } else if (encoding.equalsIgnoreCase("UTF16")) {
69             return UnicodeCharacterSet.getInstance();
70         } else if (encoding.equalsIgnoreCase("KOI8-R")) {
71             return KOI8RCharacterSet.getInstance();
72         } else if (encoding.equalsIgnoreCase("cp1251")) {
73             return CP1251CharacterSet.getInstance();
74         } else if (encoding.equalsIgnoreCase("windows-1251")) {
75             return CP1251CharacterSet.getInstance();
76         } else if (encoding.equalsIgnoreCase("cp1250")) {
77             return CP1250CharacterSet.getInstance();
78         } else if (encoding.equalsIgnoreCase("windows-1250")) {
79             return CP1250CharacterSet.getInstance();
80         } else if (encoding.equalsIgnoreCase("cp1252")) {
81             return CP1252CharacterSet.getInstance();
82         } else if (encoding.equalsIgnoreCase("windows-1252")) {
83             return CP1252CharacterSet.getInstance();
84         } else if (encoding.equalsIgnoreCase("cp852")) {
85             return CP852CharacterSet.getInstance();
86         } else if (encoding.equalsIgnoreCase("windows-852")) {
87             return CP852CharacterSet.getInstance();
88
89         } else {
90             // Allow an alias for the character set to be specified as a system property
91
String JavaDoc csname = System.getProperty(OutputKeys.ENCODING + '.' + encoding);
92             if (csname == null) {
93                 Charset JavaDoc charset;
94                 try {
95                     charset = Charset.forName(encoding);
96                     CharacterSet res = UnknownCharacterSet.makeCharSet(charset);
97
98                     // Some JDK1.4 charsets are known to be buggy, for example SJIS.
99
// We'll see whether the charset claims to be able to encode some
100
// tricky characters; if it says it can, the chances are it's lying.
101

102                     if (res.inCharset(0x1ff) &&
103                             res.inCharset(0x300) &&
104                             res.inCharset(0xa90) &&
105                             res.inCharset(0x2200) &&
106                             res.inCharset(0x3400)) {
107                         res = BuggyCharacterSet.makeCharSet(charset);
108                     }
109                     return res;
110                 } catch (IllegalCharsetNameException JavaDoc err) {
111                     throw new DynamicError("Invalid encoding name: " + encoding);
112                 } catch (UnsupportedCharsetException JavaDoc err) {
113                     //System.err.println("Unknown encoding " + encoding + ": reverting to ASCII");
114
return ASCIICharacterSet.getInstance();
115                 }
116             } else {
117                 try {
118                     Object JavaDoc obj = controller.getConfiguration().getInstance(csname, controller.getClassLoader());
119                     if (obj instanceof PluggableCharacterSet) {
120                         return (PluggableCharacterSet)obj;
121                     }
122                 } catch (Exception JavaDoc err) {
123                     throw new DynamicError("Failed to load " + csname);
124                 }
125             }
126         }
127         return null;
128     }
129
130     /**
131      * Main program is a utility to give a list of the character sets supported
132      * by the Java VM
133      */

134
135     public static void main(String JavaDoc[] args) throws Exception JavaDoc {
136         System.err.println("Available Character Sets in the java.nio package for this Java VM:");
137         Iterator JavaDoc iter = Charset.availableCharsets().keySet().iterator();
138         while (iter.hasNext()) {
139             String JavaDoc s = (String JavaDoc) iter.next();
140             System.err.println(s);
141         }
142     }
143 }
144
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//
162
Popular Tags