// Listing source: KickJava - Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > java > lang > StringCoding


/*
 * @(#)StringCoding.java 1.15 05/03/03
 *
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */

package java.lang;

import java.io.CharConversionException;
import java.io.UnsupportedEncodingException;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.BufferOverflowException;
import java.nio.BufferUnderflowException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnsupportedCharsetException;
import sun.io.ByteToCharConverter;
import sun.io.CharToByteConverter;
import sun.io.Converters;
import sun.misc.MessageUtils;
import sun.nio.cs.HistoricallyNamedCharset;

33 /**
34  * Utility class for string encoding and decoding.
35  */

36
37 class StringCoding {
38
39     private StringCoding() { }
40
41     /* The cached coders for each thread
42      */

43     private static ThreadLocal JavaDoc decoder = new ThreadLocal JavaDoc();
44     private static ThreadLocal JavaDoc encoder = new ThreadLocal JavaDoc();
45
46     private static boolean warnUnsupportedCharset = true;
47
48     private static Object JavaDoc deref(ThreadLocal JavaDoc tl) {
49         SoftReference JavaDoc sr = (SoftReference JavaDoc)tl.get();
50     if (sr == null)
51         return null;
52     return sr.get();
53     }
54
55     private static void set(ThreadLocal JavaDoc tl, Object JavaDoc ob) {
56     tl.set(new SoftReference JavaDoc(ob));
57     }
58
59     // Trim the given byte array to the given length
60
//
61
private static byte[] trim(byte[] ba, int len) {
62     if (len == ba.length)
63         return ba;
64     byte[] tba = new byte[len];
65     System.arraycopy(ba, 0, tba, 0, len);
66     return tba;
67     }
68
69     // Trim the given char array to the given length
70
//
71
private static char[] trim(char[] ca, int len) {
72     if (len == ca.length)
73         return ca;
74     char[] tca = new char[len];
75     System.arraycopy(ca, 0, tca, 0, len);
76     return tca;
77     }
78
79     private static int scale(int len, float expansionFactor) {
80       // We need to perform double, not float, arithmetic; otherwise
81
// we lose low order bits when len is larger than 2**24.
82
return (int)(len * (double)expansionFactor);
83     }
84
85     private static Charset JavaDoc lookupCharset(String JavaDoc csn) {
86     if (Charset.isSupported(csn)) {
87         try {
88         return Charset.forName(csn);
89         } catch (UnsupportedCharsetException JavaDoc x) {
90         throw new Error JavaDoc(x);
91         }
92     }
93     return null;
94     }
95
96     private static void warnUnsupportedCharset(String JavaDoc csn) {
97     if (warnUnsupportedCharset) {
98         // Use sun.misc.MessageUtils rather than the Logging API or
99
// System.err since this method may be called during VM
100
// initialization before either is available.
101
MessageUtils.err("WARNING: Default charset " + csn +
102                  " not supported, using ISO-8859-1 instead");
103         warnUnsupportedCharset = false;
104     }
105     }
106
107
108     // -- Decoding --
109

110     // Encapsulates either a ByteToCharConverter or a CharsetDecoder
111
//
112
private static abstract class StringDecoder {
113     private final String JavaDoc requestedCharsetName;
114     protected StringDecoder(String JavaDoc requestedCharsetName) {
115         this.requestedCharsetName = requestedCharsetName;
116     }
117     final String JavaDoc requestedCharsetName() {
118         return requestedCharsetName;
119     }
120     abstract String JavaDoc charsetName();
121     abstract char[] decode(byte[] ba, int off, int len);
122     }
123
124     // A string decoder based upon a ByteToCharConverter
125
//
126
private static class ConverterSD
127     extends StringDecoder
128     {
129     private ByteToCharConverter btc;
130
131     private ConverterSD(ByteToCharConverter btc, String JavaDoc rcn) {
132         super(rcn);
133         this.btc = btc;
134     }
135
136     String JavaDoc charsetName() {
137         return btc.getCharacterEncoding();
138     }
139
140     char[] decode(byte[] ba, int off, int len) {
141         int en = scale(len, btc.getMaxCharsPerByte());
142         char[] ca = new char[en];
143         if (len == 0)
144         return ca;
145         btc.reset();
146         int n = 0;
147         try {
148         n = btc.convert(ba, off, off + len, ca, 0, en);
149         n += btc.flush(ca, btc.nextCharIndex(), en);
150         } catch (CharConversionException JavaDoc x) {
151         // Yes, this is what we've always done
152
n = btc.nextCharIndex();
153         }
154         return trim(ca, n);
155     }
156
157     }
158
159     // A string decoder based upon a CharsetDecoder
160
//
161
private static class CharsetSD
162     extends StringDecoder
163     {
164     private final Charset JavaDoc cs;
165     private final CharsetDecoder JavaDoc cd;
166
167     private CharsetSD(Charset JavaDoc cs, String JavaDoc rcn) {
168         super(rcn);
169         this.cs = cs;
170         this.cd = cs.newDecoder()
171         .onMalformedInput(CodingErrorAction.REPLACE)
172         .onUnmappableCharacter(CodingErrorAction.REPLACE);
173     }
174
175     String JavaDoc charsetName() {
176         if (cs instanceof HistoricallyNamedCharset)
177         return ((HistoricallyNamedCharset)cs).historicalName();
178         return cs.name();
179     }
180
181     char[] decode(byte[] ba, int off, int len) {
182         int en = scale(len, cd.maxCharsPerByte());
183         char[] ca = new char[en];
184         if (len == 0)
185         return ca;
186         cd.reset();
187         ByteBuffer JavaDoc bb = ByteBuffer.wrap(ba, off, len);
188         CharBuffer JavaDoc cb = CharBuffer.wrap(ca);
189         try {
190         CoderResult JavaDoc cr = cd.decode(bb, cb, true);
191         if (!cr.isUnderflow())
192             cr.throwException();
193         cr = cd.flush(cb);
194         if (!cr.isUnderflow())
195             cr.throwException();
196         } catch (CharacterCodingException JavaDoc x) {
197         // Substitution is always enabled,
198
// so this shouldn't happen
199
throw new Error JavaDoc(x);
200         }
201         return trim(ca, cb.position());
202     }
203
204     }
205
206     static char[] decode(String JavaDoc charsetName, byte[] ba, int off, int len)
207     throws UnsupportedEncodingException JavaDoc
208     {
209     StringDecoder sd = (StringDecoder)deref(decoder);
210     String JavaDoc csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
211     if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
212                   || csn.equals(sd.charsetName()))) {
213         sd = null;
214         try {
215         Charset JavaDoc cs = lookupCharset(csn);
216         if (cs != null)
217             sd = new CharsetSD(cs, csn);
218         else
219             sd = null;
220         } catch (IllegalCharsetNameException JavaDoc x) {
221         // FALL THROUGH to ByteToCharConverter, for compatibility
222
}
223         if (sd == null)
224         sd = new ConverterSD(ByteToCharConverter.getConverter(csn),
225                      csn);
226         set(decoder, sd);
227     }
228     return sd.decode(ba, off, len);
229     }
230
231     static char[] decode(byte[] ba, int off, int len) {
232     String JavaDoc csn = Converters.getDefaultEncodingName();
233     try {
234         return decode(csn, ba, off, len);
235     } catch (UnsupportedEncodingException JavaDoc x) {
236         Converters.resetDefaultEncodingName();
237         warnUnsupportedCharset(csn);
238     }
239     try {
240         return decode("ISO-8859-1", ba, off, len);
241     } catch (UnsupportedEncodingException JavaDoc x) {
242         // If this code is hit during VM initialization, MessageUtils is
243
// the only way we will be able to get any kind of error message.
244
MessageUtils.err("ISO-8859-1 charset not available: "
245                  + x.toString());
246         // If we can not find ISO-8859-1 (a required encoding) then things
247
// are seriously wrong with the installation.
248
System.exit(1);
249         return null;
250     }
251     }
252
253
254
255
256     // -- Encoding --
257

258     // Encapsulates either a CharToByteConverter or a CharsetEncoder
259
//
260
private static abstract class StringEncoder {
261     private final String JavaDoc requestedCharsetName;
262     protected StringEncoder(String JavaDoc requestedCharsetName) {
263         this.requestedCharsetName = requestedCharsetName;
264     }
265     final String JavaDoc requestedCharsetName() {
266         return requestedCharsetName;
267     }
268     abstract String JavaDoc charsetName();
269     abstract byte[] encode(char[] cs, int off, int len);
270     }
271
272     // A string encoder based upon a CharToByteConverter
273
//
274
private static class ConverterSE
275     extends StringEncoder
276     {
277     private CharToByteConverter ctb;
278
279     private ConverterSE(CharToByteConverter ctb, String JavaDoc rcn) {
280         super(rcn);
281         this.ctb = ctb;
282     }
283
284     String JavaDoc charsetName() {
285         return ctb.getCharacterEncoding();
286     }
287
288     byte[] encode(char[] ca, int off, int len) {
289         int en = scale(len, ctb.getMaxBytesPerChar());
290         byte[] ba = new byte[en];
291         if (len == 0)
292         return ba;
293
294         ctb.reset();
295         int n;
296         try {
297         n = ctb.convertAny(ca, off, (off + len),
298                    ba, 0, en);
299         n += ctb.flushAny(ba, ctb.nextByteIndex(), en);
300         } catch (CharConversionException JavaDoc x) {
301         throw new Error JavaDoc("Converter malfunction: " +
302                 ctb.getClass().getName(),
303                 x);
304         }
305         return trim(ba, n);
306     }
307
308     }
309
310     // A string encoder based upon a CharsetEncoder
311
//
312
private static class CharsetSE
313     extends StringEncoder
314     {
315     private Charset JavaDoc cs;
316     private CharsetEncoder JavaDoc ce;
317
318     private CharsetSE(Charset JavaDoc cs, String JavaDoc rcn) {
319         super(rcn);
320         this.cs = cs;
321         this.ce = cs.newEncoder()
322         .onMalformedInput(CodingErrorAction.REPLACE)
323         .onUnmappableCharacter(CodingErrorAction.REPLACE);
324     }
325
326     String JavaDoc charsetName() {
327         if (cs instanceof HistoricallyNamedCharset)
328         return ((HistoricallyNamedCharset)cs).historicalName();
329         return cs.name();
330     }
331
332     byte[] encode(char[] ca, int off, int len) {
333         int en = scale(len, ce.maxBytesPerChar());
334         byte[] ba = new byte[en];
335         if (len == 0)
336         return ba;
337
338         ce.reset();
339         ByteBuffer JavaDoc bb = ByteBuffer.wrap(ba);
340         CharBuffer JavaDoc cb = CharBuffer.wrap(ca, off, len);
341         try {
342         CoderResult JavaDoc cr = ce.encode(cb, bb, true);
343         if (!cr.isUnderflow())
344             cr.throwException();
345         cr = ce.flush(bb);
346         if (!cr.isUnderflow())
347             cr.throwException();
348         } catch (CharacterCodingException JavaDoc x) {
349         // Substitution is always enabled,
350
// so this shouldn't happen
351
throw new Error JavaDoc(x);
352         }
353         return trim(ba, bb.position());
354     }
355
356     }
357
358     static byte[] encode(String JavaDoc charsetName, char[] ca, int off, int len)
359     throws UnsupportedEncodingException JavaDoc
360     {
361     StringEncoder se = (StringEncoder)deref(encoder);
362     String JavaDoc csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
363     if ((se == null) || !(csn.equals(se.requestedCharsetName())
364                   || csn.equals(se.charsetName()))) {
365         se = null;
366         try {
367         Charset JavaDoc cs = lookupCharset(csn);
368         if (cs != null)
369             se = new CharsetSE(cs, csn);
370         } catch (IllegalCharsetNameException JavaDoc x) {
371         // FALL THROUGH to CharToByteConverter, for compatibility
372
}
373         if (se == null)
374         se = new ConverterSE(CharToByteConverter.getConverter(csn),
375                      csn);
376         set(encoder, se);
377     }
378     return se.encode(ca, off, len);
379     }
380
381     static byte[] encode(char[] ca, int off, int len) {
382     String JavaDoc csn = Converters.getDefaultEncodingName();
383     try {
384         return encode(csn, ca, off, len);
385     } catch (UnsupportedEncodingException JavaDoc x) {
386         Converters.resetDefaultEncodingName();
387         warnUnsupportedCharset(csn);
388     }
389     try {
390         return encode("ISO-8859-1", ca, off, len);
391     } catch (UnsupportedEncodingException JavaDoc x) {
392         // If this code is hit during VM initialization, MessageUtils is
393
// the only way we will be able to get any kind of error message.
394
MessageUtils.err("ISO-8859-1 charset not available: "
395                  + x.toString());
396         // If we can not find ISO-8859-1 (a required encoding) then things
397
// are seriously wrong with the installation.
398
System.exit(1);
399         return null;
400     }
401     }
402
403 }
404
// Popular Tags