KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > charcode > BuggyCharacterSet


1 package net.sf.saxon.charcode;
2
3 import net.sf.saxon.om.XMLChar;
4
5 import java.nio.CharBuffer JavaDoc;
6 import java.nio.charset.CharacterCodingException JavaDoc;
7 import java.nio.charset.Charset JavaDoc;
8 import java.nio.charset.CharsetEncoder JavaDoc;
9 import java.util.HashMap JavaDoc;
10
11 /**
12 * This class establishes properties of a character set that is
13  * known to the Java VM but not specifically known to Saxon. It avoids
14  * using the encoder.canEncode() method because there is a known bug
15  * (in JDK 1.4.2) that for some encodings, this returns true for
16  * every character. So this version of the class actually attempts
17  * to encode the characters, and catches the exception when it fails.
18 */

19
20 public class BuggyCharacterSet implements CharacterSet {
21
22     private static HashMap JavaDoc map;
23
24     private CharsetEncoder JavaDoc encoder;
25
26     // This class is written on the assumption that the CharsetEncoder.canEncode()
27
// method may be expensive. For BMP characters, it therefore remembers the results
28
// so each character is only looked up the first time it is encountered.
29

30     private byte[] charinfo = new byte[65536];
31         // rely on initialization to zeroes
32

33     //private final static byte UNKNOWN = 0;
34
private static final byte GOOD = 1;
35     private static final byte BAD = 2;
36
37     private BuggyCharacterSet(Charset JavaDoc charset) {
38         encoder = charset.newEncoder();
39     }
40     
41     public static synchronized BuggyCharacterSet makeCharSet(Charset JavaDoc charset) {
42         if (map == null) {
43             map = new HashMap JavaDoc(10);
44         }
45         BuggyCharacterSet c = (BuggyCharacterSet)map.get(charset);
46         if (c == null) {
47             c = new BuggyCharacterSet(charset);
48             map.put(charset, c);
49         }
50         return c;
51     }
52
53     public final boolean inCharset(int c) {
54         // Assume ASCII chars are always OK
55
if (c <= 127) {
56             return true;
57         }
58         try {
59             if (c <= 65535) {
60                 if (charinfo[c] == GOOD) {
61                     return true;
62                 } else if (charinfo[c] == BAD) {
63                     return false;
64                 } else {
65                     charinfo[c] = BAD; // guilty until proved innocent
66
char[] cc = {(char)c};
67                     encoder.encode(CharBuffer.wrap(cc));
68                     charinfo[c] = GOOD;
69                     return true;
70                 }
71             } else {
72                 char[] ss = { XMLChar.highSurrogate(c),
73                               XMLChar.lowSurrogate(c) };
74                 encoder.encode(CharBuffer.wrap(ss));
75                 return true;
76             }
77         } catch (CharacterCodingException JavaDoc ex) {
78             return false;
79         }
80     }
81
82 }
83
84 //
85
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
86
// you may not use this file except in compliance with the License. You may obtain a copy of the
87
// License at http://www.mozilla.org/MPL/
88
//
89
// Software distributed under the License is distributed on an "AS IS" basis,
90
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
91
// See the License for the specific language governing rights and limitations under the License.
92
//
93
// The Original Code is: all this file.
94
//
95
// The Initial Developer of the Original Code is
96
// Aleksei Makarov [makarov@iitam.omsk.net.ru]
97
//
98
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
99
//
100
// Contributor(s): none.
101
//
102
Popular Tags