KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > charcode > UnicodeCharacterSet


1 package net.sf.saxon.charcode;
2
3 /**
4 * This class defines properties of the Unicode character set
5 */

6
7 public final class UnicodeCharacterSet implements CharacterSet {
8
9     private static UnicodeCharacterSet theInstance = new UnicodeCharacterSet();
10
11     /**
12      * Private constructor to force the singular instance to be used
13      */

14
15     private UnicodeCharacterSet() {}
16
17     public static UnicodeCharacterSet getInstance() {
18         return theInstance;
19     }
20
21     public boolean inCharset(int c) {
22         return true;
23     }
24
25     /**
26     * Static method to generate the UTF-8 representation of a Unicode character
27     * @param in the Unicode character, or the high half of a surrogate pair
28     * @param in2 the low half of a surrogate pair (ignored unless the first argument is in the
29     * range for a surrogate pair)
30     * @param out an array of at least 4 bytes to hold the UTF-8 representation.
31     * @return the number of bytes in the UTF-8 representation
32     */

33
34     public static int getUTF8Encoding(char in, char in2, byte[] out) {
35         // See Tony Graham, "Unicode, a Primer", page 92
36
int i = (int)in;
37         if (i<=0x7f) {
38             out[0] = (byte)i;
39             return 1;
40         } else if (i<=0x7ff) {
41             out[0] = (byte)(0xc0 | ((in >> 6) & 0x1f));
42             out[1] = (byte)(0x80 | (in & 0x3f));
43             return 2;
44         } else if (i>=0xd800 && i<=0xdbff) {
45             // surrogate pair
46
int j = (int)in2;
47             if (!(j>=0xdc00 && j<=0xdfff)) {
48                 throw new IllegalArgumentException JavaDoc("Malformed Unicode Surrogate Pair (" + i + "," + j + ")");
49             }
50             byte xxxxxx = (byte)(j & 0x3f);
51             byte yyyyyy = (byte)(((i & 0x03) << 4) | ((j >> 6) & 0x0f));
52             byte zzzz = (byte)((i >> 2) & 0x0f);
53             byte uuuuu = (byte)(((i >> 6) & 0x0f) + 1);
54             out[0] = (byte)(0xf0 | ((uuuuu >> 2) & 0x07));
55             out[1] = (byte)(0x80 | ((uuuuu & 0x03) << 4) | zzzz);
56             out[2] = (byte)(0x80 | yyyyyy);
57             out[3] = (byte)(0x80 | xxxxxx);
58             return 4;
59         } else if (i>=0xdc00 && i<=0xdfff) {
60             // second half of surrogate pair - ignore it
61
return 0;
62         } else {
63             out[0] = (byte)(0xe0 | ((in >> 12) & 0x0f));
64             out[1] = (byte)(0x80 | ((in >> 6) & 0x3f));
65             out[2] = (byte)(0x80 | (in & 0x3f));
66             return 3;
67         }
68     }
69
70 }
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is
// Aleksei Makarov [makarov@iitam.omsk.net.ru]
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//
Popular Tags