KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > gcc > util > UTF8


/*
 * Copyright 2004 The Apache Software Foundation or its licensors, as
 * applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

19 package gcc.util;
20
21 public abstract class UTF8
22 {
23     public static byte[] fromString(String value)
24     {
25         int n = value.length(), u = 0;
26         for (int i = 0; i < n; i++)
27         {
28             int c = value.charAt(i);
29             if (c >= 0x0001 && c <= 0x007F)
30             {
31                 u++;
32             }
33             else if (c > 0x07FF)
34             {
35                 u += 3;
36             }
37             else
38             {
39                 u += 2;
40             }
41         }
42         byte[] bytes = new byte[u];
43         for (int i = 0, j = 0; i < n; i++)
44         {
45             int c = value.charAt(i);
46             if (c >= 0x0001 && c <= 0x007F)
47             {
48                 bytes[j++] = (byte)c;
49             }
50             else if (c > 0x07FF)
51             {
52                 bytes[j++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
53                 bytes[j++] = (byte)(0x80 | ((c >> 6) & 0x3F));
54                 bytes[j++] = (byte)(0x80 | (c & 0x3F));
55             }
56             else
57             {
58                 bytes[j++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
59                 bytes[j++] = (byte)(0x80 | (c & 0x3F));
60             }
61         }
62         return bytes;
63     }
64
65     /**
66      ** If there is sufficient space in buffer from offset to convert value
67      ** without allocating a new byte array, do so now and return the number
68      ** of bytes written. Otherwise return -1. This method is intended for
69      ** use in optimized string marshalling.
70      **/

71     public static int fromString(String value, byte[] buffer, int offset, int length)
72     {
73         int n = value.length(), j = offset;
74         for (int i = 0; i < n; i++)
75         {
76             if (j + 3 > length)
77             {
78                 return -1;
79             }
80             int c = value.charAt(i);
81             if (c >= 0x0001 && c <= 0x007F)
82             {
83                 buffer[j++] = (byte)c;
84             }
85             else if (c > 0x07FF)
86             {
87                 buffer[j++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
88                 buffer[j++] = (byte)(0x80 | ((c >> 6) & 0x3F));
89                 buffer[j++] = (byte)(0x80 | (c & 0x3F));
90             }
91             else
92             {
93                 buffer[j++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
94                 buffer[j++] = (byte)(0x80 | (c & 0x3F));
95             }
96         }
97         return j - offset;
98     }
99
100     public static String toString(byte[] value)
101     {
102         return toString(value, 0, value.length);
103     }
104
105     public static String toString(byte[] value, int offset, int length)
106     {
107         int n = offset + length, j = 0;
108         char[] chars = new char[length]; // May be more than we need, but not less
109
for (int i = offset; i < n; i++)
110         {
111             int c = (value[i] + 256) & 255; // byte is signed, we need unsigned
112
int c2, c3;
113
114             switch (c >> 4)
115             {
116                 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
117                     // 0xxx xxxx
118
chars[j++] = (char)c;
119                     break;
120
121                 case 12: case 13:
122                     // 110x xxxx 10xx xxxx
123
if (i + 1 >= n)
124                     {
125                         badUtf8Data();
126                     }
127                     c2 = (value[++i] + 256) & 255; // byte is signed, we need unsigned
128
if ((c2 & 0xC0) != 0x80)
129                     {
130                         badUtf8Data();
131                     }
132                     chars[j++] = (char)(((c & 0x1F) << 6) | (c2 & 0x3F));
133                     break;
134
135                 case 14:
136                     // 1110 xxxx 10xx xxxx 10xx xxxx
137
if (i + 2 >= n)
138                     {
139                         badUtf8Data();
140                     }
141                     c2 = (value[++i] + 256) & 255; // byte is signed, we need unsigned
142
c3 = (value[++i] + 256) & 255; // byte is signed, we need unsigned
143
if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
144                     {
145                         badUtf8Data();
146                     }
147                     chars[j++] = (char)(((c & 0x0F) << 12)
148                         | ((c2 & 0x3F) << 6)
149                         | (c3 & 0x3F));
150                     break;
151
152                 default:
153                     badUtf8Data();
154             }
155         }
156         return new String(chars, 0, j);
157     }
158
159     private static void badUtf8Data()
160     {
161         throw new org.omg.CORBA.MARSHAL("bad UTF-8 data");
162     }
163 }
164
Popular Tags