KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sleepycat > util > UtfOps


/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2000,2006 Oracle. All rights reserved.
 *
 * $Id: UtfOps.java,v 1.17 2006/10/30 21:14:35 bostic Exp $
 */

8
9 package com.sleepycat.util;
10
/**
 * UTF operations with more flexibility than is provided by DataInput and
 * DataOutput.
 *
 * <p>The encoding implemented here uses one byte for characters in the range
 * 0x0001-0x007F, two bytes for 0x0000 and for 0x0080-0x07FF, and three bytes
 * for every other {@code char} value.  Because 0x0000 is written as two
 * bytes, an encoded string never contains a zero byte, which is what allows
 * a single zero byte to be used as a terminator.</p>
 *
 * @author Mark Hayes
 */
public class UtfOps {

    /** Shared result for the empty string; zero-length, so safe to share. */
    private static final byte[] EMPTY_BYTES = {};
    private static final String EMPTY_STRING = "";

    /**
     * Returns the byte length of a null terminated UTF string, not including
     * the terminator.
     *
     * @param bytes the data containing the UTF string.
     *
     * @param offset the beginning of the string to measure.
     *
     * @throws IndexOutOfBoundsException if no zero terminator is found before
     * the end of the array.
     *
     * @return the number of bytes.
     */
    public static int getZeroTerminatedByteLength(byte[] bytes, int offset)
        throws IndexOutOfBoundsException {

        int len = 0;
        /* Running off the end of the array raises the documented exception. */
        while (bytes[offset++] != 0) {
            len++;
        }
        return len;
    }

    /**
     * Returns the byte length of the UTF string that would be created by
     * converting the given characters to UTF.
     *
     * @param chars the characters that would be converted.
     *
     * @return the byte length of the equivalent UTF data.
     */
    public static int getByteLength(char[] chars) {

        return getByteLength(chars, 0, chars.length);
    }

    /**
     * Returns the byte length of the UTF string that would be created by
     * converting the given characters to UTF.
     *
     * @param chars the characters that would be converted.
     *
     * @param offset the first character to be converted.
     *
     * @param length the number of characters to be converted.
     *
     * @return the byte length of the equivalent UTF data.
     */
    public static int getByteLength(char[] chars, int offset, int length) {

        int len = 0;
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                len++;
            } else if (c > 0x07FF) {
                len += 3;
            } else {
                /* 0x0000 and 0x0080-0x07FF take two bytes. */
                len += 2;
            }
        }
        return len;
    }

    /**
     * Returns the number of characters represented by the given UTF string.
     *
     * @param bytes the UTF string.
     *
     * @return the number of characters.
     *
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     *
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int getCharLength(byte[] bytes)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        return getCharLength(bytes, 0, bytes.length);
    }

    /**
     * Returns the number of characters represented by the given UTF string.
     *
     * <p>Only the leading byte of each sequence is inspected; continuation
     * bytes are not validated by this method.</p>
     *
     * @param bytes the data containing the UTF string.
     *
     * @param offset the first byte to be converted.
     *
     * @param length the number of bytes to be converted.
     *
     * @return the number of characters.
     *
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     *
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int getCharLength(byte[] bytes, int offset, int length)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        int charCount = 0;
        final int end = offset + length;
        while (offset < end) {
            /* The high nibble of the leading byte selects the sequence size. */
            switch ((bytes[offset] & 0xff) >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                offset++;
                break;
            case 12: case 13:
                offset += 2;
                break;
            case 14:
                offset += 3;
                break;
            default:
                throw new IllegalArgumentException
                    ("Illegal UTF leading byte at offset " + offset);
            }
            /*
             * A multi-byte sequence that extends past the requested range is
             * incomplete.  Checking here, inside the loop, means a range that
             * is empty to begin with still simply yields zero.
             */
            if (offset > end) {
                throw new IndexOutOfBoundsException
                    ("Incomplete UTF sequence at end of data");
            }
            charCount++;
        }
        return charCount;
    }

    /**
     * Converts byte arrays into character arrays.
     *
     * <p>Note that when {@code isByteLen} is true the limit is only tested
     * before each leading byte is read, so the continuation bytes of a final
     * multi-byte sequence are read even if they lie beyond
     * {@code byteOffset + len}.</p>
     *
     * @param bytes the source byte data to convert
     *
     * @param byteOffset the offset into the byte array at which
     * to start the conversion
     *
     * @param chars the destination array
     *
     * @param charOffset the offset into chars at which to begin the copy
     *
     * @param len the amount of information to copy into chars
     *
     * @param isByteLen if true then len is a measure of bytes, otherwise
     * len is a measure of characters
     *
     * @return the offset into bytes immediately following the last byte
     * that was consumed.
     *
     * @throws IndexOutOfBoundsException if decoding runs past the end of
     * either array, for example because a UTF character sequence at the end
     * of the byte array is not complete.
     *
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static int bytesToChars(byte[] bytes, int byteOffset,
                                   char[] chars, int charOffset,
                                   int len, boolean isByteLen)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        int char1, char2, char3;
        /* Turn len into an absolute limit on whichever offset it measures. */
        len += isByteLen ? byteOffset : charOffset;
        while ((isByteLen ? byteOffset : charOffset) < len) {
            char1 = bytes[byteOffset++] & 0xff;
            switch ((char1 & 0xff) >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                chars[charOffset++] = (char) char1;
                break;
            case 12: case 13:
                /* Sign extension is harmless: only masked bits are used. */
                char2 = bytes[byteOffset++];
                if ((char2 & 0xC0) != 0x80) {
                    throw new IllegalArgumentException
                        ("Illegal UTF continuation byte");
                }
                chars[charOffset++] = (char)(((char1 & 0x1F) << 6) |
                                             (char2 & 0x3F));
                break;
            case 14:
                char2 = bytes[byteOffset++];
                char3 = bytes[byteOffset++];
                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                    throw new IllegalArgumentException
                        ("Illegal UTF continuation byte");
                }
                chars[charOffset++] = (char)(((char1 & 0x0F) << 12) |
                                             ((char2 & 0x3F) << 6) |
                                             ((char3 & 0x3F) << 0));
                break;
            default:
                throw new IllegalArgumentException
                    ("Illegal UTF leading byte");
            }
        }
        return byteOffset;
    }

    /**
     * Converts character arrays into byte arrays.
     *
     * <p>No size check is made on the destination; it must have room for
     * the number of bytes reported by
     * {@link #getByteLength(char[], int, int)} for the same characters.</p>
     *
     * @param chars the source character data to convert
     *
     * @param charOffset the offset into the character array at which
     * to start the conversion
     *
     * @param bytes the destination array
     *
     * @param byteOffset the offset into bytes at which to begin the copy
     *
     * @param charLength the length of characters to copy into bytes
     */
    public static void charsToBytes(char[] chars, int charOffset,
                                    byte[] bytes, int byteOffset,
                                    int charLength) {
        final int end = charOffset + charLength;
        for (int i = charOffset; i < end; i++) {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                bytes[byteOffset++] = (byte) c;
            } else if (c > 0x07FF) {
                bytes[byteOffset++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                bytes[byteOffset++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bytes[byteOffset++] = (byte) (0x80 | ((c >> 0) & 0x3F));
            } else {
                /* 0x0000 and 0x0080-0x07FF: two-byte form. */
                bytes[byteOffset++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                bytes[byteOffset++] = (byte) (0x80 | ((c >> 0) & 0x3F));
            }
        }
    }

    /**
     * Converts byte arrays into strings.
     *
     * @param bytes the source byte data to convert
     *
     * @param offset the offset into the byte array at which
     * to start the conversion
     *
     * @param length the number of bytes to be converted.
     *
     * @return the string.
     *
     * @throws IndexOutOfBoundsException if a UTF character sequence at the end
     * of the data is not complete.
     *
     * @throws IllegalArgumentException if an illegal UTF sequence is
     * encountered.
     */
    public static String bytesToString(byte[] bytes, int offset, int length)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        if (length == 0) {
            return EMPTY_STRING;
        }
        /* Counting first rejects a truncated trailing sequence up front. */
        int charLen = UtfOps.getCharLength(bytes, offset, length);
        char[] chars = new char[charLen];
        UtfOps.bytesToChars(bytes, offset, chars, 0, length, true);
        return new String(chars, 0, charLen);
    }

    /**
     * Converts strings to byte arrays.
     *
     * @param string the string to convert.
     *
     * @return the UTF byte array; for an empty string this is a shared
     * zero-length array.
     */
    public static byte[] stringToBytes(String string) {

        if (string.length() == 0) {
            return EMPTY_BYTES;
        }
        char[] chars = string.toCharArray();
        byte[] bytes = new byte[UtfOps.getByteLength(chars)];
        UtfOps.charsToBytes(chars, 0, bytes, 0, chars.length);
        return bytes;
    }
}
281
Popular Tags