KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > impl > UTF32


1 /*
2  *******************************************************************************
3  * Copyright (C) 2005, International Business Machines Corporation and *
4  * others. All Rights Reserved. *
5  *******************************************************************************
6  *
7  */

8
9 package com.ibm.icu.impl;
10
11 import com.ibm.icu.text.UTF16;
12
13 /**
14  * This class converts between an array of bytes in UTF-32 encoding (BE or LE) and
15  * Java Strings.
16  *
17  * @internal
18  */

19 public abstract class UTF32
20 {
21     /**
22      * This method packs a 32-bit Unicode code point into the byte array. It is
23      * implemented by subclasses that implement the BE and LE encodings.
24      *
25      * @param bytes the destination byte array
26      * @param codePoint the 32-bit Unicode code point
27      * @param out the destination index in <code>bytes</code>.
28      *
29      * @internal
30      */

31     abstract protected void pack(byte[] bytes, int codePoint, int out);
32     
33     /**
34      * This method unpacks bytes from the encoded byte array into a 32-bit
35      * Unicode code point. It is implmeented by subclasses that implmeent the BE and LE encodings.
36      *
37      * @param bytes the source byte array.
38      * @param index the index of the first source byte.
39      * @return the 32-bit Unicode code point.
40      *
41      * @internal
42      */

43     abstract protected int unpack(byte[] bytes, int index);
44     
45     
46     /**
47      * Convert a Java String into an array of UTF-32 encoded bytes. Calls
48      * the <code>pack</code> method to do the encoding.
49      *
50      * @param utf16 the source Java String.
51      * @return an array of UTF-32 encoded bytes.
52      *
53      * @internal
54      */

55     public byte[] toBytes(String JavaDoc utf16)
56     {
57         int codePoints = UTF16.countCodePoint(utf16);
58         byte[] bytes = new byte[codePoints * 4];
59         int out = 0;
60
61         for (int cp = 0; cp < codePoints; out += 4) {
62             int codePoint = UTF16.charAt(utf16, cp);
63             
64             pack(bytes, codePoint, out);
65             cp += UTF16.getCharCount(codePoint);
66         }
67         
68         return bytes;
69     }
70     
71     /**
72      * This method converts a sequence of UTF-32 encoded bytes into
73      * a Java String. It calls the <code>unpack</code> method to implement
74      * the encoding.
75      *
76      * @param bytes the source byte array.
77      * @param offset the starting offset in the byte array.
78      * @param count the number of bytes to process.
79      * @return the Java String.
80      *
81      * @internal
82      */

83     public String JavaDoc fromBytes(byte[] bytes, int offset, int count)
84     {
85         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
86         int limit = offset + count;
87         
88         for (int cp = offset; cp < limit; cp += 4) {
89             int codePoint = unpack(bytes, cp);
90             
91             UTF16.append(buffer, codePoint);
92         }
93         
94         return buffer.toString();
95     }
96     
97     /**
98      * A convenience method that converts an entire byte array
99      * into a Java String.
100      *
101      * @param bytes the source byte array.
102      * @return the Java String.
103      *
104      * @internal
105      */

106     public String JavaDoc fromBytes(byte[] bytes)
107     {
108         return fromBytes(bytes, 0, bytes.length);
109     }
110     
111     /**
112      * Get an instance that implements UTF-32BE encoding.
113      *
114      * @return the instance.
115      *
116      * @internal
117      */

118     static public UTF32 getBEInstance()
119     {
120         if (beInstance == null) {
121             beInstance = new BE();
122         }
123         
124         return beInstance;
125     }
126     
127     /**
128      * Get an instance that implemnts the UTF-32LE encoding.
129      *
130      * @return the instance.
131      *
132      * @internal
133      */

134     static public UTF32 getLEInstance()
135     {
136         if (leInstance == null) {
137             leInstance = new LE();
138         }
139         
140         return leInstance;
141     }
142     
143     /**
144      * Get an instance that implements either UTF-32BE or UTF32-LE,
145      * depending on the encoding name suppled.
146      *
147      * @param encoding the encoding name - must be <code>"UTF-32BE"</code> or <code>"UTF-32LE"</code>.
148      * @return the instance.
149      *
150      * @internal
151      */

152     static public UTF32 getInstance(String JavaDoc encoding)
153     {
154         if (encoding.equals("UTF-32BE")) {
155             return getBEInstance();
156         }
157         
158         if (encoding.equals("UTF-32LE")) {
159             return getLEInstance();
160         }
161         
162         return null;
163     }
164     
165     /**
166      * This sublcass implements the UTF-32BE encoding via the
167      * <code>pack</code> and <code>unpack</code> methods.
168      *
169      * @internal
170      */

171     static class BE extends UTF32
172     {
173         /**
174          * This method packs a 32-bit Unicode code point into the byte array using
175          * the UTF-32BE encoding.
176          *
177          * @param bytes the destination byte array
178          * @param codePoint the 32-bit Unicode code point
179          * @param out the destination index in <code>bytes</code>.
180          *
181          * @internal
182          */

183         public void pack(byte[] bytes, int codePoint, int out)
184         {
185             bytes[out + 0] = (byte) ((codePoint >> 24) & 0xFF);
186             bytes[out + 1] = (byte) ((codePoint >> 16) & 0xFF);
187             bytes[out + 2] = (byte) ((codePoint >> 8) & 0xFF);
188             bytes[out + 3] = (byte) ((codePoint >> 0) & 0xFF);
189         }
190         
191         /**
192          * This method unpacks bytes from the UTF-32BE encoded byte array into a 32-bit
193          * Unicode code point.
194          *
195          * @param bytes the source byte array.
196          * @param index the index of the first source byte.
197          * @return the 32-bit Unicode code point.
198          *
199          * @internal
200          */

201         public int unpack(byte[] bytes, int index)
202         {
203             return (bytes[index + 0] & 0xFF) << 24 | (bytes[index + 1] & 0xFF) << 16 |
204                    (bytes[index + 2] & 0xFF) << 8 | (bytes[index + 3] & 0xFF);
205         }
206     }
207     
208     /**
209      * This sublcass implements the UTF-32LE encoding via the
210      * <code>pack</code> and <code>unpack</code> methods.
211      *
212      * @internal
213      */

214     static class LE extends UTF32
215     {
216         /**
217          * This method packs a 32-bit Unicode code point into the byte array using
218          * the UTF-32LE encoding.
219          *
220          * @param bytes the destination byte array
221          * @param codePoint the 32-bit Unicode code point
222          * @param out the destination index in <code>bytes</code>.
223          *
224          * @internal
225          */

226         public void pack(byte[] bytes, int codePoint, int out)
227         {
228             bytes[out + 3] = (byte) ((codePoint >> 24) & 0xFF);
229             bytes[out + 2] = (byte) ((codePoint >> 16) & 0xFF);
230             bytes[out + 1] = (byte) ((codePoint >> 8) & 0xFF);
231             bytes[out + 0] = (byte) ((codePoint >> 0) & 0xFF);
232         }
233         
234         /**
235          * This method unpacks bytes from the UTF-32LE encoded byte array into a 32-bit
236          * Unicode code point.
237          *
238          * @param bytes the source byte array.
239          * @param index the index of the first source byte.
240          * @return the 32-bit Unicode code point.
241          *
242          * @internal
243          */

244         public int unpack(byte[] bytes, int index)
245         {
246             return (bytes[index + 3] & 0xFF) << 24 | (bytes[index + 2] & 0xFF) << 16 |
247                    (bytes[index + 1] & 0xFF) << 8 | (bytes[index + 0] & 0xFF);
248         }
249     }
250     
251     private static UTF32 beInstance = null;
252     private static UTF32 leInstance = null;
253 }
254
Popular Tags