KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > javolution > io > Utf8ByteBufferReader


/*
 * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
 * Copyright (C) 2005 - Javolution (http://javolution.org/)
 * All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this software is
 * freely granted, provided that this notice is preserved.
 */

9 package javolution.io;
10
11 import j2me.lang.IllegalStateException;
12 import j2me.io.CharConversionException;
13 import j2me.nio.BufferUnderflowException;
14 import j2me.nio.ByteBuffer;
15
16 import java.io.IOException;
17 import java.io.Reader;
18
19 import javolution.lang.Appendable;
20 import javolution.lang.Reusable;
21
22 /**
23  * <p> This class represents a UTF-8 <code>j2me.nio.ByteBuffer</code> reader.
24  * </p>
25  *
26  * <p> This reader can be used for efficient decoding of native byte
27  * buffers (e.g. <code>MappedByteBuffer</code>), high-performance
28  * messaging (no intermediate buffer), etc.</p>
29  *
30  * <p> This reader supports surrogate <code>char</code> pairs (representing
31  * characters in the range [U+10000 .. U+10FFFF]). It can also be used
32  * to read characters unicodes (31 bits) directly
33  * (ref. {@link #read()}).</p>
34  *
35  * <p> Each invocation of one of the <code>read()</code> methods may cause one
36  * or more bytes to be read from the underlying byte buffer.
37  * The end of stream is reached when the byte buffer position and limit
38  * coincide.</p>
39  *
40  * @author <a HREF="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
41  * @version 2.0, December 9, 2004
42  * @see Utf8ByteBufferWriter
43  */

44 public final class Utf8ByteBufferReader extends Reader implements Reusable {
45
46     /**
47      * Holds the byte buffer source.
48      */

49     private ByteBuffer _byteBuffer;
50
51     /**
52      * Default constructor.
53      */

54     public Utf8ByteBufferReader() {
55     }
56
57     /**
58      * Sets the <code>ByteBuffer</code> to use for reading available bytes
59      * from current buffer position.
60      *
61      * @param byteBuffer the <code>ByteBuffer</code> source.
62      * @return this UTF-8 reader.
63      * @throws IllegalStateException if this reader is being reused and
64      * it has not been {@link #close closed} or {@link #reset reset}.
65      */

66     public Utf8ByteBufferReader setByteBuffer(ByteBuffer byteBuffer) {
67         if (_byteBuffer != null)
68             throw new IllegalStateException("Reader not closed or reset");
69         _byteBuffer = byteBuffer;
70         return this;
71     }
72
73     /**
74      * Indicates if this stream is ready to be read.
75      *
76      * @return <code>true</code> if the byte buffer has remaining bytes to
77      * read; <code>false</code> otherwise.
78      * @throws IOException if an I/O error occurs.
79      */

80     public boolean ready() throws IOException {
81         if (_byteBuffer != null) {
82             return _byteBuffer.hasRemaining();
83         } else {
84             throw new IOException("Reader closed");
85         }
86     }
87
88     /**
89      * Closes and {@link #reset resets} this reader for reuse.
90      *
91      * @throws IOException if an I/O error occurs.
92      */

93     public void close() throws IOException {
94         if (_byteBuffer != null) {
95             reset();
96         }
97     }
98
99     /**
100      * Reads a single character. This method does not block, <code>-1</code>
101      * is returned if the buffer's limit has been reached.
102      *
103      * @return the 31-bits Unicode of the character read, or -1 if there is
104      * no more remaining bytes to be read.
105      * @throws IOException if an I/O error occurs (e.g. incomplete
106      * character sequence being read).
107      */

108     public int read() throws IOException {
109         if (_byteBuffer != null) {
110             if (_byteBuffer.hasRemaining()) {
111                 byte b = _byteBuffer.get();
112                 return (b >= 0) ? b : read2(b);
113             } else {
114                 return -1;
115             }
116         } else {
117             throw new IOException("Reader closed");
118         }
119     }
120
121     // Reads one full character, throws CharConversionException if limit reached.
122
private int read2(byte b) throws IOException {
123         try {
124             // Decodes UTF-8.
125
if ((b >= 0) && (_moreBytes == 0)) {
126                 // 0xxxxxxx
127
return b;
128             } else if (((b & 0xc0) == 0x80) && (_moreBytes != 0)) {
129                 // 10xxxxxx (continuation byte)
130
_code = (_code << 6) | (b & 0x3f); // Adds 6 bits to code.
131
if (--_moreBytes == 0) {
132                     return _code;
133                 } else {
134                     return read2(_byteBuffer.get());
135                 }
136             } else if (((b & 0xe0) == 0xc0) && (_moreBytes == 0)) {
137                 // 110xxxxx
138
_code = b & 0x1f;
139                 _moreBytes = 1;
140                 return read2(_byteBuffer.get());
141             } else if (((b & 0xf0) == 0xe0) && (_moreBytes == 0)) {
142                 // 1110xxxx
143
_code = b & 0x0f;
144                 _moreBytes = 2;
145                 return read2(_byteBuffer.get());
146             } else if (((b & 0xf8) == 0xf0) && (_moreBytes == 0)) {
147                 // 11110xxx
148
_code = b & 0x07;
149                 _moreBytes = 3;
150                 return read2(_byteBuffer.get());
151             } else if (((b & 0xfc) == 0xf8) && (_moreBytes == 0)) {
152                 // 111110xx
153
_code = b & 0x03;
154                 _moreBytes = 4;
155                 return read2(_byteBuffer.get());
156             } else if (((b & 0xfe) == 0xfc) && (_moreBytes == 0)) {
157                 // 1111110x
158
_code = b & 0x01;
159                 _moreBytes = 5;
160                 return read2(_byteBuffer.get());
161             } else {
162                 throw new CharConversionException("Invalid UTF-8 Encoding");
163             }
164         } catch (BufferUnderflowException e) {
165             throw new CharConversionException("Incomplete Sequence");
166         }
167     }
168
169     private int _code;
170
171     private int _moreBytes;
172
173     /**
174      * Reads characters into a portion of an array. This method does not
175      * block.
176      *
177      * <p> Note: Characters between U+10000 and U+10FFFF are represented
178      * by surrogate pairs (two <code>char</code>).</p>
179      *
180      * @param cbuf the destination buffer.
181      * @param off the offset at which to start storing characters.
182      * @param len the maximum number of characters to read
183      * @return the number of characters read, or 0 if there is
184      * no more remaining bytes to be read.
185      * @throws IOException if an I/O error occurs.
186      */

187     public int read(char cbuf[], int off, int len) throws IOException {
188         if (_byteBuffer == null)
189             throw new IOException("Reader closed");
190         final int off_plus_len = off + len;
191         int remaining = _byteBuffer.remaining();
192         for (int i = off; i < off_plus_len;) {
193             if (remaining-- > 0) {
194                 byte b = _byteBuffer.get();
195                 if (b >= 0) {
196                     cbuf[i++] = (char) b; // Most common case.
197
} else {
198                     if (i < off_plus_len - 1) { // Up to two 'char' can be read.
199
int code = read2(b);
200                         remaining = _byteBuffer.remaining(); // Recalculates.
201
if (code < 0x10000) {
202                             cbuf[i++] = (char) code;
203                         } else if (code <= 0x10ffff) { // Surrogates.
204
cbuf[i++] = (char) (((code - 0x10000) >> 10) + 0xd800);
205                             cbuf[i++] = (char) (((code - 0x10000) & 0x3ff) + 0xdc00);
206                         } else {
207                             throw new CharConversionException(
208                                     "Cannot convert U+"
209                                             + Integer.toHexString(code)
210                                             + " to char (code greater than U+10FFFF)");
211                         }
212                     } else { // Not enough space in destination (go back).
213
_byteBuffer.position(_byteBuffer.position() - 1);
214                         remaining++;
215                         return i - off;
216                     }
217                 }
218             } else {
219                 return i - off;
220             }
221         }
222         return len;
223     }
224
225     /**
226      * Reads characters into the specified appendable. This method does not
227      * block.
228      *
229      * <p> Note: Characters between U+10000 and U+10FFFF are represented
230      * by surrogate pairs (two <code>char</code>).</p>
231      *
232      * @param dest the destination buffer.
233      * @throws IOException if an I/O error occurs.
234      */

235     public void read(Appendable dest) throws IOException {
236         if (_byteBuffer == null)
237             throw new IOException("Reader closed");
238         while (_byteBuffer.hasRemaining()) {
239             byte b = _byteBuffer.get();
240             if (b >= 0) {
241                 dest.append((char) b); // Most common case.
242
} else {
243                 int code = read2(b);
244                 if (code < 0x10000) {
245                     dest.append((char) code);
246                 } else if (code <= 0x10ffff) { // Surrogates.
247
dest.append((char) (((code - 0x10000) >> 10) + 0xd800));
248                     dest.append((char) (((code - 0x10000) & 0x3ff) + 0xdc00));
249                 } else {
250                     throw new CharConversionException("Cannot convert U+"
251                             + Integer.toHexString(code)
252                             + " to char (code greater than U+10FFFF)");
253                 }
254             }
255         }
256     }
257
258     // Implements Reusable.
259
public void reset() {
260         _byteBuffer = null;
261         _code = 0;
262         _moreBytes = 0;
263     }
264 }
Popular Tags