KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > javolution > io > UTF8ByteBufferReader


/*
 * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
 * Copyright (C) 2005 - Javolution (http://javolution.org/)
 * All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this software is
 * freely granted, provided that this notice is preserved.
 */

package javolution.io;

import j2me.lang.IllegalStateException;
import j2me.io.CharConversionException;
import j2me.nio.BufferUnderflowException;
import j2me.nio.ByteBuffer;

import java.io.IOException;
import java.io.Reader;

import javolution.lang.Reusable;
import javolution.text.Appendable;
21
22 /**
23  * <p> This class represents a UTF-8 <code>j2me.nio.ByteBuffer</code> reader.
24  * </p>
25  *
26  * <p> This reader can be used for efficient decoding of native byte
27  * buffers (e.g. <code>MappedByteBuffer</code>), high-performance
28  * messaging (no intermediate buffer), etc.</p>
29  *
30  * <p> This reader supports surrogate <code>char</code> pairs (representing
31  * characters in the range [U+10000 .. U+10FFFF]). It can also be used
32  * to read characters unicodes (31 bits) directly
33  * (ref. {@link #read()}).</p>
34  *
35  * <p> Each invocation of one of the <code>read()</code> methods may cause one
36  * or more bytes to be read from the underlying byte buffer.
37  * The end of stream is reached when the byte buffer position and limit
38  * coincide.</p>
39  *
40  * @author <a HREF="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
41  * @version 2.0, December 9, 2004
42  * @see UTF8ByteBufferWriter
43  */

44 public final class UTF8ByteBufferReader extends Reader JavaDoc implements Reusable {
45
46     /**
47      * Holds the byte buffer source.
48      */

49     private ByteBuffer _byteBuffer;
50
51     /**
52      * Default constructor.
53      */

54     public UTF8ByteBufferReader() {
55     }
56
57     /**
58      * Sets the <code>ByteBuffer</code> to use for reading available bytes
59      * from current buffer position.
60      *
61      * @param byteBuffer the <code>ByteBuffer</code> source.
62      * @return this UTF-8 reader.
63      * @throws IllegalStateException if this reader is being reused and
64      * it has not been {@link #close closed} or {@link #reset reset}.
65      */

66     public UTF8ByteBufferReader setInput(ByteBuffer byteBuffer) {
67         if (_byteBuffer != null)
68             throw new IllegalStateException JavaDoc("Reader not closed or reset");
69         _byteBuffer = byteBuffer;
70         return this;
71     }
72
73     /**
74      * Indicates if this stream is ready to be read.
75      *
76      * @return <code>true</code> if the byte buffer has remaining bytes to
77      * read; <code>false</code> otherwise.
78      * @throws IOException if an I/O error occurs.
79      */

80     public boolean ready() throws IOException JavaDoc {
81         if (_byteBuffer != null) {
82             return _byteBuffer.hasRemaining();
83         } else {
84             throw new IOException JavaDoc("Reader closed");
85         }
86     }
87
88     /**
89      * Closes and {@link #reset resets} this reader for reuse.
90      *
91      * @throws IOException if an I/O error occurs.
92      */

93     public void close() throws IOException JavaDoc {
94         if (_byteBuffer != null) {
95             reset();
96         }
97     }
98
99     /**
100      * Reads a single character. This method does not block, <code>-1</code>
101      * is returned if the buffer's limit has been reached.
102      *
103      * @return the 31-bits Unicode of the character read, or -1 if there is
104      * no more remaining bytes to be read.
105      * @throws IOException if an I/O error occurs (e.g. incomplete
106      * character sequence being read).
107      */

108     public int read() throws IOException JavaDoc {
109         if (_byteBuffer != null) {
110             if (_byteBuffer.hasRemaining()) {
111                 byte b = _byteBuffer.get();
112                 return (b >= 0) ? b : read2(b);
113             } else {
114                 return -1;
115             }
116         } else {
117             throw new IOException JavaDoc("Reader closed");
118         }
119     }
120
121     // Reads one full character, throws CharConversionException if limit reached.
122
private int read2(byte b) throws IOException JavaDoc {
123         try {
124             // Decodes UTF-8.
125
if ((b >= 0) && (_moreBytes == 0)) {
126                 // 0xxxxxxx
127
return b;
128             } else if (((b & 0xc0) == 0x80) && (_moreBytes != 0)) {
129                 // 10xxxxxx (continuation byte)
130
_code = (_code << 6) | (b & 0x3f); // Adds 6 bits to code.
131
if (--_moreBytes == 0) {
132                     return _code;
133                 } else {
134                     return read2(_byteBuffer.get());
135                 }
136             } else if (((b & 0xe0) == 0xc0) && (_moreBytes == 0)) {
137                 // 110xxxxx
138
_code = b & 0x1f;
139                 _moreBytes = 1;
140                 return read2(_byteBuffer.get());
141             } else if (((b & 0xf0) == 0xe0) && (_moreBytes == 0)) {
142                 // 1110xxxx
143
_code = b & 0x0f;
144                 _moreBytes = 2;
145                 return read2(_byteBuffer.get());
146             } else if (((b & 0xf8) == 0xf0) && (_moreBytes == 0)) {
147                 // 11110xxx
148
_code = b & 0x07;
149                 _moreBytes = 3;
150                 return read2(_byteBuffer.get());
151             } else if (((b & 0xfc) == 0xf8) && (_moreBytes == 0)) {
152                 // 111110xx
153
_code = b & 0x03;
154                 _moreBytes = 4;
155                 return read2(_byteBuffer.get());
156             } else if (((b & 0xfe) == 0xfc) && (_moreBytes == 0)) {
157                 // 1111110x
158
_code = b & 0x01;
159                 _moreBytes = 5;
160                 return read2(_byteBuffer.get());
161             } else {
162                 throw new CharConversionException("Invalid UTF-8 Encoding");
163             }
164         } catch (BufferUnderflowException e) {
165             throw new CharConversionException("Incomplete Sequence");
166         }
167     }
168
169     private int _code;
170
171     private int _moreBytes;
172
173     /**
174      * Reads characters into a portion of an array. This method does not
175      * block.
176      *
177      * <p> Note: Characters between U+10000 and U+10FFFF are represented
178      * by surrogate pairs (two <code>char</code>).</p>
179      *
180      * @param cbuf the destination buffer.
181      * @param off the offset at which to start storing characters.
182      * @param len the maximum number of characters to read
183      * @return the number of characters read, or -1 if there is no more
184      * byte remaining.
185      * @throws IOException if an I/O error occurs.
186      */

187     public int read(char cbuf[], int off, int len) throws IOException JavaDoc {
188         if (_byteBuffer == null)
189             throw new IOException JavaDoc("Reader closed");
190         final int off_plus_len = off + len;
191         int remaining = _byteBuffer.remaining();
192         if (remaining <= 0)
193             return -1;
194         for (int i = off; i < off_plus_len;) {
195             if (remaining-- > 0) {
196                 byte b = _byteBuffer.get();
197                 if (b >= 0) {
198                     cbuf[i++] = (char) b; // Most common case.
199
} else {
200                     if (i < off_plus_len - 1) { // Up to two 'char' can be read.
201
int code = read2(b);
202                         remaining = _byteBuffer.remaining(); // Recalculates.
203
if (code < 0x10000) {
204                             cbuf[i++] = (char) code;
205                         } else if (code <= 0x10ffff) { // Surrogates.
206
cbuf[i++] = (char) (((code - 0x10000) >> 10) + 0xd800);
207                             cbuf[i++] = (char) (((code - 0x10000) & 0x3ff) + 0xdc00);
208                         } else {
209                             throw new CharConversionException(
210                                     "Cannot convert U+"
211                                             + Integer.toHexString(code)
212                                             + " to char (code greater than U+10FFFF)");
213                         }
214                     } else { // Not enough space in destination (go back).
215
_byteBuffer.position(_byteBuffer.position() - 1);
216                         remaining++;
217                         return i - off;
218                     }
219                 }
220             } else {
221                 return i - off;
222             }
223         }
224         return len;
225     }
226
227     /**
228      * Reads characters into the specified appendable. This method does not
229      * block.
230      *
231      * <p> Note: Characters between U+10000 and U+10FFFF are represented
232      * by surrogate pairs (two <code>char</code>).</p>
233      *
234      * @param dest the destination buffer.
235      * @throws IOException if an I/O error occurs.
236      */

237     public void read(Appendable JavaDoc dest) throws IOException JavaDoc {
238         if (_byteBuffer == null)
239             throw new IOException JavaDoc("Reader closed");
240         while (_byteBuffer.hasRemaining()) {
241             byte b = _byteBuffer.get();
242             if (b >= 0) {
243                 dest.append((char) b); // Most common case.
244
} else {
245                 int code = read2(b);
246                 if (code < 0x10000) {
247                     dest.append((char) code);
248                 } else if (code <= 0x10ffff) { // Surrogates.
249
dest.append((char) (((code - 0x10000) >> 10) + 0xd800));
250                     dest.append((char) (((code - 0x10000) & 0x3ff) + 0xdc00));
251                 } else {
252                     throw new CharConversionException("Cannot convert U+"
253                             + Integer.toHexString(code)
254                             + " to char (code greater than U+10FFFF)");
255                 }
256             }
257         }
258     }
259
260     // Implements Reusable.
261
public void reset() {
262         _byteBuffer = null;
263         _code = 0;
264         _moreBytes = 0;
265     }
266
267     /**
268      * @deprecated Replaced by {@link #setInput(ByteBuffer)}
269      */

270     public UTF8ByteBufferReader setByteBuffer(ByteBuffer byteBuffer) {
271         return this.setInput(byteBuffer);
272     }
273
274 }
Popular Tags