KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > impl > io > UCSReader


1 /*
2  * Copyright 2000-2002,2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.xerces.impl.io;
18
19 import java.io.InputStream JavaDoc;
20 import java.io.IOException JavaDoc;
21 import java.io.Reader JavaDoc;
22
/**
 * Reader for UCS-2 and UCS-4 encodings.
 * (i.e., encodings from ISO-10646-UCS-(2|4)).
 *
 * <p>The reader decodes fixed-width 2-byte or 4-byte code units from the
 * wrapped byte stream. Byte order and width must be known in advance and
 * are selected with one of the <code>UCS*</code> constants.
 *
 * <p>Known limitation: UCS-4 values above U+FFFF are not converted to
 * surrogate pairs. {@link #read(char[], int, int)} keeps only the low
 * 16 bits of such a value, and {@link #read()} returns the raw 32-bit
 * value.
 *
 * @xerces.internal
 *
 * @author Neil Graham, IBM
 *
 * @version $Id: UCSReader.java,v 1.5 2004/10/04 22:07:41 mrglavas Exp $
 */
public class UCSReader extends Reader {

    //
    // Constants
    //

    /** Default byte buffer size (8192, larger than that of ASCIIReader
     * since it's reasonable to surmise that the average UCS-4-encoded
     * file should be 4 times as large as the average ASCII-encoded file).
     */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** UCS-2, little-endian. */
    public static final short UCS2LE = 1;
    /** UCS-2, big-endian. */
    public static final short UCS2BE = 2;
    /** UCS-4, little-endian. */
    public static final short UCS4LE = 4;
    /** UCS-4, big-endian. */
    public static final short UCS4BE = 8;

    //
    // Data
    //

    /** Input stream. */
    protected InputStream fInputStream;

    /** Byte buffer. */
    protected byte[] fBuffer;

    /** What kind of data we're dealing with (one of the UCS* constants). */
    protected short fEncoding;

    //
    // Constructors
    //

    /**
     * Constructs a UCS reader from the specified input stream
     * using the default buffer size. The Endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, short encoding) {
        this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
    } // <init>(InputStream, short)

    /**
     * Constructs a UCS reader from the specified input stream
     * and buffer size. The Endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param size The initial buffer size, in bytes.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, int size, short encoding) {
        fInputStream = inputStream;
        fBuffer = new byte[size];
        fEncoding = encoding;
    } // <init>(InputStream,int,short)

    //
    // Reader methods
    //

    /**
     * Read a single character. This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @return The decoded UCS-2 or UCS-4 value, or -1 if the end of the
     *         stream was reached before a complete character could be read.
     *
     * @exception IOException If an I/O error occurs
     */
    public int read() throws IOException {
        // InputStream.read() yields 0..255 for data and -1 only at end of
        // stream, so test for -1 directly: 0xFF is a legitimate data byte.
        int b0 = fInputStream.read();
        if (b0 == -1) {
            return -1;
        }
        int b1 = fInputStream.read();
        if (b1 == -1) {
            return -1;
        }
        if (fEncoding >= 4) { // UCS-4
            int b2 = fInputStream.read();
            if (b2 == -1) {
                return -1;
            }
            int b3 = fInputStream.read();
            if (b3 == -1) {
                return -1;
            }
            return decodeUCS4(b0, b1, b2, b3);
        }
        return decodeUCS2(b0, b1);
    } // read():int

    /**
     * Read characters into a portion of an array. This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     *
     * <p>If the stream ends in the middle of a character, the missing
     * bytes are treated as zero.
     *
     * @param ch Destination buffer
     * @param offset Offset at which to start storing characters
     * @param length Maximum number of characters to read
     *
     * @return The number of characters read, or -1 if the end of the
     *         stream has been reached
     *
     * @exception IOException If an I/O error occurs
     * @exception IndexOutOfBoundsException If <code>offset</code> and
     *            <code>length</code> do not describe a range inside
     *            <code>ch</code>
     */
    public int read(char ch[], int offset, int length) throws IOException {
        if (offset < 0 || length < 0 || length > ch.length - offset) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        final int shift = charShift();
        final int width = 1 << shift;

        // Only whole characters may be requested; otherwise completing a
        // partial character below could run past the end of fBuffer.
        final int maxChars = fBuffer.length >> shift;
        if (maxChars == 0) {
            // The buffer cannot hold even one character: decode a single
            // character directly from the stream instead.
            int c = read();
            if (c == -1) {
                return -1;
            }
            ch[offset] = (char) c;
            return 1;
        }

        // Clamp in characters first so the shift cannot overflow.
        int byteLength = Math.min(length, maxChars) << shift;
        int count = fInputStream.read(fBuffer, 0, byteLength);
        if (count == -1) {
            return -1;
        }

        // Make count a multiple of the character width by pulling in the
        // bytes that complete the last, partially read character.
        int partial = count & (width - 1);
        if (partial != 0) {
            int end = count + (width - partial); // <= byteLength <= fBuffer.length
            while (count < end) {
                int byteRead = fInputStream.read();
                if (byteRead == -1) {
                    // End of input; something likely went wrong.
                    // Pad the rest of the character with nulls.
                    while (count < end) {
                        fBuffer[count++] = 0;
                    }
                    break;
                }
                fBuffer[count++] = (byte) byteRead;
            }
        }

        // now count is a multiple of the right number of bytes
        int numChars = count >> shift;
        int curPos = 0;
        for (int i = 0; i < numChars; i++) {
            int b0 = fBuffer[curPos++] & 0xff;
            int b1 = fBuffer[curPos++] & 0xff;
            if (fEncoding >= 4) { // UCS-4
                int b2 = fBuffer[curPos++] & 0xff;
                int b3 = fBuffer[curPos++] & 0xff;
                ch[offset + i] = (char) decodeUCS4(b0, b1, b2, b3);
            } else { // UCS-2
                ch[offset + i] = (char) decodeUCS2(b0, b1);
            }
        }
        return numChars;
    } // read(char[],int,int)

    /**
     * Skip characters. This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p>If the underlying stream stops in the middle of a character, the
     * rest of that character is consumed so that later reads stay aligned,
     * and the character is counted as skipped.
     *
     * @param n The number of characters to skip
     *
     * @return The number of characters actually skipped
     *
     * @exception IOException If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        final int shift = charShift();
        final int mask = (1 << shift) - 1;

        // Keep n << shift from overflowing into a negative byte count.
        final long maxChars = Long.MAX_VALUE >> shift;
        if (n > maxChars) {
            n = maxChars;
        }

        long bytesSkipped = fInputStream.skip(n << shift);
        long charsSkipped = bytesSkipped >> shift;
        int partial = (int) (bytesSkipped & mask);
        if (partial != 0) {
            // Consume the remainder of the partially skipped character.
            for (int i = partial; i <= mask; i++) {
                if (fInputStream.read() == -1) {
                    break;
                }
            }
            charsSkipped++;
        }
        return charsSkipped;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return True if the next read() is guaranteed not to block for input,
     * false otherwise. Note that returning false does not guarantee that the
     * next read will block. This implementation always returns false.
     *
     * @exception IOException If an I/O error occurs
     */
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     *
     * @return Whatever the underlying input stream reports.
     */
    public boolean markSupported() {
        return fInputStream.markSupported();
    } // markSupported()

    /**
     * Mark the present position in the stream. Subsequent calls to reset()
     * will attempt to reposition the stream to this point. Not all
     * character-input streams support the mark() operation.
     *
     * @param readAheadLimit Limit on the number of characters that may be
     *                       read while still preserving the mark. After
     *                       reading this many characters, attempting to
     *                       reset the stream may fail.
     *
     * @exception IOException If the stream does not support mark(),
     *                        or if some other I/O error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
        // The limit is given in characters but the underlying stream
        // counts bytes, so scale it (saturating rather than overflowing).
        final int shift = charShift();
        int byteLimit;
        if (readAheadLimit > (Integer.MAX_VALUE >> shift)) {
            byteLimit = Integer.MAX_VALUE;
        } else {
            byteLimit = readAheadLimit << shift;
        }
        fInputStream.mark(byteLimit);
    } // mark(int)

    /**
     * Reset the stream. If the stream has been marked, then attempt to
     * reposition it at the mark. If the stream has not been marked, then
     * attempt to reset it in some way appropriate to the particular stream,
     * for example by repositioning it to its starting point. Not all
     * character-input streams support the reset() operation, and some support
     * reset() without supporting mark().
     *
     * @exception IOException If the stream has not been marked,
     *                        or if the mark has been invalidated,
     *                        or if the stream does not support reset(),
     *                        or if some other I/O error occurs
     */
    public void reset() throws IOException {
        fInputStream.reset();
    } // reset()

    /**
     * Close the stream by closing the underlying input stream.
     *
     * @exception IOException If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()

    //
    // Private methods
    //

    /** Returns log2 of the character width in bytes: 2 for UCS-4, 1 for UCS-2. */
    private int charShift() {
        return (fEncoding >= 4) ? 2 : 1;
    } // charShift():int

    /** Combines two unsigned byte values into a UCS-2 value per fEncoding. */
    private int decodeUCS2(int b0, int b1) {
        if (fEncoding == UCS2BE) {
            return (b0 << 8) + b1;
        }
        return (b1 << 8) + b0;
    } // decodeUCS2(int,int):int

    /** Combines four unsigned byte values into a UCS-4 value per fEncoding. */
    private int decodeUCS4(int b0, int b1, int b2, int b3) {
        if (fEncoding == UCS4BE) {
            return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
        }
        return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0;
    } // decodeUCS4(int,int,int,int):int

} // class UCSReader
299
Popular Tags