KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xerces > internal > impl > io > UCSReader


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 2000-2002 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package com.sun.org.apache.xerces.internal.impl.io;
59
60 import java.io.InputStream JavaDoc;
61 import java.io.IOException JavaDoc;
62 import java.io.Reader JavaDoc;
63
64 /**
65  * Reader for UCS-2 and UCS-4 encodings.
66  * (i.e., encodings from ISO-10646-UCS-(2|4)).
67  *
68  * @author Neil Graham, IBM
69  *
70  * @version $Id: UCSReader.java,v 1.3 2002/07/08 16:24:03 neilg Exp $
71  */

public class UCSReader extends Reader {

    //
    // Constants
    //

    /**
     * Default byte buffer size (8192, larger than that of ASCIIReader
     * since it's reasonable to surmise that the average UCS-4-encoded
     * file should be 4 times as large as the average ASCII-encoded file).
     */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** UCS-2, little-endian byte order. */
    public static final short UCS2LE = 1;
    /** UCS-2, big-endian byte order. */
    public static final short UCS2BE = 2;
    /** UCS-4, little-endian byte order. */
    public static final short UCS4LE = 4;
    /** UCS-4, big-endian byte order. */
    public static final short UCS4BE = 8;

    //
    // Data
    //

    /** Input stream. */
    protected InputStream fInputStream;

    /** Byte buffer. */
    protected byte[] fBuffer;

    /**
     * What kind of data we're dealing with. Values of 4 or more are
     * treated as UCS-4, anything smaller as UCS-2.
     */
    protected short fEncoding;

    //
    // Constructors
    //

    /**
     * Constructs a UCS reader from the specified input stream
     * using the default buffer size. The endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, short encoding) {
        this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
    } // <init>(InputStream, short)

    /**
     * Constructs a UCS reader from the specified input stream
     * and buffer size. The endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * <p>The buffer is rounded up to a multiple of four bytes so that it
     * always holds a whole number of characters in either encoding.
     *
     * @param inputStream The input stream.
     * @param size The initial buffer size, in bytes; must be positive.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     *
     * @exception IllegalArgumentException If size is not positive
     */
    public UCSReader(InputStream inputStream, int size, short encoding) {
        if (size <= 0) {
            throw new IllegalArgumentException("Buffer size must be positive: " + size);
        }
        // Round up to a multiple of 4; round down instead when rounding up
        // would overflow an int.
        int aligned = (size > Integer.MAX_VALUE - 3) ? (size & ~3) : ((size + 3) & ~3);
        fInputStream = inputStream;
        fBuffer = new byte[aligned];
        fEncoding = encoding;
    } // <init>(InputStream,int,short)

    //
    // Reader methods
    //

    /**
     * Read a single character. This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p>For UCS-4 input the full 32-bit value assembled from the four
     * bytes is returned without being narrowed to a char.
     *
     * @return The character read, or -1 if the end of the stream was
     *         reached before a complete character could be read
     *
     * @exception IOException If an I/O error occurs
     */
    public int read() throws IOException {
        // Test for end of stream BEFORE interpreting the value as data:
        // a legitimate 0xFF byte must not be confused with EOF (-1).
        int b0 = fInputStream.read();
        if (b0 < 0) {
            return -1;
        }
        int b1 = fInputStream.read();
        if (b1 < 0) {
            return -1;
        }
        if (fEncoding < 4) { // UCS-2
            return combine2(b0, b1);
        }
        int b2 = fInputStream.read();
        if (b2 < 0) {
            return -1;
        }
        int b3 = fInputStream.read();
        if (b3 < 0) {
            return -1;
        }
        return combine4(b0, b1, b2, b3);
    } // read():int

    /**
     * Read characters into a portion of an array. This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     *
     * <p>If the stream ends in the middle of a character, the missing
     * bytes are treated as zeros. UCS-4 values are narrowed to a char, so
     * values above 0xFFFF are truncated.
     *
     * @param ch Destination buffer
     * @param offset Offset at which to start storing characters
     * @param length Maximum number of characters to read
     *
     * @return The number of characters read, or -1 if the end of the
     *         stream has been reached
     *
     * @exception IOException If an I/O error occurs
     * @exception IndexOutOfBoundsException If offset or length is negative,
     *            or offset + length exceeds the size of ch
     */
    public int read(char ch[], int offset, int length) throws IOException {
        // Validate up front so no bytes are consumed for a doomed request.
        if (offset < 0 || length < 0 || length > ch.length - offset) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        final int shift = widthShift();
        final int width = 1 << shift;

        // Work in characters first so that "length << shift" cannot overflow
        // and the byte request is always a whole number of characters.
        int charsWanted = Math.min(length, fBuffer.length >> shift);
        int byteLength = charsWanted << shift;

        int count = fInputStream.read(fBuffer, 0, byteLength);
        if (count == -1) {
            return -1;
        }

        // Make count a multiple of the character width: pull the missing
        // bytes one at a time, and pad with nulls if the stream ends first.
        // Because byteLength is a multiple of width, this stays inside the
        // region [0, byteLength) of the buffer.
        int partial = count & (width - 1);
        if (partial != 0) {
            int end = count + (width - partial);
            while (count < end) {
                int next = fInputStream.read();
                if (next < 0) {
                    break; // end of input; something likely went wrong
                }
                fBuffer[count++] = (byte) next;
            }
            while (count < end) {
                fBuffer[count++] = 0;
            }
        }

        // now count is a multiple of the right number of bytes
        int numChars = count >> shift;
        int pos = 0;
        for (int i = 0; i < numChars; i++) {
            int b0 = fBuffer[pos++] & 0xff;
            int b1 = fBuffer[pos++] & 0xff;
            if (shift == 2) {
                int b2 = fBuffer[pos++] & 0xff;
                int b3 = fBuffer[pos++] & 0xff;
                ch[offset + i] = (char) combine4(b0, b1, b2, b3);
            } else { // UCS-2
                ch[offset + i] = (char) combine2(b0, b1);
            }
        }
        return numChars;
    } // read(char[],int,int)

    /**
     * Skip characters. This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p>If the underlying stream stops skipping in the middle of a
     * character, the rest of that character is consumed so the stream stays
     * aligned, and the character is counted as skipped.
     *
     * @param n The number of characters to skip
     *
     * @return The number of characters actually skipped
     *
     * @exception IOException If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        if (n <= 0) {
            return 0;
        }
        final int shift = widthShift();
        final int width = 1 << shift;

        // Clamp so that converting characters to bytes cannot overflow.
        long bytesRequested = Math.min(n, Long.MAX_VALUE >> shift) << shift;
        long bytesSkipped = fInputStream.skip(bytesRequested);
        if (bytesSkipped <= 0) {
            return 0;
        }

        long charsSkipped = bytesSkipped >> shift;
        int partial = (int) (bytesSkipped & (width - 1));
        if (partial == 0) {
            return charsSkipped;
        }

        // The byte skip stopped mid-character: consume the remainder of that
        // character so later reads start on a character boundary.
        for (int remaining = width - partial; remaining > 0; remaining--) {
            if (fInputStream.read() < 0) {
                break;
            }
        }
        return charsSkipped + 1;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return Always false; this implementation never guarantees that the
     *         next read() will not block. Note that returning false does
     *         not guarantee that the next read will block.
     *
     * @exception IOException If an I/O error occurs
     */
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     *
     * @return Whatever the underlying input stream reports
     */
    public boolean markSupported() {
        return fInputStream.markSupported();
    } // markSupported()

    /**
     * Mark the present position in the stream. Subsequent calls to reset()
     * will attempt to reposition the stream to this point. Not all
     * character-input streams support the mark() operation.
     *
     * @param readAheadLimit Limit on the number of characters that may be
     *                       read while still preserving the mark. After
     *                       reading this many characters, attempting to
     *                       reset the stream may fail.
     *
     * @exception IOException If the stream does not support mark(),
     *                        or if some other I/O error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
        // The limit is expressed in characters, but the underlying stream
        // counts bytes: scale it by the character width, saturating at
        // Integer.MAX_VALUE rather than overflowing.
        int byteLimit = readAheadLimit;
        if (readAheadLimit > 0) {
            int shift = widthShift();
            byteLimit = (readAheadLimit > (Integer.MAX_VALUE >> shift))
                    ? Integer.MAX_VALUE
                    : (readAheadLimit << shift);
        }
        fInputStream.mark(byteLimit);
    } // mark(int)

    /**
     * Reset the stream. If the stream has been marked, then attempt to
     * reposition it at the mark. If the stream has not been marked, then
     * attempt to reset it in some way appropriate to the particular stream,
     * for example by repositioning it to its starting point. Not all
     * character-input streams support the reset() operation, and some support
     * reset() without supporting mark().
     *
     * @exception IOException If the stream has not been marked,
     *                        or if the mark has been invalidated,
     *                        or if the stream does not support reset(),
     *                        or if some other I/O error occurs
     */
    public void reset() throws IOException {
        fInputStream.reset();
    } // reset()

    /**
     * Close the stream by closing the underlying input stream.
     *
     * @exception IOException If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()

    //
    // Private helpers
    //

    /** Returns log2 of the bytes per character: 2 for UCS-4, 1 for UCS-2. */
    private int widthShift() {
        return (fEncoding >= 4) ? 2 : 1;
    }

    /** Assembles a UCS-2 value from two bytes given in stream order. */
    private int combine2(int b0, int b1) {
        if (fEncoding == UCS2BE) {
            return (b0 << 8) + b1;
        }
        return (b1 << 8) + b0;
    }

    /** Assembles a UCS-4 value from four bytes given in stream order. */
    private int combine4(int b0, int b1, int b2, int b3) {
        if (fEncoding == UCS4BE) {
            return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
        }
        return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0;
    }

} // class UCSReader
338
Popular Tags