KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > jasper > xmlparser > UCSReader


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.jasper.xmlparser;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;

24 /**
25  * Reader for UCS-2 and UCS-4 encodings.
26  * (i.e., encodings from ISO-10646-UCS-(2|4)).
27  *
28  * @author Neil Graham, IBM
29  *
30  * @version $Id: UCSReader.java 467222 2006-10-24 03:17:11Z markt $
31  */

32 public class UCSReader extends Reader JavaDoc {
33
34     private org.apache.commons.logging.Log log=
35         org.apache.commons.logging.LogFactory.getLog( UCSReader.class );
36     
37     //
38
// Constants
39
//
40

41     /** Default byte buffer size (8192, larger than that of ASCIIReader
42      * since it's reasonable to surmise that the average UCS-4-encoded
43      * file should be 4 times as large as the average ASCII-encoded file).
44      */

45     public static final int DEFAULT_BUFFER_SIZE = 8192;
46
47     public static final short UCS2LE = 1;
48     public static final short UCS2BE = 2;
49     public static final short UCS4LE = 4;
50     public static final short UCS4BE = 8;
51
52     //
53
// Data
54
//
55

56     /** Input stream. */
57     protected InputStream JavaDoc fInputStream;
58
59     /** Byte buffer. */
60     protected byte[] fBuffer;
61
62     // what kind of data we're dealing with
63
protected short fEncoding;
64
65     //
66
// Constructors
67
//
68

69     /**
70      * Constructs an ASCII reader from the specified input stream
71      * using the default buffer size. The Endian-ness and whether this is
72      * UCS-2 or UCS-4 needs also to be known in advance.
73      *
74      * @param inputStream The input stream.
75      * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
76      */

77     public UCSReader(InputStream JavaDoc inputStream, short encoding) {
78         this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
79     } // <init>(InputStream, short)
80

81     /**
82      * Constructs an ASCII reader from the specified input stream
83      * and buffer size. The Endian-ness and whether this is
84      * UCS-2 or UCS-4 needs also to be known in advance.
85      *
86      * @param inputStream The input stream.
87      * @param size The initial buffer size.
88      * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
89      */

90     public UCSReader(InputStream JavaDoc inputStream, int size, short encoding) {
91         fInputStream = inputStream;
92         fBuffer = new byte[size];
93         fEncoding = encoding;
94     } // <init>(InputStream,int,short)
95

96     //
97
// Reader methods
98
//
99

100     /**
101      * Read a single character. This method will block until a character is
102      * available, an I/O error occurs, or the end of the stream is reached.
103      *
104      * <p> Subclasses that intend to support efficient single-character input
105      * should override this method.
106      *
107      * @return The character read, as an integer in the range 0 to 127
108      * (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has
109      * been reached
110      *
111      * @exception IOException If an I/O error occurs
112      */

113     public int read() throws IOException JavaDoc {
114         int b0 = fInputStream.read() & 0xff;
115         if (b0 == 0xff)
116             return -1;
117         int b1 = fInputStream.read() & 0xff;
118         if (b1 == 0xff)
119             return -1;
120         if(fEncoding >=4) {
121             int b2 = fInputStream.read() & 0xff;
122             if (b2 == 0xff)
123                 return -1;
124             int b3 = fInputStream.read() & 0xff;
125             if (b3 == 0xff)
126                 return -1;
127             if (log.isDebugEnabled())
128                 log.debug("b0 is " + (b0 & 0xff) + " b1 " + (b1 & 0xff) + " b2 " + (b2 & 0xff) + " b3 " + (b3 & 0xff));
129             if (fEncoding == UCS4BE)
130                 return (b0<<24)+(b1<<16)+(b2<<8)+b3;
131             else
132                 return (b3<<24)+(b2<<16)+(b1<<8)+b0;
133         } else { // UCS-2
134
if (fEncoding == UCS2BE)
135                 return (b0<<8)+b1;
136             else
137                 return (b1<<8)+b0;
138         }
139     } // read():int
140

141     /**
142      * Read characters into a portion of an array. This method will block
143      * until some input is available, an I/O error occurs, or the end of the
144      * stream is reached.
145      *
146      * @param ch Destination buffer
147      * @param offset Offset at which to start storing characters
148      * @param length Maximum number of characters to read
149      *
150      * @return The number of characters read, or -1 if the end of the
151      * stream has been reached
152      *
153      * @exception IOException If an I/O error occurs
154      */

155     public int read(char ch[], int offset, int length) throws IOException JavaDoc {
156         int byteLength = length << ((fEncoding >= 4)?2:1);
157         if (byteLength > fBuffer.length) {
158             byteLength = fBuffer.length;
159         }
160         int count = fInputStream.read(fBuffer, 0, byteLength);
161         if(count == -1) return -1;
162         // try and make count be a multiple of the number of bytes we're looking for
163
if(fEncoding >= 4) { // BigEndian
164
// this looks ugly, but it avoids an if at any rate...
165
int numToRead = (4 - (count & 3) & 3);
166             for(int i=0; i<numToRead; i++) {
167                 int charRead = fInputStream.read();
168                 if(charRead == -1) { // end of input; something likely went wrong!A Pad buffer with nulls.
169
for (int j = i;j<numToRead; j++)
170                         fBuffer[count+j] = 0;
171                     break;
172                 } else {
173                     fBuffer[count+i] = (byte)charRead;
174                 }
175             }
176             count += numToRead;
177         } else {
178             int numToRead = count & 1;
179             if(numToRead != 0) {
180                 count++;
181                 int charRead = fInputStream.read();
182                 if(charRead == -1) { // end of input; something likely went wrong!A Pad buffer with nulls.
183
fBuffer[count] = 0;
184                 } else {
185                     fBuffer[count] = (byte)charRead;
186                 }
187             }
188         }
189
190         // now count is a multiple of the right number of bytes
191
int numChars = count >> ((fEncoding >= 4)?2:1);
192         int curPos = 0;
193         for (int i = 0; i < numChars; i++) {
194             int b0 = fBuffer[curPos++] & 0xff;
195             int b1 = fBuffer[curPos++] & 0xff;
196             if(fEncoding >=4) {
197                 int b2 = fBuffer[curPos++] & 0xff;
198                 int b3 = fBuffer[curPos++] & 0xff;
199                 if (fEncoding == UCS4BE)
200                     ch[offset+i] = (char)((b0<<24)+(b1<<16)+(b2<<8)+b3);
201                 else
202                     ch[offset+i] = (char)((b3<<24)+(b2<<16)+(b1<<8)+b0);
203             } else { // UCS-2
204
if (fEncoding == UCS2BE)
205                     ch[offset+i] = (char)((b0<<8)+b1);
206                 else
207                     ch[offset+i] = (char)((b1<<8)+b0);
208             }
209         }
210         return numChars;
211     } // read(char[],int,int)
212

213     /**
214      * Skip characters. This method will block until some characters are
215      * available, an I/O error occurs, or the end of the stream is reached.
216      *
217      * @param n The number of characters to skip
218      *
219      * @return The number of characters actually skipped
220      *
221      * @exception IOException If an I/O error occurs
222      */

223     public long skip(long n) throws IOException JavaDoc {
224         // charWidth will represent the number of bits to move
225
// n leftward to get num of bytes to skip, and then move the result rightward
226
// to get num of chars effectively skipped.
227
// The trick with &'ing, as with elsewhere in this dcode, is
228
// intended to avoid an expensive use of / that might not be optimized
229
// away.
230
int charWidth = (fEncoding >=4)?2:1;
231         long bytesSkipped = fInputStream.skip(n<<charWidth);
232         if((bytesSkipped & (charWidth | 1)) == 0) return bytesSkipped >> charWidth;
233         return (bytesSkipped >> charWidth) + 1;
234     } // skip(long):long
235

236     /**
237      * Tell whether this stream is ready to be read.
238      *
239      * @return True if the next read() is guaranteed not to block for input,
240      * false otherwise. Note that returning false does not guarantee that the
241      * next read will block.
242      *
243      * @exception IOException If an I/O error occurs
244      */

245     public boolean ready() throws IOException JavaDoc {
246     return false;
247     } // ready()
248

249     /**
250      * Tell whether this stream supports the mark() operation.
251      */

252     public boolean markSupported() {
253     return fInputStream.markSupported();
254     } // markSupported()
255

256     /**
257      * Mark the present position in the stream. Subsequent calls to reset()
258      * will attempt to reposition the stream to this point. Not all
259      * character-input streams support the mark() operation.
260      *
261      * @param readAheadLimit Limit on the number of characters that may be
262      * read while still preserving the mark. After
263      * reading this many characters, attempting to
264      * reset the stream may fail.
265      *
266      * @exception IOException If the stream does not support mark(),
267      * or if some other I/O error occurs
268      */

269     public void mark(int readAheadLimit) throws IOException JavaDoc {
270     fInputStream.mark(readAheadLimit);
271     } // mark(int)
272

273     /**
274      * Reset the stream. If the stream has been marked, then attempt to
275      * reposition it at the mark. If the stream has not been marked, then
276      * attempt to reset it in some way appropriate to the particular stream,
277      * for example by repositioning it to its starting point. Not all
278      * character-input streams support the reset() operation, and some support
279      * reset() without supporting mark().
280      *
281      * @exception IOException If the stream has not been marked,
282      * or if the mark has been invalidated,
283      * or if the stream does not support reset(),
284      * or if some other I/O error occurs
285      */

286     public void reset() throws IOException JavaDoc {
287         fInputStream.reset();
288     } // reset()
289

290     /**
291      * Close the stream. Once a stream has been closed, further read(),
292      * ready(), mark(), or reset() invocations will throw an IOException.
293      * Closing a previously-closed stream, however, has no effect.
294      *
295      * @exception IOException If an I/O error occurs
296      */

297      public void close() throws IOException JavaDoc {
298          fInputStream.close();
299      } // close()
300

301 } // class UCSReader
302
Popular Tags