KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > vfs > i18n > UTF8Reader


1 /*
2  * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3  *
4  * This file is part of Resin(R) Open Source
5  *
6  * Each copy or derived work must preserve the copyright notice and this
7  * notice unmodified.
8  *
9  * Resin Open Source is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * Resin Open Source is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17  * of NON-INFRINGEMENT. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with Resin Open Source; if not, write to the
22  * Free SoftwareFoundation, Inc.
23  * 59 Temple Place, Suite 330
24  * Boston, MA 02111-1307 USA
25  *
26  * @author Scott Ferguson
27  */

28
29 package com.caucho.vfs.i18n;
30
31 import java.io.CharConversionException JavaDoc;
32 import java.io.EOFException JavaDoc;
33 import java.io.IOException JavaDoc;
34 import java.io.InputStream JavaDoc;
35 import java.io.Reader JavaDoc;
36
37 /**
38  * Implements an encoding reader for UTF8.
39  */

40 public class UTF8Reader extends EncodingReader {
41   private InputStream JavaDoc _is;
42   private int _peek = -1;
43
44   /**
45    * Null-arg constructor for instantiation by com.caucho.vfs.Encoding only.
46    */

47   public UTF8Reader()
48   {
49   }
50
51   /**
52    * Create a UTF-8 reader based on the readStream.
53    */

54   private UTF8Reader(InputStream JavaDoc is)
55   {
56     _is = is;
57   }
58
59   /**
60    * Create a UTF-8 reader based on the readStream.
61    *
62    * @param is the input stream providing the bytes.
63    * @param javaEncoding the JDK name for the encoding.
64    *
65    * @return the UTF-8 reader.
66    */

67   public Reader JavaDoc create(InputStream JavaDoc is, String JavaDoc javaEncoding)
68   {
69     return new UTF8Reader(is);
70   }
71
72   /**
73    * Reads into a character buffer using the correct encoding.
74    */

75   public int read()
76     throws IOException JavaDoc
77   {
78     if (_peek >= 0) {
79       int peek = _peek;
80       _peek = -1;
81       return peek;
82     }
83
84     InputStream JavaDoc is = _is;
85     
86     int ch1 = is.read();
87
88     if (ch1 < 0x80) {
89       return ch1;
90     }
91     if ((ch1 & 0xe0) == 0xc0) {
92       int ch2 = is.read();
93       if (ch2 < 0)
94         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
95       else if ((ch2 & 0xc0) != 0x80)
96         throw new CharConversionException JavaDoc("illegal utf8 encoding at 0x" +
97                       Integer.toHexString(ch1) + ", " +
98                       Integer.toHexString(ch2));
99       
100       return ((ch1 & 0x1f) << 6) + (ch2 & 0x3f);
101     }
102     else if ((ch1 & 0xf0) == 0xe0) {
103       int ch2 = is.read();
104       int ch3 = is.read();
105       
106       if (ch2 < 0)
107         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
108       else if ((ch2 & 0xc0) != 0x80)
109         throw new CharConversionException JavaDoc("illegal utf8 encoding at 0x" +
110                       Integer.toHexString(ch2));
111       
112       if (ch3 < 0)
113         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
114       else if ((ch3 & 0xc0) != 0x80)
115         throw new CharConversionException JavaDoc("illegal utf8 encoding at 0x" +
116                       Integer.toHexString(ch3));
117
118       int ch = ((ch1 & 0x1f) << 12) + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f);
119
120       if (ch == 0xfeff) // handle some writers, e.g. microsoft
121
return is.read();
122       else
123         return ch;
124     }
125     else if ((ch1 & 0xf0) == 0xf0) {
126       int ch2 = is.read();
127       int ch3 = is.read();
128       int ch4 = is.read();
129
130       if (ch2 < 0)
131         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
132       else if ((ch2 & 0xc0) != 0x80)
133         throw new CharConversionException JavaDoc("illegal utf8 encoding at 0x" +
134                       Integer.toHexString(ch2));
135       
136       if (ch3 < 0)
137         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
138       else if ((ch3 & 0xc0) != 0x80)
139         throw new CharConversionException JavaDoc("illegal utf8 encoding at 0x" +
140                       Integer.toHexString(ch3));
141       
142       if (ch4 < 0)
143         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
144       else if ((ch4 & 0xc0) != 0x80)
145         throw new CharConversionException JavaDoc("illegal utf8 encoding at 0x" +
146                       Integer.toHexString(ch4));
147       
148       int ch = (((ch1 & 0xf) << 18) +
149         ((ch2 & 0x3f) << 12) +
150         ((ch3 & 0x3f) << 6) +
151         ((ch4 & 0x3f)));
152
153       _peek = 0xdc00 + (ch & 0x3ff);
154       
155       return 0xd800 + ((ch - 0x10000) / 0x400);
156     }
157     else
158       throw new CharConversionException JavaDoc("illegal utf8 encoding at (" +
159                                         (int) ch1 + ")");
160   }
161
162   /**
163    * Reads into a character buffer using the correct encoding.
164    *
165    * @param cbuf character buffer receiving the data.
166    * @param off starting offset into the buffer.
167    * @param len number of characters to read.
168    *
169    * @return the number of characters read or -1 on end of file.
170    */

171   public int read(char []cbuf, int off, int len)
172     throws IOException JavaDoc
173   {
174     int i = 0;
175
176     for (i = 0; i < len; i++) {
177       int ch = read();
178
179       if (ch < 0)
180     return i == 0 ? -1 : i;
181
182       cbuf[off + i] = (char) ch;
183     }
184
185     return i;
186   }
187 }
188
Popular Tags