1 28 29 package com.caucho.vfs.i18n; 30 31 import java.io.CharConversionException ; 32 import java.io.EOFException ; 33 import java.io.IOException ; 34 import java.io.InputStream ; 35 import java.io.Reader ; 36 37 40 public class UTF8Reader extends EncodingReader { 41 private InputStream _is; 42 private int _peek = -1; 43 44 47 public UTF8Reader() 48 { 49 } 50 51 54 private UTF8Reader(InputStream is) 55 { 56 _is = is; 57 } 58 59 67 public Reader create(InputStream is, String javaEncoding) 68 { 69 return new UTF8Reader(is); 70 } 71 72 75 public int read() 76 throws IOException 77 { 78 if (_peek >= 0) { 79 int peek = _peek; 80 _peek = -1; 81 return peek; 82 } 83 84 InputStream is = _is; 85 86 int ch1 = is.read(); 87 88 if (ch1 < 0x80) { 89 return ch1; 90 } 91 if ((ch1 & 0xe0) == 0xc0) { 92 int ch2 = is.read(); 93 if (ch2 < 0) 94 throw new EOFException ("unexpected end of file in utf8 character"); 95 else if ((ch2 & 0xc0) != 0x80) 96 throw new CharConversionException ("illegal utf8 encoding at 0x" + 97 Integer.toHexString(ch1) + ", " + 98 Integer.toHexString(ch2)); 99 100 return ((ch1 & 0x1f) << 6) + (ch2 & 0x3f); 101 } 102 else if ((ch1 & 0xf0) == 0xe0) { 103 int ch2 = is.read(); 104 int ch3 = is.read(); 105 106 if (ch2 < 0) 107 throw new EOFException ("unexpected end of file in utf8 character"); 108 else if ((ch2 & 0xc0) != 0x80) 109 throw new CharConversionException ("illegal utf8 encoding at 0x" + 110 Integer.toHexString(ch2)); 111 112 if (ch3 < 0) 113 throw new EOFException ("unexpected end of file in utf8 character"); 114 else if ((ch3 & 0xc0) != 0x80) 115 throw new CharConversionException ("illegal utf8 encoding at 0x" + 116 Integer.toHexString(ch3)); 117 118 int ch = ((ch1 & 0x1f) << 12) + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f); 119 120 if (ch == 0xfeff) return is.read(); 122 else 123 return ch; 124 } 125 else if ((ch1 & 0xf0) == 0xf0) { 126 int ch2 = is.read(); 127 int ch3 = is.read(); 128 int ch4 = is.read(); 129 130 if (ch2 < 0) 131 throw new EOFException ("unexpected end of file in utf8 character"); 132 else if ((ch2 & 0xc0) != 0x80) 133 throw new CharConversionException ("illegal utf8 encoding at 0x" + 134 Integer.toHexString(ch2)); 135 136 if (ch3 < 0) 137 throw new EOFException ("unexpected end of file in utf8 character"); 138 else if ((ch3 & 0xc0) != 0x80) 139 throw new CharConversionException ("illegal utf8 encoding at 0x" + 140 Integer.toHexString(ch3)); 141 142 if (ch4 < 0) 143 throw new EOFException ("unexpected end of file in utf8 character"); 144 else if ((ch4 & 0xc0) != 0x80) 145 throw new CharConversionException ("illegal utf8 encoding at 0x" + 146 Integer.toHexString(ch4)); 147 148 int ch = (((ch1 & 0xf) << 18) + 149 ((ch2 & 0x3f) << 12) + 150 ((ch3 & 0x3f) << 6) + 151 ((ch4 & 0x3f))); 152 153 _peek = 0xdc00 + (ch & 0x3ff); 154 155 return 0xd800 + ((ch - 0x10000) / 0x400); 156 } 157 else 158 throw new CharConversionException ("illegal utf8 encoding at (" + 159 (int) ch1 + ")"); 160 } 161 162 171 public int read(char []cbuf, int off, int len) 172 throws IOException 173 { 174 int i = 0; 175 176 for (i = 0; i < len; i++) { 177 int ch = read(); 178 179 if (ch < 0) 180 return i == 0 ? -1 : i; 181 182 cbuf[off + i] = (char) ch; 183 } 184 185 return i; 186 } 187 } 188 | Popular Tags |