package com.caucho.xml.readers;

import com.caucho.util.CharBuffer;
import com.caucho.vfs.ReadStream;
import com.caucho.xml.XmlParser;

import java.io.CharConversionException;
import java.io.EOFException;
import java.io.IOException;

/**
 * XML reader that decodes one- to three-byte UTF-8 sequences from the
 * underlying byte stream, normalizes line endings to '\n', and keeps the
 * parser's line counter up to date.
 *
 * <p>The fields {@code _is}, {@code _parser}, {@code _line} and the
 * localization helper {@code L} are not declared in this file; they are
 * presumably inherited from {@link XmlReader}.
 */
public class Utf8Reader extends XmlReader {
  /**
   * Zero-arg constructor; does not attach a parser or stream.
   */
  public Utf8Reader()
  {
  }

  /**
   * Creates a reader bound to a parser and an input stream.
   *
   * @param parser the parser passed to the superclass constructor
   * @param is the stream passed to the superclass constructor
   */
  public Utf8Reader(XmlParser parser, ReadStream is)
  {
    super(parser, is);
  }

  /**
   * Reads the next decoded character.
   *
   * <p>"\r\n" and a lone "\r" are both returned as a single '\n'; every
   * line break increments the line number reported to the parser.  Bytes
   * below 0x80 (including a negative end-of-stream value from the
   * underlying stream) are returned unchanged; anything else is decoded
   * as a multi-byte UTF-8 sequence.
   *
   * @return the decoded character, or the stream's negative value at the
   *         end of input
   * @throws IOException if the stream fails or the bytes are not valid
   *         UTF-8
   */
  public int read()
    throws IOException
  {
    int ch1 = _is.read();

    if (ch1 == '\n') {
      _parser.setLine(++_line);
      return ch1;
    }
    else if (ch1 == '\r') {
      _parser.setLine(++_line);

      int ch2 = _is.read();
      if (ch2 == '\n')
        return '\n';

      // A lone '\r': the character after it has already been consumed, so
      // hand it back to the parser (decoded first if it is multi-byte).
      if (ch2 < 0) {
        // end of stream, nothing to push back
      }
      else if (ch2 < 0x80)
        _parser.unread(ch2);
      else
        _parser.unread(readSecond(ch2));

      return '\n';
    }
    else if (ch1 < 0x80)
      return ch1;
    else
      return readSecond(ch1);
  }

  /**
   * Decodes the remainder of a multi-byte UTF-8 sequence.
   *
   * <p>Only two- and three-byte sequences are supported; any other lead
   * byte (a stray continuation byte or a four-byte lead) is reported as
   * an encoding error.  Overlong forms, which encode a code point in more
   * bytes than necessary, are rejected as required by RFC 3629.  A decoded
   * U+FEFF (byte order mark) is skipped.
   *
   * @param ch1 the lead byte, already read and known to be >= 0x80
   * @return the decoded character
   * @throws EOFException if the stream ends inside the sequence
   * @throws CharConversionException if the sequence is malformed
   */
  private int readSecond(int ch1)
    throws IOException
  {
    if ((ch1 & 0xe0) == 0xc0) {
      int ch2 = _is.read();
      if (ch2 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch2 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding {0}", hex(ch1)));

      int ch = ((ch1 & 0x1f) << 6) + (ch2 & 0x3f);

      // Lead bytes 0xc0 and 0xc1 can only produce values below 0x80, which
      // must be encoded as a single byte.
      if (ch < 0x80)
        throw error(L.l("illegal overlong utf8 encoding {0} {1}",
                        hex(ch1), hex(ch2)));

      return ch;
    }
    else if ((ch1 & 0xf0) == 0xe0) {
      int ch2 = _is.read();
      int ch3 = _is.read();

      if (ch2 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch2 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding at {0} {1} {2}", hex(ch1), hex(ch2), hex(ch3)));

      if (ch3 < 0)
        throw new EOFException("unexpected end of file in utf8 character");
      else if ((ch3 & 0xc0) != 0x80)
        throw error(L.l("illegal utf8 encoding {0} {1} {2}",
                        hex(ch1), hex(ch2), hex(ch3)));

      // Only the low four bits of a three-byte lead carry payload; bit 4 is
      // guaranteed clear by the branch condition above.
      int ch = ((ch1 & 0x0f) << 12) + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f);

      // Values below 0x800 fit in one or two bytes, so a three-byte form
      // is overlong.
      if (ch < 0x800)
        throw error(L.l("illegal overlong utf8 encoding {0} {1} {2}",
                        hex(ch1), hex(ch2), hex(ch3)));

      if (ch == 0xfeff) // byte order mark: skip it and return what follows
        return read();
      else
        return ch;
    }
    else
      throw error(L.l("illegal utf8 encoding at {0}", hex(ch1)));
  }

  /**
   * Formats the low eight bits of a value as "0x" followed by two
   * lowercase hex digits, for use in error messages.
   *
   * @param n the value; only {@code n & 0xff} is shown
   * @return the formatted string, e.g. "0xc3"
   */
  private String hex(int n)
  {
    n = n & 0xff;

    CharBuffer cb = CharBuffer.allocate();

    cb.append("0x");
    cb.append(hexDigit(n / 16));
    cb.append(hexDigit(n % 16));

    return cb.close();
  }

  /**
   * Converts a value in the range 0..15 to its lowercase hex digit.
   */
  private static char hexDigit(int d)
  {
    if (d >= 0 && d <= 9)
      return (char) ('0' + d);
    else
      return (char) ('a' + d - 10);
  }

  /**
   * Builds the exception for a malformed byte sequence, prefixing the
   * message with "filename:line: " when the parser reports a filename.
   *
   * @param msg the description of the encoding problem
   * @return the exception for the caller to throw
   */
  private CharConversionException error(String msg)
  {
    String filename = _parser.getFilename();
    int line = _parser.getLine();

    if (filename != null)
      return new CharConversionException(filename + ":" + line + ": " + msg);
    else
      return new CharConversionException(msg);
  }
}