1 5 6 package org.w3c.tidy; 7 8 32 33 import java.io.InputStream ; 34 import java.io.IOException ; 35 36 public class StreamInImpl extends StreamIn { 37 38 39 private static int[] Win2Unicode = 40 { 41 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 42 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000, 43 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 44 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178 45 }; 46 47 51 52 private static int[] Mac2Unicode = 53 { 54 55 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 56 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 57 58 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 59 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 60 61 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 62 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 63 64 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 65 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 66 67 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 68 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 69 70 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 71 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 72 73 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 74 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 75 76 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 77 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 78 79 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 80 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, 81 82 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, 83 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, 84 85 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 86 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, 87 88 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, 89 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, 90 91 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 92 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, 93 94 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, 95 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, 96 97 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 98 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, 99 100 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 101 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7 102 }; 103 104 public StreamInImpl(InputStream stream, int encoding, int tabsize) 105 { 106 this.stream = stream; 107 this.pushed = false; 108 this.c = (int)'\0'; 109 this.tabs = 0; 110 this.tabsize = tabsize; 111 this.curline = 1; 112 this.curcol = 1; 113 this.encoding = encoding; 114 this.state = FSM_ASCII; 115 this.endOfStream = false; 116 } 117 118 119 public int readCharFromStream() 120 { 121 int n, c, i, count; 122 123 try { 124 c = this.stream.read(); 125 126 if (c == EndOfStream) { 127 this.endOfStream = true; 128 return c; 129 } 130 131 149 150 if (this.encoding == Configuration.ISO2022) 151 { 152 if (c == 0x1b) 153 { 154 this.state = FSM_ESC; 155 return c; 156 } 157 158 switch (this.state) 159 { 160 case FSM_ESC: 161 if (c == '$') 162 this.state = FSM_ESCD; 163 else if (c == '(') 164 this.state = FSM_ESCP; 165 else 166 this.state = FSM_ASCII; 167 break; 168 169 case FSM_ESCD: 170 if (c == '(') 171 this.state = FSM_ESCDP; 172 else 173 this.state = FSM_NONASCII; 174 break; 175 176 case FSM_ESCDP: 177 this.state = FSM_NONASCII; 178 break; 179 180 case FSM_ESCP: 181 this.state = FSM_ASCII; 182 break; 183 184 case FSM_NONASCII: 185 c |= 0x80; 186 break; 187 } 188 189 return c; 190 } 191 192 if (this.encoding != Configuration.UTF8) 193 return c; 194 195 196 197 if ((c & 0xE0) == 0xC0) 198 { 199 n = c & 31; 200 count = 1; 201 } 202 else if ((c & 0xF0) == 0xE0) 203 { 204 n = c & 15; 205 count = 2; 206 } 207 else if ((c & 0xF8) == 0xF0) 208 { 209 n = c & 7; 210 count = 3; 211 } 212 else if ((c & 0xFC) == 0xF8) 213 { 214 n = c & 3; 215 count = 4; 216 } 217 else if ((c & 0xFE) == 0xFC) 218 { 219 n = c & 1; 220 count = 5; 221 } 222 else 223 return c; 224 225 226 for (i = 1; i <= count; ++i) 227 { 228 c = this.stream.read(); 229 230 if (c == EndOfStream) { 231 this.endOfStream = true; 232 return c; 233 } 234 235 n = (n << 6) | (c & 0x3F); 236 } 237 } 238 catch (IOException e) { 239 System.err.println("StreamInImpl.readCharFromStream: " + e.toString()); 240 n = EndOfStream; 241 } 242 243 return n; 244 } 245 246 public int readChar() 247 { 248 int c; 249 250 if (this.pushed) 251 { 252 this.pushed = false; 253 c = this.c; 254 255 if (c == '\n') 256 { 257 this.curcol = 1; 258 this.curline++; 259 return c; 260 } 261 262 this.curcol++; 263 return c; 264 } 265 266 this.lastcol = this.curcol; 267 268 if (this.tabs > 0) 269 { 270 this.curcol++; 271 this.tabs--; 272 return ' '; 273 } 274 275 for (;;) 276 { 277 c = readCharFromStream(); 278 279 if (c < 0) 280 return EndOfStream; 281 282 if (c == '\n') 283 { 284 this.curcol = 1; 285 this.curline++; 286 break; 287 } 288 289 if (c == '\r') 290 { 291 c = readCharFromStream(); 292 if (c != '\n') 293 { 294 ungetChar(c); 295 c = '\n'; 296 } 297 this.curcol = 1; 298 this.curline++; 299 break; 300 } 301 302 if (c == '\t') 303 { 304 this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1; 305 this.curcol++; 306 c = ' '; 307 break; 308 } 309 310 311 312 if (c == '\033') 313 break; 314 315 if (0 < c && c < 32) 316 continue; 317 318 319 320 if (this.encoding == Configuration.RAW || 321 this.encoding == Configuration.ISO2022) 322 { 323 this.curcol++; 324 break; 325 } 326 327 if (this.encoding == Configuration.MACROMAN) 328 c = Mac2Unicode[c]; 329 330 331 332 if (127 < c && c < 160) 333 { 334 Report.encodingError((Lexer)this.lexer, Report.WINDOWS_CHARS, c); 335 336 c = Win2Unicode[c - 128]; 337 338 if (c == 0) 339 continue; 340 } 341 342 this.curcol++; 343 break; 344 } 345 346 return c; 347 } 348 349 public void ungetChar(int c) 350 { 351 this.pushed = true; 352 this.c = c; 353 354 if (c == '\n') 355 { 356 --this.curline; 357 } 358 359 this.curcol = this.lastcol; 360 } 361 362 public boolean isEndOfStream() 363 { 364 return this.endOfStream; 365 } 366 367 } 368 | Popular Tags |