1 27 package org.htmlparser.scanners; 28 29 import org.htmlparser.lexer.Cursor; 30 import org.htmlparser.lexer.Page; 31 import org.htmlparser.util.ParserException; 32 33 40 public class ScriptDecoder 41 { 42 45 public static final int STATE_DONE = 0; 46 47 50 public static final int STATE_INITIAL = 1; 51 52 55 protected static final int STATE_LENGTH = 2; 56 57 60 protected static final int STATE_PREFIX = 3; 61 62 65 protected static final int STATE_DECODE = 4; 66 67 70 protected static final int STATE_ESCAPE = 5; 71 72 75 protected static final int STATE_CHECKSUM = 6; 76 77 80 protected static final int STATE_FINAL = 7; 81 82 90 public static int LAST_STATE = STATE_DONE; 91 92 98 protected static byte mEncodingIndex[] = 99 { 100 1, 2, 0, 1, 2, 0, 2, 0, 0, 2, 0, 2, 1, 0, 2, 0, 101 1, 0, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 2, 0, 0, 2, 102 1, 1, 0, 2, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 0, 2, 103 1, 0, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 0, 2, 104 }; 105 106 111 protected static char mLookupTable[][] = 112 { 113 { 114 '{', 115 '2', '0', '!', ')', '[', '8', '3', '=', 116 'X', ':', '5', 'e', '9', '\\', 'V', 's', 117 'f', 'N', 'E', 'k', 'b', 'Y', 'x', '^', 118 '}', 'J', 'm', 'q', 0, '`', 0, 'S', 119 0, 'B', '\'', 'H', 'r', 'u', '1', '7', 120 'M', 'R', '"', 'T', 'j', 'G', 'd', '-', 121 ' ', '', '.', 'L', ']', '~', 'l', 'o', 122 'y', 't', 'C', '&', 'v', '%', '$', '+', 123 '(', '#', 'A', '4', '\t', '*', 'D', '?', 124 'w', ';', 'U', 'i', 'a', 'c', 'P', 'g', 125 'Q', 'I', 'O', 'F', 'h', '|', '6', 'p', 126 'n', 'z', '/', '_', 'K', 'Z', ',', 'W', 127 }, 128 { 129 'W', 130 '.', 'G', 'z', 'V', 'B', 'j', '/', '&', 131 'I', 'A', '4', '2', '[', 'v', 'r', 'C', 132 '8', '9', 'p', 'E', 'h', 'q', 'O', '\t', 133 'b', 'D', '#', 'u', 0, '~', 0, '^', 134 0, 'w', 'J', 'a', ']', '"', 'K', 'o', 135 'N', ';', 'L', 'P', 'g', '*', '}', 't', 136 'T', '+', '-', ',', '0', 'n', 'k', 'f', 137 '5', '%', '!', 'd', 'M', 'R', 'c', '?', 138 '{', 'x', ')', '(', 's', 'Y', '3', '', 139 'm', 'U', 'S', '|', ':', '_', 'e', 'F', 140 'X', '1', 'i', 'l', 'Z', 'H', '\'', '\\', 141 '=', '$', 'y', '7', '`', 'Q', ' ', '6', 142 }, 143 { 144 'n', 145 '-', 'u', 'R', '`', 'q', '^', 'I', '\\', 146 'b', '}', ')', '6', ' ', '|', 'z', '', 147 'k', 'c', '3', '+', 'h', 'Q', 'f', 'v', 148 '1', 'd', 'T', 'C', 0, ':', 0, '~', 149 0, 'E', ',', '*', 't', '\'', '7', 'D', 150 'y', 'Y', '/', 'o', '&', 'r', 'j', '9', 151 '{', '?', '8', 'w', 'g', 'S', 'G', '4', 152 'x', ']', '0', '#', 'Z', '[', 'l', 'H', 153 'U', 'p', 'i', '.', 'L', '!', '$', 'N', 154 'P', '\t', 'V', 's', '5', 'a', 'K', 'X', 155 ';', 'W', '"', 'm', 'M', '%', '(', 'F', 156 'J', '2', 'A', '=', '_', 'O', 'B', 'e', 157 }, 158 }; 159 160 164 protected static int mDigits[]; 165 static 166 { 167 mDigits = new int[0x7b]; 168 for (int i = 0; i < 26; i++) 169 { 170 mDigits['A' + i] = i; 171 mDigits['a' + i] = i + 26; 172 } 173 for (int i = 0; i < 10; i++) 174 mDigits['0' + i] = i + 52; 175 mDigits[0x2b] = '>'; 176 mDigits[0x2f] = '?'; 177 } 178 179 184 protected static char mLeader[] = 185 { 186 '#', 187 '@', 188 '~', 189 '^', 190 }; 191 192 196 protected static char mPrefix[] = 197 { 198 '=', 199 '=', 200 }; 201 202 207 protected static char mTrailer[] = 208 { 209 '=', 210 '=', 211 '^', 212 '#', 213 '~', 214 '@', 215 }; 216 217 220 protected static char mEscapes[] = 221 { 222 '#', 223 '&', 224 '!', 225 '*', 226 '$', 227 }; 228 229 232 protected static char mEscaped[] = { 234 '\r', 235 '\n', 236 '<', 237 '>', 238 '@', 239 }; 240 241 250 protected static long decodeBase64 (char[] p) 251 { 252 long ret; 253 254 ret = 0; 255 256 ret += (mDigits[p[0]] << 2); 257 ret += (mDigits[p[1]] >> 4); 258 ret += (mDigits[p[1]] & 0xf) << 12; 259 ret += ((mDigits[p[2]] >> 2) << 8); 260 ret += ((mDigits[p[2]] & 0x3) << 22); 261 ret += (mDigits[p[3]] << 16); 262 ret += ((mDigits[p[4]] << 2) << 24); 263 ret += ((mDigits[p[5]] >> 4) << 24); 264 265 return (ret); 266 } 267 268 276 public static String Decode (Page page, Cursor cursor) 277 throws 278 ParserException 279 { 280 int state; 281 int substate_initial; 282 int substate_length; 283 int substate_prefix; 284 int substate_checksum; 285 int substate_final; 286 long checksum; 287 long length; 288 char buffer[]; 289 buffer = new char[6]; 290 int index; 291 char character; 292 int input_character; 293 boolean found; 294 StringBuffer ret; 295 296 ret = new StringBuffer (1024); 297 298 state = STATE_INITIAL; 299 substate_initial = 0; 300 substate_length = 0; 301 substate_prefix = 0; 302 substate_checksum = 0; 303 substate_final = 0; 304 length = 0L; 305 checksum = 0L; 306 index = 0; 307 while (STATE_DONE != state) 308 { 309 input_character = page.getCharacter (cursor); 310 character = (char)input_character; 311 if (Page.EOF == input_character) 312 { 313 if ( (STATE_INITIAL != state) 314 || (0 != substate_initial) 315 || (0 != substate_length) 316 || (0 != substate_prefix) 317 || (0 != substate_checksum) 318 || (0 != substate_final)) 319 throw new ParserException ("illegal state for exit"); 320 state = STATE_DONE; 321 } 322 else 323 switch (state) 324 { 325 case STATE_INITIAL: 326 if (character == mLeader[substate_initial]) 327 { 328 substate_initial++; 329 if (substate_initial == mLeader.length) 330 { 331 substate_initial = 0; 332 state = STATE_LENGTH; 333 } 334 } 335 else 336 { 337 for (int k = 0; 0 < substate_initial; k++) 339 { 340 ret.append (mLeader[k++]); 341 substate_initial--; 342 } 343 ret.append (character); 344 } 345 break; 346 347 case STATE_LENGTH: 348 buffer[substate_length] = character; 349 substate_length++; 350 if (substate_length >= buffer.length) 351 { 352 length = decodeBase64 (buffer); 353 if (0 > length) 354 throw new ParserException ("illegal length: " + length); 355 substate_length = 0; 356 state = STATE_PREFIX; 357 } 358 break; 359 360 case STATE_PREFIX: 361 if (character == mPrefix[substate_prefix]) 362 substate_prefix++; 363 else 364 throw new ParserException ("illegal character encountered: " + (int)character + " ('" + character + "')"); 365 if (substate_prefix >= mPrefix.length) 366 { 367 substate_prefix = 0; 368 state = STATE_DECODE; 369 } 370 break; 371 372 case STATE_DECODE: 373 if ('@' == character) 374 state = STATE_ESCAPE; 375 else 376 { 377 if (input_character < 0x80) 378 { 379 if (input_character == '\t') 380 input_character = 0; 381 else if (input_character >= ' ') 382 input_character -= ' ' - 1; 383 else 384 throw new ParserException ("illegal encoded character: " + input_character + " ('" + character + "')"); 385 char ch = mLookupTable[mEncodingIndex[index % 64]][input_character]; 386 ret.append (ch); 387 checksum += ch; 388 index++; 389 } 390 else 391 ret.append (character); 392 } 393 length--; 394 if (0 == length) 395 { 396 index = 0; 397 state = STATE_CHECKSUM; 398 } 399 break; 400 401 case STATE_ESCAPE: 402 found = false; 403 for (int i = 0; i < mEscapes.length; i++) 404 if (character == mEscapes[i]) 405 { 406 found = true; 407 character = mEscaped[i]; 408 } 409 if (!found) 410 throw new ParserException ("unexpected escape character: " + (int)character + " ('" + character + "')"); 411 ret.append (character); 412 checksum += character; 413 index++; 414 state = STATE_DECODE; 415 length--; 416 if (0 == length) 417 { 418 index = 0; 419 state = STATE_CHECKSUM; 420 } 421 break; 422 423 case STATE_CHECKSUM: 424 buffer[substate_checksum] = character; 425 substate_checksum++; 426 if (substate_checksum >= buffer.length) 427 { 428 long check = decodeBase64 (buffer); 429 if (check != checksum) 430 throw new ParserException ("incorrect checksum, expected " + check + ", calculated " + checksum); 431 checksum = 0; 432 substate_checksum = 0; 433 state = STATE_FINAL; 434 } 435 break; 436 437 case STATE_FINAL: 438 if (character == mTrailer[substate_final]) 439 substate_final++; 440 else 441 throw new ParserException ("illegal character encountered: " + (int)character + " ('" + character + "')"); 442 if (substate_final >= mTrailer.length) 443 { 444 substate_final = 0; 445 state = LAST_STATE; 446 } 447 break; 448 default: 449 throw new ParserException ("invalid state: " + state); 450 } 451 } 452 453 return (ret.toString ()); 454 } 455 456 } | Popular Tags |