KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > scanners > ScriptDecoder


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptDecoder.java,v $
// $Author: derrickoswald $
// $Date: 2005/03/13 14:51:45 $
// $Revision: 1.3 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

27 package org.htmlparser.scanners;
28
29 import org.htmlparser.lexer.Cursor;
30 import org.htmlparser.lexer.Page;
31 import org.htmlparser.util.ParserException;
32
33 /**
34  * Decode script.
35  * Script obfuscated by the <A HREF="http://www.microsoft.com/downloads/details.aspx?FamilyId=E7877F67-C447-4873-B1B0-21F0626A6329&displaylang=en" target="_parent">Windows Script Encoder</A>
36  * provided by Microsoft, is converted to plaintext. This code is based loosely
37  * on example code provided by MrBrownstone with changes by Joe Steele, see
38  * <A HREF="http://www.virtualconspiracy.com/download/scrdec14.c" target="_parent">scrdec14.c</A>.
39  */

40 public class ScriptDecoder
41 {
42     /**
43      * Termination state.
44      */

45     public static final int STATE_DONE = 0;
46
47     /**
48      * State on entry.
49      */

50     public static final int STATE_INITIAL = 1;
51
52     /**
53      * State while reading the encoded length.
54      */

55     protected static final int STATE_LENGTH = 2;
56
57     /**
58      * State when reading up to decoded text.
59      */

60     protected static final int STATE_PREFIX = 3;
61
62     /**
63      * State while decoding.
64      */

65     protected static final int STATE_DECODE = 4;
66
67     /**
68      * State when reading an escape sequence.
69      */

70     protected static final int STATE_ESCAPE = 5;
71
72     /**
73      * State when reading the checksum.
74      */

75     protected static final int STATE_CHECKSUM = 6;
76
77     /**
78      * State while exiting.
79      */

80     protected static final int STATE_FINAL = 7;
81
82     /**
83      * The state to enter when decrypting is complete.
84      * If this is STATE_DONE, the decryption will return with any characters
85      * following the encoded text still unconsumed.
86      * Otherwise, if this is STATE_INITIAL, the input will be exhausted and
87      * all following characters will be contained in the return value
88      * of the <code>Decode()</code> method.
89      */

90     public static int LAST_STATE = STATE_DONE;
91
92     /**
93      * Table of lookup choice.
94      * The decoding cycles between three flavours determined
95      * by this sequence of 64 choices, corresponding to the
96      * first dimension of the lookup table.
97      */

98     protected static byte mEncodingIndex[] =
99     {
100         1, 2, 0, 1, 2, 0, 2, 0, 0, 2, 0, 2, 1, 0, 2, 0,
101         1, 0, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 2, 0, 0, 2,
102         1, 1, 0, 2, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 0, 2,
103         1, 0, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 0, 2,
104     };
105
106     /**
107      * Two dimensional lookup table.
108      * The decoding uses this table to determine the plaintext for
109      * characters that aren't mEscaped.
110      */

111     protected static char mLookupTable[][] =
112     {
113         {
114             '{',
115             '2', '0', '!', ')', '[', '8', '3', '=',
116             'X', ':', '5', 'e', '9', '\\', 'V', 's',
117             'f', 'N', 'E', 'k', 'b', 'Y', 'x', '^',
118             '}', 'J', 'm', 'q', 0, '`', 0, 'S',
119               0, 'B', '\'', 'H', 'r', 'u', '1', '7',
120             'M', 'R', '"', 'T', 'j', 'G', 'd', '-',
121             ' ', '', '.', 'L', ']', '~', 'l', 'o',
122             'y', 't', 'C', '&', 'v', '%', '$', '+',
123             '(', '#', 'A', '4', '\t', '*', 'D', '?',
124             'w', ';', 'U', 'i', 'a', 'c', 'P', 'g',
125             'Q', 'I', 'O', 'F', 'h', '|', '6', 'p',
126             'n', 'z', '/', '_', 'K', 'Z', ',', 'W',
127         },
128         {
129             'W',
130             '.', 'G', 'z', 'V', 'B', 'j', '/', '&',
131             'I', 'A', '4', '2', '[', 'v', 'r', 'C',
132             '8', '9', 'p', 'E', 'h', 'q', 'O', '\t',
133             'b', 'D', '#', 'u', 0, '~', 0, '^',
134               0, 'w', 'J', 'a', ']', '"', 'K', 'o',
135             'N', ';', 'L', 'P', 'g', '*', '}', 't',
136             'T', '+', '-', ',', '0', 'n', 'k', 'f',
137             '5', '%', '!', 'd', 'M', 'R', 'c', '?',
138             '{', 'x', ')', '(', 's', 'Y', '3', '',
139             'm', 'U', 'S', '|', ':', '_', 'e', 'F',
140             'X', '1', 'i', 'l', 'Z', 'H', '\'', '\\',
141             '=', '$', 'y', '7', '`', 'Q', ' ', '6',
142         },
143         {
144             'n',
145             '-', 'u', 'R', '`', 'q', '^', 'I', '\\',
146             'b', '}', ')', '6', ' ', '|', 'z', '',
147             'k', 'c', '3', '+', 'h', 'Q', 'f', 'v',
148             '1', 'd', 'T', 'C', 0, ':', 0, '~',
149               0, 'E', ',', '*', 't', '\'', '7', 'D',
150             'y', 'Y', '/', 'o', '&', 'r', 'j', '9',
151             '{', '?', '8', 'w', 'g', 'S', 'G', '4',
152             'x', ']', '0', '#', 'Z', '[', 'l', 'H',
153             'U', 'p', 'i', '.', 'L', '!', '$', 'N',
154             'P', '\t', 'V', 's', '5', 'a', 'K', 'X',
155             ';', 'W', '"', 'm', 'M', '%', '(', 'F',
156             'J', '2', 'A', '=', '_', 'O', 'B', 'e',
157         },
158     };
159
160     /**
161      * The base 64 decoding table.
162      * This array determines the value of decoded base 64 elements.
163      */

164     protected static int mDigits[];
165     static
166     {
167         mDigits = new int[0x7b];
168         for (int i = 0; i < 26; i++)
169         {
170             mDigits['A' + i] = i;
171             mDigits['a' + i] = i + 26;
172         }
173         for (int i = 0; i < 10; i++)
174             mDigits['0' + i] = i + 52;
175         mDigits[0x2b] = '>';
176         mDigits[0x2f] = '?';
177     }
178
179     /**
180      * The leader.
181      * The prefix to the encoded script is #@~^nnnnnn== where the n are the
182      * length digits in base64.
183      */

184     protected static char mLeader[] =
185     {
186         '#',
187         '@',
188         '~',
189         '^',
190     };
191
192     /**
193      * The prefix.
194      * The prfix separates the encoded text from the length.
195      */

196     protected static char mPrefix[] =
197     {
198         '=',
199         '=',
200     };
201
202     /**
203      * The trailer.
204      * The suffix to the encoded script is nnnnnn==^#~@ where the n are the
205      * checksum digits in base64. These characters are the part after the checksum.
206      */

207     protected static char mTrailer[] =
208     {
209         '=',
210         '=',
211         '^',
212         '#',
213         '~',
214         '@',
215     };
216
217     /**
218      * Escape sequence characters.
219      */

220     protected static char mEscapes[] =
221     {
222         '#',
223         '&',
224         '!',
225         '*',
226         '$',
227     };
228
229     /**
230      * The escaped characters corresponding to the each escape sequence.
231      */

232     protected static char mEscaped[] = //"\r\n<>@";
233
{
234         '\r',
235         '\n',
236         '<',
237         '>',
238         '@',
239     };
240
241     /**
242      * Extract the base 64 encoded number.
243      * This is a very limited subset of base 64 encoded characters.
244      * Six characters are expected. These are translated into a single long
245      * value. For a more complete base 64 codec see for example the base64
246      * package of <A HREF="http://sourceforge.net/projects/iharder/" target="_parent">iHarder.net</A>
247      * @param p Six base 64 encoded digits.
248      * @return The value of the decoded number.
249      */

250     protected static long decodeBase64 (char[] p)
251     {
252         long ret;
253         
254         ret = 0;
255
256         ret += (mDigits[p[0]] << 2);
257         ret += (mDigits[p[1]] >> 4);
258         ret += (mDigits[p[1]] & 0xf) << 12;
259         ret += ((mDigits[p[2]] >> 2) << 8);
260         ret += ((mDigits[p[2]] & 0x3) << 22);
261         ret += (mDigits[p[3]] << 16);
262         ret += ((mDigits[p[4]] << 2) << 24);
263         ret += ((mDigits[p[5]] >> 4) << 24);
264
265         return (ret);
266     }
267
268     /**
269      * Decode script encoded by the Microsoft obfuscator.
270      * @param page The source for encoded text.
271      * @param cursor The position at which to start decoding.
272      * This is advanced to the end of the encoded text.
273      * @return The plaintext.
274      * @exception ParserException If an error is discovered while decoding.
275      */

276     public static String JavaDoc Decode (Page page, Cursor cursor)
277         throws
278             ParserException
279     {
280         int state;
281         int substate_initial;
282         int substate_length;
283         int substate_prefix;
284         int substate_checksum;
285         int substate_final;
286         long checksum;
287         long length;
288         char buffer[];
289         buffer = new char[6];
290         int index;
291         char character;
292         int input_character;
293         boolean found;
294         StringBuffer JavaDoc ret;
295         
296         ret = new StringBuffer JavaDoc (1024);
297
298         state = STATE_INITIAL;
299         substate_initial = 0;
300         substate_length = 0;
301         substate_prefix = 0;
302         substate_checksum = 0;
303         substate_final = 0;
304         length = 0L;
305         checksum = 0L;
306         index = 0;
307         while (STATE_DONE != state)
308         {
309             input_character = page.getCharacter (cursor);
310             character = (char)input_character;
311             if (Page.EOF == input_character)
312             {
313                 if ( (STATE_INITIAL != state)
314                     || (0 != substate_initial)
315                     || (0 != substate_length)
316                     || (0 != substate_prefix)
317                     || (0 != substate_checksum)
318                     || (0 != substate_final))
319                     throw new ParserException ("illegal state for exit");
320                 state = STATE_DONE;
321             }
322             else
323                 switch (state)
324                 {
325                     case STATE_INITIAL:
326                         if (character == mLeader[substate_initial])
327                         {
328                             substate_initial++;
329                             if (substate_initial == mLeader.length)
330                             {
331                                 substate_initial = 0;
332                                 state = STATE_LENGTH;
333                             }
334                         }
335                         else
336                         {
337                             // oops, flush
338
for (int k = 0; 0 < substate_initial; k++)
339                             {
340                                 ret.append (mLeader[k++]);
341                                 substate_initial--;
342                             }
343                             ret.append (character);
344                         }
345                         break;
346
347                     case STATE_LENGTH:
348                         buffer[substate_length] = character;
349                         substate_length++;
350                         if (substate_length >= buffer.length)
351                         {
352                             length = decodeBase64 (buffer);
353                             if (0 > length)
354                                 throw new ParserException ("illegal length: " + length);
355                             substate_length = 0;
356                             state = STATE_PREFIX;
357                         }
358                         break;
359
360                     case STATE_PREFIX:
361                         if (character == mPrefix[substate_prefix])
362                             substate_prefix++;
363                         else
364                             throw new ParserException ("illegal character encountered: " + (int)character + " ('" + character + "')");
365                         if (substate_prefix >= mPrefix.length)
366                         {
367                             substate_prefix = 0;
368                             state = STATE_DECODE;
369                         }
370                         break;
371
372                     case STATE_DECODE:
373                         if ('@' == character)
374                             state = STATE_ESCAPE;
375                         else
376                         {
377                             if (input_character < 0x80)
378                             {
379                                 if (input_character == '\t')
380                                     input_character = 0;
381                                 else if (input_character >= ' ')
382                                     input_character -= ' ' - 1;
383                                 else
384                                     throw new ParserException ("illegal encoded character: " + input_character + " ('" + character + "')");
385                                 char ch = mLookupTable[mEncodingIndex[index % 64]][input_character];
386                                 ret.append (ch);
387                                 checksum += ch;
388                                 index++;
389                             }
390                             else
391                                 ret.append (character);
392                         }
393                         length--;
394                         if (0 == length)
395                         {
396                             index = 0;
397                             state = STATE_CHECKSUM;
398                         }
399                         break;
400
401                     case STATE_ESCAPE:
402                         found = false;
403                         for (int i = 0; i < mEscapes.length; i++)
404                             if (character == mEscapes[i])
405                             {
406                                 found = true;
407                                 character = mEscaped[i];
408                             }
409                         if (!found)
410                             throw new ParserException ("unexpected escape character: " + (int)character + " ('" + character + "')");
411                         ret.append (character);
412                         checksum += character;
413                         index++;
414                         state = STATE_DECODE;
415                         length--;
416                         if (0 == length)
417                         {
418                             index = 0;
419                             state = STATE_CHECKSUM;
420                         }
421                         break;
422
423                     case STATE_CHECKSUM:
424                         buffer[substate_checksum] = character;
425                         substate_checksum++;
426                         if (substate_checksum >= buffer.length)
427                         {
428                             long check = decodeBase64 (buffer);
429                             if (check != checksum)
430                                 throw new ParserException ("incorrect checksum, expected " + check + ", calculated " + checksum);
431                             checksum = 0;
432                             substate_checksum = 0;
433                             state = STATE_FINAL;
434                         }
435                         break;
436
437                     case STATE_FINAL:
438                         if (character == mTrailer[substate_final])
439                             substate_final++;
440                         else
441                             throw new ParserException ("illegal character encountered: " + (int)character + " ('" + character + "')");
442                         if (substate_final >= mTrailer.length)
443                         {
444                             substate_final = 0;
445                             state = LAST_STATE;
446                         }
447                         break;
448                     default:
449                         throw new ParserException ("invalid state: " + state);
450                 }
451         }
452
453         return (ret.toString ());
454     }
455
456 // /**
457
// * Example mainline for decrypting script.
458
// * Change a file with encrypted script into one without.
459
// * <em>WARNING: This does not preserve DOS type line endings.</em>
460
// * @param args Command line arguments. Two file names, input and output.
461
// * Optionally, the character set to use as a third argument.
462
// * @exception IOException If the input file doesn't exist, or the output
463
// * file cannot be created.
464
// * @exception ParserException If there is a decryption problem.
465
// */
466
// public static void main (String[] args)
467
// throws
468
// IOException,
469
// ParserException
470
// {
471
// String charset;
472
// FileInputStream in;
473
// Page page;
474
// Cursor cursor;
475
// String string;
476
// int ret;
477
//
478
// if (args.length < 2)
479
// {
480
// System.out.println ("Usage: java org.htmlparser.scanners.ScriptDecoder <infile> <outfile> [charset]");
481
// ret = 1;
482
// }
483
// else
484
// {
485
// if (2 < args.length)
486
// charset = args[2];
487
// else
488
// charset = "ISO-8859-1";
489
// in = new FileInputStream (args[0]);
490
// page = new Page (in, charset);
491
// cursor = new Cursor (page, 0);
492
// ScriptDecoder.LAST_STATE = STATE_INITIAL;
493
// string = ScriptDecoder.Decode (page, cursor);
494
// in.close ();
495
//
496
// FileOutputStream outfile = new FileOutputStream (args[1]);
497
// outfile.write (string.getBytes (charset));
498
// outfile.close ();
499
// ret = (0 != string.length ()) ? 0 : 1;
500
// }
501
//
502
// System.exit (ret);
503
// }
504
}
Popular Tags