KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > hotsax > html > sax > HtmlParser


//### This file created by BYACC 1.8(/Java extension 1.1)
//### Java capabilities added 7 Jan 97, Bob Jamison
//### Updated : 27 Nov 97 -- Bob Jamison, Joe Nieten
//### 01 Jan 98 -- Bob Jamison -- fixed generic semantic constructor
//### 01 Jun 99 -- Bob Jamison -- added Runnable support
//### 06 Aug 00 -- Bob Jamison -- made state variables class-global
//### 03 Jan 01 -- Bob Jamison -- improved flags, tracing
//### 16 May 01 -- Bob Jamison -- added custom stack sizing
//### Please send bug reports to rjamison@lincom-asg.com
//### static char yysccsid[] = "@(#)yaccpar 1.8 (Berkeley) 01/20/90";
11

12
13
14 package hotsax.html.sax;
15
16
17
18 //#line 3 "HtmlParser.y"
19

20 /* package name generated by BYACC/J command line: -J*/
21
22 import java.io.*;
23 //#line 20 "HtmlParser.java"
24

25
26
27
/**
 * Encapsulates yacc() parser functionality in a Java
 * class for quick code development.
 */

32 public class HtmlParser
33 {
34
35 boolean yydebug; //do I want debug output?
36
int yynerrs; //number of errors so far
37
int yyerrflag; //was there an error?
38
int yychar; //the current working character
39

40 //########## MESSAGES ##########
41
//###############################################################
42
// method: debug
43
//###############################################################
44
void debug(String JavaDoc msg)
45 {
46   if (yydebug)
47     System.out.println(msg);
48 }
49
50 //########## STATE STACK ##########
51
final static int YYSTACKSIZE = 500; //maximum stack sizeint statestk[],stateptr; //state stack
52
int stateptrmax; //highest index of stackptr
53
int statemax; //state when highest index reached
54
//###############################################################
55
// methods: state stack push,pop,drop,peek
56
//###############################################################
57
void state_push(int state)
58 {
59   if (stateptr>=YYSTACKSIZE) //overflowed?
60
return;
61   statestk[++stateptr]=state;
62   if (stateptr>statemax)
63     {
64     statemax=state;
65     stateptrmax=stateptr;
66     }
67 }
68 int state_pop()
69 {
70   if (stateptr<0) //underflowed?
71
return -1;
72   return statestk[stateptr--];
73 }
74 void state_drop(int cnt)
75 {
76 int ptr;
77   ptr=stateptr-cnt;
78   if (ptr<0)
79     return;
80   stateptr = ptr;
81 }
82 int state_peek(int relative)
83 {
84 int ptr;
85   ptr=stateptr-relative;
86   if (ptr<0)
87     return -1;
88   return statestk[ptr];
89 }
90 //###############################################################
91
// method: init_stacks : allocate and prepare stacks
92
//###############################################################
93
boolean init_stacks()
94 {
95   statestk = new int[YYSTACKSIZE];
96   stateptr = -1;
97   statemax = -1;
98   stateptrmax = -1;
99   val_init();
100   return true;
101 }
102 //###############################################################
103
// method: dump_stacks : show n levels of the stacks
104
//###############################################################
105
void dump_stacks(int count)
106 {
107 int i;
108   System.out.println("=index==state====value= s:"+stateptr+" v:"+valptr);
109   for (i=0;i<count;i++)
110     System.out.println(" "+i+" "+statestk[i]+" "+valstk[i]);
111   System.out.println("======================");
112 }
113
114
115 //########## SEMANTIC VALUES ##########
116
//public class HtmlParserVal is defined in HtmlParserVal.java
117

118
119 String JavaDoc yytext;//user variable to return contextual strings
120
HtmlParserVal yyval; //used to return semantic vals from action routines
121
HtmlParserVal yylval;//the 'lval' (result) I got from yylex()
122
HtmlParserVal valstk[];
123 int valptr;
124 //###############################################################
125
// methods: value stack push,pop,drop,peek.
126
//###############################################################
127
void val_init()
128 {
129   valstk=new HtmlParserVal[YYSTACKSIZE];
130   yyval=new HtmlParserVal(0);
131   yylval=new HtmlParserVal(0);
132   valptr=-1;
133 }
134 void val_push(HtmlParserVal val)
135 {
136   if (valptr>=YYSTACKSIZE)
137     return;
138   valstk[++valptr]=val;
139 }
140 HtmlParserVal val_pop()
141 {
142   if (valptr<0)
143     return new HtmlParserVal(-1);
144   return valstk[valptr--];
145 }
146 void val_drop(int cnt)
147 {
148 int ptr;
149   ptr=valptr-cnt;
150   if (ptr<0)
151     return;
152   valptr = ptr;
153 }
154 HtmlParserVal val_peek(int relative)
155 {
156 int ptr;
157   ptr=valptr-relative;
158   if (ptr<0)
159     return new HtmlParserVal(-1);
160   return valstk[ptr];
161 }
162 //#### end semantic value section ####
163
public final static short UNDEFINED=257;
164 public final static short SOF=258;
165 public final static short TAG_START=259;
166 public final static short TAG_END=260;
167 public final static short TAG_EMPTY=261;
168 public final static short ATTR=262;
169 public final static short VAL=263;
170 public final static short TEXT=264;
171 public final static short COMMENT=265;
172 public final static short PI=266;
173 public final static short DOCTYPE=267;
174 public final static short CDATA=268;
175 public final static short TAG_START_COMPLETE=269;
176 public final static short EOF=270;
177 public final static short YYERRCODE=256;
//Generated parse tables. Do not edit by hand; regenerate from HtmlParser.y.
//yylhs[rule]: value yyparse() uses to pick the goto entry after reducing by rule.
final static short yylhs[] = {                           -1,
    0,    1,    0,    3,    0,    2,    2,    4,    5,    5,
    5,    5,    5,    5,    5,    5,    5,    5,    5,
};
//yylen[rule]: number of stack entries dropped when reducing by rule.
final static short yylen[] = {                            2,
    0,    0,    3,    0,    4,    1,    2,    1,    1,    1,
    1,    1,    1,    1,    1,    1,    1,    1,    1,
};
//yydefred[state]: rule to reduce by without looking at the token; 0 means none.
final static short yydefred[] = {                         0,
    0,    0,    0,    0,    3,    9,   11,   14,   12,   13,
   18,   19,   16,   15,   17,   10,    0,    6,    8,    5,
    7,
};
//yydgoto[yylhs value]: fallback next state when the packed goto lookup misses.
final static short yydgoto[] = {                          2,
    3,   17,    4,   18,   19,
};
//yysindex[state]: offset added to the token to index yytable/yycheck for a shift.
final static short yysindex[] = {                      -223,
    0,    0, -234, -235,    0,    0,    0,    0,    0,    0,
    0,    0,    0,    0,    0,    0, -259,    0,    0,    0,
    0,
};
//yyrindex[state]: offset added to the token to index yytable/yycheck for a reduce.
final static short yyrindex[] = {                        37,
 -247,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    0,
};
//yygindex[yylhs value]: offset added to the state to index yytable/yycheck for a goto.
final static short yygindex[] = {                         0,
    0,    0,    0,   21,    0,
};
//Highest index yyparse() accepts when probing yytable/yycheck.
final static int YYTABLESIZE=38;
//yytable[i]: packed shift/reduce/goto targets, valid only where yycheck[i] matches.
final static short yytable[] = {                          6,
    7,    8,    9,   10,   11,   12,   13,   14,   15,   16,
   20,    4,    4,    4,    4,    4,    4,    4,    4,    4,
    4,    4,    2,    6,    7,    8,    9,   10,   11,   12,
   13,   14,   15,   16,    1,    5,    1,   21,
};
//yycheck[i]: the token or state that owns slot i of yytable.
final static short yycheck[] = {                        259,
  260,  261,  262,  263,  264,  265,  266,  267,  268,  269,
  270,  259,  260,  261,  262,  263,  264,  265,  266,  267,
  268,  269,  270,  259,  260,  261,  262,  263,  264,  265,
  266,  267,  268,  269,  258,  270,    0,   17,
};
//State yyparse() enters once the start symbol has been reduced in state 0.
final static short YYFINAL=2;
//Largest token code that has an entry in yyname[].
final static short YYMAXTOKEN=270;
222 final static String JavaDoc yyname[] = {
223 "end-of-file",null,null,null,null,null,null,null,null,null,null,null,null,null,
224 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
225 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
226 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
227 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
228 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
229 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
230 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
231 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
232 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
233 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
234 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
235 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
236 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
237 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
238 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
239 null,null,null,"UNDEFINED","SOF","TAG_START","TAG_END","TAG_EMPTY","ATTR","VAL",
240 "TEXT","COMMENT","PI","DOCTYPE","CDATA","TAG_START_COMPLETE","EOF",
241 };
242 final static String JavaDoc yyrule[] = {
243 "$accept : document",
244 "document :",
245 "$$1 :",
246 "document : SOF $$1 EOF",
247 "$$2 :",
248 "document : SOF $$2 docstuff EOF",
249 "docstuff : start",
250 "docstuff : docstuff start",
251 "start : elements",
252 "elements : TAG_START",
253 "elements : TAG_START_COMPLETE",
254 "elements : TAG_END",
255 "elements : ATTR",
256 "elements : VAL",
257 "elements : TAG_EMPTY",
258 "elements : DOCTYPE",
259 "elements : PI",
260 "elements : CDATA",
261 "elements : TEXT",
262 "elements : COMMENT",
263 };
264
265 //#line 49 "HtmlParser.y"
266

267   private HtmlLexer lexer;
268
269   private int stateptr; // possible bug in parser generator
270
private int statestk[];
271
272
273   /* Helpers */
274
275   private ParserDelegate delegate;
276   
277   public ParserDelegate getDelegate() { return delegate; }
278
279   private HtmlParserVal tagName;
280
281
282   private int yylex () {
283     int yyl_token = -1;
284     try {
285       yyl_token = lexer._yylex();
286     
287         if (yydebug)
288             System.out.println("token: " + yyl_token + " " + yyname[yyl_token] + " " + yylval.toString());
289     }
290     catch (IOException e) {
291       System.err.println("IO error :"+e);
292     }
293     return yyl_token;
294   }
295
296
297   public void yyerror (String JavaDoc error) {
298     System.err.println ("Error: " + error);
299   }
300
301
/**
 * Creates a parser that reads its input from {@code r}. A new HtmlLexer
 * and a new ParserDelegate are created, each given this parser.
 *
 * @param r the character source handed to the lexer
 */
public HtmlParser(Reader r) {
  lexer = new HtmlLexer(r, this);
  delegate = new ParserDelegate(this);
}
306
307
308
309   
310     // testing -----------
311
/** Set by main() when the parser reads from standard input instead of a file. */
static boolean interactive;
312
313   public static void main(String JavaDoc args[]) throws IOException {
314     System.out.println("BYACC/Java with JFlex HtmlParser");
315
316     HtmlParser yyparser;
317     if ( args.length > 0 ) {
318       // parse a file
319
yyparser = new HtmlParser(new FileReader(args[0]));
320        if (args.length > 1)
321             yyparser.yydebug = true;
322     }
323     else {
324       // interactive mode
325
System.out.println("[Quit with CTRL-D]");
326       System.out.print("Expression: ");
327       interactive = true;
328         yyparser = new HtmlParser(new InputStreamReader(System.in));
329     }
330
331     yyparser.yyparse();
332     
333     if (interactive) {
334       System.out.println();
335       System.out.println("Have a nice day");
336     }
337   }
338 //#line 284 "HtmlParser.java"
339
//###############################################################
340
// method: yylexdebug : check lexer state
341
//###############################################################
342
void yylexdebug(int state,int ch)
343 {
344 String JavaDoc s=null;
345   if (ch < 0) ch=0;
346   if (ch <= YYMAXTOKEN) //check index bounds
347
s = yyname[ch]; //now get it
348
if (s==null)
349     s = "illegal-symbol";
350   debug("state "+state+", reading "+ch+" ("+s+")");
351 }
352
353
354
355
356
357 //The following are now global, to aid in error reporting
358
int yyn; //next next thing to do
359
int yym; //
360
int yystate; //current parsing state from state table
361
String JavaDoc yys; //current token string
362

363
364 //###############################################################
365
// method: yyparse : parse input and execute indicated items
366
//###############################################################
367
int yyparse()
368 {
369 boolean doaction;
370   init_stacks();
371   yynerrs = 0;
372   yyerrflag = 0;
373   yychar = -1; //impossible char forces a read
374
yystate=0; //initial state
375
state_push(yystate); //save it
376
while (true) //until parsing is done, either correctly, or w/error
377
{
378     doaction=true;
379     if (yydebug) debug("loop");
380     //#### NEXT ACTION (from reduction table)
381
for (yyn=yydefred[yystate];yyn==0;yyn=yydefred[yystate])
382       {
383       if (yydebug) debug("yyn:"+yyn+" state:"+yystate+" yychar:"+yychar);
384       if (yychar < 0) //we want a char?
385
{
386         yychar = yylex(); //get next token
387
if (yydebug) debug(" next yychar:"+yychar);
388         //#### ERROR CHECK ####
389
if (yychar < 0) //it it didn't work/error
390
{
391           yychar = 0; //change it to default string (no -1!)
392
if (yydebug)
393             yylexdebug(yystate,yychar);
394           }
395         }//yychar<0
396
yyn = yysindex[yystate]; //get amount to shift by (shift index)
397
if ((yyn != 0) && (yyn += yychar) >= 0 &&
398           yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
399         {
400         if (yydebug)
401           debug("state "+yystate+", shifting to state "+yytable[yyn]);
402         //#### NEXT STATE ####
403
yystate = yytable[yyn];//we are in a new state
404
state_push(yystate); //save it
405
val_push(yylval); //push our lval as the input for next rule
406
yychar = -1; //since we have 'eaten' a token, say we need another
407
if (yyerrflag > 0) //have we recovered an error?
408
--yyerrflag; //give ourselves credit
409
doaction=false; //but don't process yet
410
break; //quit the yyn=0 loop
411
}
412
413     yyn = yyrindex[yystate]; //reduce
414
if ((yyn !=0 ) && (yyn += yychar) >= 0 &&
415             yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
416       { //we reduced!
417
if (yydebug) debug("reduce");
418       yyn = yytable[yyn];
419       doaction=true; //get ready to execute
420
break; //drop down to actions
421
}
422     else //ERROR RECOVERY
423
{
424       if (yyerrflag==0)
425         {
426         yyerror("syntax error");
427         yynerrs++;
428         }
429       if (yyerrflag < 3) //low error count?
430
{
431         yyerrflag = 3;
432         while (true) //do until break
433
{
434           if (stateptr<0) //check for under & overflow here
435
{
436             yyerror("stack underflow. aborting..."); //note lower case 's'
437
return 1;
438             }
439           yyn = yysindex[state_peek(0)];
440           if ((yyn != 0) && (yyn += YYERRCODE) >= 0 &&
441                     yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
442             {
443             if (yydebug)
444               debug("state "+state_peek(0)+", error recovery shifting to state "+yytable[yyn]+" ");
445             yystate = yytable[yyn];
446             state_push(yystate);
447             val_push(yylval);
448             doaction=false;
449             break;
450             }
451           else
452             {
453             if (yydebug)
454               debug("error recovery discarding state "+state_peek(0)+" ");
455             if (stateptr<0) //check for under & overflow here
456
{
457               yyerror("Stack underflow. aborting..."); //capital 'S'
458
return 1;
459               }
460             state_pop();
461             val_pop();
462             }
463           }
464         }
465       else //discard this token
466
{
467         if (yychar == 0)
468           return 1; //yyabort
469
if (yydebug)
470           {
471           yys = null;
472           if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
473           if (yys == null) yys = "illegal-symbol";
474           debug("state "+yystate+", error recovery discards token "+yychar+" ("+yys+")");
475           }
476         yychar = -1; //read another
477
}
478       }//end error recovery
479
}//yyn=0 loop
480
if (!doaction) //any reason not to proceed?
481
continue; //skip action
482
yym = yylen[yyn]; //get count of terminals on rhs
483
if (yydebug)
484       debug("state "+yystate+", reducing "+yym+" by rule "+yyn+" ("+yyrule[yyn]+")");
485     if (yym>0) //if count of rhs not 'nil'
486
yyval = val_peek(yym-1); //get current semantic value
487
switch(yyn)
488       {
489 //########## USER-SUPPLIED ACTIONS ##########
490
case 2:
491 //#line 18 "HtmlParser.y"
492
{ delegate.startDocument(); }
493 break;
494 case 3:
495 //#line 19 "HtmlParser.y"
496
{ delegate.endDocument(); }
497 break;
498 case 4:
499 //#line 20 "HtmlParser.y"
500
{ delegate.startDocument(); }
501 break;
502 case 5:
503 //#line 22 "HtmlParser.y"
504
{ delegate.endDocument(); }
505 break;
506 case 9:
507 //#line 32 "HtmlParser.y"
508
{ delegate.startElement(); tagName = yyval; }
509 break;
510 case 10:
511 //#line 33 "HtmlParser.y"
512
{ delegate.startElement(tagName); }
513 break;
514 case 11:
515 //#line 34 "HtmlParser.y"
516
{ delegate.endElement(yyval); }
517 break;
518 case 12:
519 //#line 35 "HtmlParser.y"
520
{ delegate.addAttribute(yyval); }
521 break;
522 case 14:
523 //#line 37 "HtmlParser.y"
524
{ delegate.startElement(); tagName = yyval; }
525 break;
526 case 15:
527 //#line 38 "HtmlParser.y"
528
{ delegate.startDTD(yyval); }
529 break;
530 case 16:
531 //#line 39 "HtmlParser.y"
532
{ delegate.processingInstruction(yyval); }
533 break;
534 case 17:
535 //#line 40 "HtmlParser.y"
536
{ delegate.startCDATA(); }
537 break;
538 case 18:
539 //#line 41 "HtmlParser.y"
540
{ delegate.characters(yyval); }
541 break;
542 case 19:
543 //#line 42 "HtmlParser.y"
544
{ delegate.comment(yyval); }
545 break;
546 //#line 487 "HtmlParser.java"
547
//########## END OF USER-SUPPLIED ACTIONS ##########
548
}//switch
549
//#### Now let's reduce... ####
550
if (yydebug) debug("reduce");
551     state_drop(yym); //we just reduced yylen states
552
yystate = state_peek(0); //get new state
553
val_drop(yym); //corresponding value drop
554
yym = yylhs[yyn]; //select next TERMINAL(on lhs)
555
if (yystate == 0 && yym == 0)//done? 'rest' state and at first TERMINAL
556
{
557       debug("After reduction, shifting from state 0 to state "+YYFINAL+"");
558       yystate = YYFINAL; //explicitly say we're done
559
state_push(YYFINAL); //and save it
560
val_push(yyval); //also save the semantic value of parsing
561
if (yychar < 0) //we want another character?
562
{
563         yychar = yylex(); //get next character
564
if (yychar<0) yychar=0; //clean, if necessary
565
if (yydebug)
566           yylexdebug(yystate,yychar);
567         }
568       if (yychar == 0) //Good exit (if lex returns 0 ;-)
569
break; //quit the loop--all DONE
570
}//if yystate
571
else //else not done yet
572
{ //get next state and push, for next yydefred[]
573
yyn = yygindex[yym]; //find out where to go
574
if ((yyn != 0) && (yyn += yystate) >= 0 &&
575             yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
576         yystate = yytable[yyn]; //get new state
577
else
578         yystate = yydgoto[yym]; //else go to new defred
579
debug("after reduction, shifting from state "+state_peek(0)+" to state "+yystate+"");
580       state_push(yystate); //going again, so push state & val...
581
val_push(yyval); //for next action
582
}
583     }//main loop
584
return 0;//yyaccept!!
585
}
586 //## end of method parse() ######################################
587

588
589
590 //## run() --- for Thread #######################################
591
/**
592  * A default run method, used for operating this parser
593  * object in the background. It is intended for extending Thread
594  * or implementing Runnable. Turn off with -Jnorun .
595  */

596 public void run()
597 {
598   yyparse();
599 }
600 //## end of method run() ########################################
601

602
603
604 //## Constructors ###############################################
605
/**
606  * Default constructor. Turn off with -Jnoconstruct .
607
608  */

609 public HtmlParser()
610 {
611   //nothing to do
612
}
613
614
/**
 * Create a parser, setting the debug to true or false.
 * <p>
 * NOTE(review): like the default constructor, this assigns neither
 * {@code lexer} nor {@code delegate}; confirm intended usage.
 *
 * @param debugMe true for debugging, false for no debug.
 */
public HtmlParser(boolean debugMe)
{
  yydebug=debugMe;
}
623 //###############################################################
624

625
626
627 }
628 //################### END OF CLASS ##############################
629
Popular Tags