1 12 13 14 package hotsax.html.sax; 15 16 17 18 20 21 22 import java.io.*; 23 25 26 27 28 32 public class HtmlParser 33 { 34 35 boolean yydebug; int yynerrs; int yyerrflag; int yychar; 40 void debug(String msg) 45 { 46 if (yydebug) 47 System.out.println(msg); 48 } 49 50 final static int YYSTACKSIZE = 500; int stateptrmax; int statemax; void state_push(int state) 58 { 59 if (stateptr>=YYSTACKSIZE) return; 61 statestk[++stateptr]=state; 62 if (stateptr>statemax) 63 { 64 statemax=state; 65 stateptrmax=stateptr; 66 } 67 } 68 int state_pop() 69 { 70 if (stateptr<0) return -1; 72 return statestk[stateptr--]; 73 } 74 void state_drop(int cnt) 75 { 76 int ptr; 77 ptr=stateptr-cnt; 78 if (ptr<0) 79 return; 80 stateptr = ptr; 81 } 82 int state_peek(int relative) 83 { 84 int ptr; 85 ptr=stateptr-relative; 86 if (ptr<0) 87 return -1; 88 return statestk[ptr]; 89 } 90 boolean init_stacks() 94 { 95 statestk = new int[YYSTACKSIZE]; 96 stateptr = -1; 97 statemax = -1; 98 stateptrmax = -1; 99 val_init(); 100 return true; 101 } 102 void dump_stacks(int count) 106 { 107 int i; 108 System.out.println("=index==state====value= s:"+stateptr+" v:"+valptr); 109 for (i=0;i<count;i++) 110 System.out.println(" "+i+" "+statestk[i]+" "+valstk[i]); 111 System.out.println("======================"); 112 } 113 114 115 118 119 String yytext;HtmlParserVal yyval; HtmlParserVal yylval;HtmlParserVal valstk[]; 123 int valptr; 124 void val_init() 128 { 129 valstk=new HtmlParserVal[YYSTACKSIZE]; 130 yyval=new HtmlParserVal(0); 131 yylval=new HtmlParserVal(0); 132 valptr=-1; 133 } 134 void val_push(HtmlParserVal val) 135 { 136 if (valptr>=YYSTACKSIZE) 137 return; 138 valstk[++valptr]=val; 139 } 140 HtmlParserVal val_pop() 141 { 142 if (valptr<0) 143 return new HtmlParserVal(-1); 144 return valstk[valptr--]; 145 } 146 void val_drop(int cnt) 147 { 148 int ptr; 149 ptr=valptr-cnt; 150 if (ptr<0) 151 return; 152 valptr = ptr; 153 } 154 HtmlParserVal val_peek(int relative) 155 { 156 int ptr; 157 ptr=valptr-relative; 158 if (ptr<0) 159 return new HtmlParserVal(-1); 160 return valstk[ptr]; 161 } 162 public final static short UNDEFINED=257; 164 public final static short SOF=258; 165 public final static short TAG_START=259; 166 public final static short TAG_END=260; 167 public final static short TAG_EMPTY=261; 168 public final static short ATTR=262; 169 public final static short VAL=263; 170 public final static short TEXT=264; 171 public final static short COMMENT=265; 172 public final static short PI=266; 173 public final static short DOCTYPE=267; 174 public final static short CDATA=268; 175 public final static short TAG_START_COMPLETE=269; 176 public final static short EOF=270; 177 public final static short YYERRCODE=256; 178 final static short yylhs[] = { -1, 179 0, 1, 0, 3, 0, 2, 2, 4, 5, 5, 180 5, 5, 5, 5, 5, 5, 5, 5, 5, 181 }; 182 final static short yylen[] = { 2, 183 0, 0, 3, 0, 4, 1, 2, 1, 1, 1, 184 1, 1, 1, 1, 1, 1, 1, 1, 1, 185 }; 186 final static short yydefred[] = { 0, 187 0, 0, 0, 0, 3, 9, 11, 14, 12, 13, 188 18, 19, 16, 15, 17, 10, 0, 6, 8, 5, 189 7, 190 }; 191 final static short yydgoto[] = { 2, 192 3, 17, 4, 18, 19, 193 }; 194 final static short yysindex[] = { -223, 195 0, 0, -234, -235, 0, 0, 0, 0, 0, 0, 196 0, 0, 0, 0, 0, 0, -259, 0, 0, 0, 197 0, 198 }; 199 final static short yyrindex[] = { 37, 200 -247, 0, 0, 0, 0, 0, 0, 0, 0, 0, 201 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 202 0, 203 }; 204 final static short yygindex[] = { 0, 205 0, 0, 0, 21, 0, 206 }; 207 final static int YYTABLESIZE=38; 208 final static short yytable[] = { 6, 209 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 210 20, 4, 4, 4, 4, 4, 4, 4, 4, 4, 211 4, 4, 2, 6, 7, 8, 9, 10, 11, 12, 212 13, 14, 15, 16, 1, 5, 1, 21, 213 }; 214 final static short yycheck[] = { 259, 215 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 216 270, 259, 260, 261, 262, 263, 264, 265, 266, 267, 217 268, 269, 270, 259, 260, 261, 262, 263, 264, 265, 218 266, 267, 268, 269, 258, 270, 0, 17, 219 }; 220 final static short YYFINAL=2; 221 final static short YYMAXTOKEN=270; 222 final static String yyname[] = { 223 "end-of-file",null,null,null,null,null,null,null,null,null,null,null,null,null, 224 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 225 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 226 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 227 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 228 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 229 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 230 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 231 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 232 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 233 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 234 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 235 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 236 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 237 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 238 null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, 239 null,null,null,"UNDEFINED","SOF","TAG_START","TAG_END","TAG_EMPTY","ATTR","VAL", 240 "TEXT","COMMENT","PI","DOCTYPE","CDATA","TAG_START_COMPLETE","EOF", 241 }; 242 final static String yyrule[] = { 243 "$accept : document", 244 "document :", 245 "$$1 :", 246 "document : SOF $$1 EOF", 247 "$$2 :", 248 "document : SOF $$2 docstuff EOF", 249 "docstuff : start", 250 "docstuff : docstuff start", 251 "start : elements", 252 "elements : TAG_START", 253 "elements : TAG_START_COMPLETE", 254 "elements : TAG_END", 255 "elements : ATTR", 256 "elements : VAL", 257 "elements : TAG_EMPTY", 258 "elements : DOCTYPE", 259 "elements : PI", 260 "elements : CDATA", 261 "elements : TEXT", 262 "elements : COMMENT", 263 }; 264 265 267 private HtmlLexer lexer; 268 269 private int stateptr; private int statestk[]; 271 272 273 274 275 private ParserDelegate delegate; 276 277 public ParserDelegate getDelegate() { return delegate; } 278 279 private HtmlParserVal tagName; 280 281 282 private int yylex () { 283 int yyl_token = -1; 284 try { 285 yyl_token = lexer._yylex(); 286 287 if (yydebug) 288 System.out.println("token: " + yyl_token + " " + yyname[yyl_token] + " " + yylval.toString()); 289 } 290 catch (IOException e) { 291 System.err.println("IO error :"+e); 292 } 293 return yyl_token; 294 } 295 296 297 public void yyerror (String error) { 298 System.err.println ("Error: " + error); 299 } 300 301 302 public HtmlParser(Reader r) { 303 lexer = new HtmlLexer(r, this); 304 delegate = new ParserDelegate(this); 305 } 306 307 308 309 310 static boolean interactive; 312 313 public static void main(String args[]) throws IOException { 314 System.out.println("BYACC/Java with JFlex HtmlParser"); 315 316 HtmlParser yyparser; 317 if ( args.length > 0 ) { 318 yyparser = new HtmlParser(new FileReader(args[0])); 320 if (args.length > 1) 321 yyparser.yydebug = true; 322 } 323 else { 324 System.out.println("[Quit with CTRL-D]"); 326 System.out.print("Expression: "); 327 interactive = true; 328 yyparser = new HtmlParser(new InputStreamReader(System.in)); 329 } 330 331 yyparser.yyparse(); 332 333 if (interactive) { 334 System.out.println(); 335 System.out.println("Have a nice day"); 336 } 337 } 338 void yylexdebug(int state,int ch) 343 { 344 String s=null; 345 if (ch < 0) ch=0; 346 if (ch <= YYMAXTOKEN) s = yyname[ch]; if (s==null) 349 s = "illegal-symbol"; 350 debug("state "+state+", reading "+ch+" ("+s+")"); 351 } 352 353 354 355 356 357 int yyn; int yym; int yystate; String yys; 363 364 int yyparse() 368 { 369 boolean doaction; 370 init_stacks(); 371 yynerrs = 0; 372 yyerrflag = 0; 373 yychar = -1; yystate=0; state_push(yystate); while (true) { 378 doaction=true; 379 if (yydebug) debug("loop"); 380 for (yyn=yydefred[yystate];yyn==0;yyn=yydefred[yystate]) 382 { 383 if (yydebug) debug("yyn:"+yyn+" state:"+yystate+" yychar:"+yychar); 384 if (yychar < 0) { 386 yychar = yylex(); if (yydebug) debug(" next yychar:"+yychar); 388 if (yychar < 0) { 391 yychar = 0; if (yydebug) 393 yylexdebug(yystate,yychar); 394 } 395 } yyn = yysindex[yystate]; if ((yyn != 0) && (yyn += yychar) >= 0 && 398 yyn <= YYTABLESIZE && yycheck[yyn] == yychar) 399 { 400 if (yydebug) 401 debug("state "+yystate+", shifting to state "+yytable[yyn]); 402 yystate = yytable[yyn]; state_push(yystate); val_push(yylval); yychar = -1; if (yyerrflag > 0) --yyerrflag; doaction=false; break; } 412 413 yyn = yyrindex[yystate]; if ((yyn !=0 ) && (yyn += yychar) >= 0 && 415 yyn <= YYTABLESIZE && yycheck[yyn] == yychar) 416 { if (yydebug) debug("reduce"); 418 yyn = yytable[yyn]; 419 doaction=true; break; } 422 else { 424 if (yyerrflag==0) 425 { 426 yyerror("syntax error"); 427 yynerrs++; 428 } 429 if (yyerrflag < 3) { 431 yyerrflag = 3; 432 while (true) { 434 if (stateptr<0) { 436 yyerror("stack underflow. aborting..."); return 1; 438 } 439 yyn = yysindex[state_peek(0)]; 440 if ((yyn != 0) && (yyn += YYERRCODE) >= 0 && 441 yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE) 442 { 443 if (yydebug) 444 debug("state "+state_peek(0)+", error recovery shifting to state "+yytable[yyn]+" "); 445 yystate = yytable[yyn]; 446 state_push(yystate); 447 val_push(yylval); 448 doaction=false; 449 break; 450 } 451 else 452 { 453 if (yydebug) 454 debug("error recovery discarding state "+state_peek(0)+" "); 455 if (stateptr<0) { 457 yyerror("Stack underflow. aborting..."); return 1; 459 } 460 state_pop(); 461 val_pop(); 462 } 463 } 464 } 465 else { 467 if (yychar == 0) 468 return 1; if (yydebug) 470 { 471 yys = null; 472 if (yychar <= YYMAXTOKEN) yys = yyname[yychar]; 473 if (yys == null) yys = "illegal-symbol"; 474 debug("state "+yystate+", error recovery discards token "+yychar+" ("+yys+")"); 475 } 476 yychar = -1; } 478 } } if (!doaction) continue; yym = yylen[yyn]; if (yydebug) 484 debug("state "+yystate+", reducing "+yym+" by rule "+yyn+" ("+yyrule[yyn]+")"); 485 if (yym>0) yyval = val_peek(yym-1); switch(yyn) 488 { 489 case 2: 491 { delegate.startDocument(); } 493 break; 494 case 3: 495 { delegate.endDocument(); } 497 break; 498 case 4: 499 { delegate.startDocument(); } 501 break; 502 case 5: 503 { delegate.endDocument(); } 505 break; 506 case 9: 507 { delegate.startElement(); tagName = yyval; } 509 break; 510 case 10: 511 { delegate.startElement(tagName); } 513 break; 514 case 11: 515 { delegate.endElement(yyval); } 517 break; 518 case 12: 519 { delegate.addAttribute(yyval); } 521 break; 522 case 14: 523 { delegate.startElement(); tagName = yyval; } 525 break; 526 case 15: 527 { delegate.startDTD(yyval); } 529 break; 530 case 16: 531 { delegate.processingInstruction(yyval); } 533 break; 534 case 17: 535 { delegate.startCDATA(); } 537 break; 538 case 18: 539 { delegate.characters(yyval); } 541 break; 542 case 19: 543 { delegate.comment(yyval); } 545 break; 546 } if (yydebug) debug("reduce"); 551 state_drop(yym); yystate = state_peek(0); val_drop(yym); yym = yylhs[yyn]; if (yystate == 0 && yym == 0) { 557 debug("After reduction, shifting from state 0 to state "+YYFINAL+""); 558 yystate = YYFINAL; state_push(YYFINAL); val_push(yyval); if (yychar < 0) { 563 yychar = yylex(); if (yychar<0) yychar=0; if (yydebug) 566 yylexdebug(yystate,yychar); 567 } 568 if (yychar == 0) break; } else { yyn = yygindex[yym]; if ((yyn != 0) && (yyn += yystate) >= 0 && 575 yyn <= YYTABLESIZE && yycheck[yyn] == yystate) 576 yystate = yytable[yyn]; else 578 yystate = yydgoto[yym]; debug("after reduction, shifting from state "+state_peek(0)+" to state "+yystate+""); 580 state_push(yystate); val_push(yyval); } 583 } return 0;} 586 588 589 590 596 public void run() 597 { 598 yyparse(); 599 } 600 602 603 604 609 public HtmlParser() 610 { 611 } 613 614 615 619 public HtmlParser(boolean debugMe) 620 { 621 yydebug=debugMe; 622 } 623 625 626 627 } 628 | Popular Tags |