1 19 20 package org.netbeans.editor.ext.html; 21 22 import org.netbeans.editor.Syntax; 23 import org.netbeans.editor.TokenID; 24 25 34 35 public class HTMLSyntax extends Syntax { 36 37 42 protected int subState = INIT; 43 44 45 private static final int ISI_TEXT = 1; private static final int ISI_ERROR = 2; private static final int ISA_LT = 3; private static final int ISA_SLASH = 4; private static final int ISI_ENDTAG = 5; private static final int ISP_ENDTAG_X = 6; private static final int ISP_ENDTAG_WS = 7; private static final int ISI_TAG = 8; private static final int ISP_TAG_X = 9; private static final int ISP_TAG_WS = 10; private static final int ISI_ARG = 11; private static final int ISP_ARG_X = 12; private static final int ISP_ARG_WS = 13; private static final int ISP_EQ = 14; private static final int ISP_EQ_WS = 15; private static final int ISI_VAL = 16; private static final int ISI_VAL_QUOT = 17; private static final int ISI_VAL_DQUOT = 18; private static final int ISA_SGML_ESCAPE = 19; private static final int ISA_SGML_DASH = 20; private static final int ISI_HTML_COMMENT = 21; private static final int ISA_HTML_COMMENT_DASH = 22; private static final int ISI_HTML_COMMENT_WS = 23; private static final int ISI_SGML_DECL = 24; 70 private static final int ISA_SGML_DECL_DASH = 25; 71 private static final int ISI_SGML_COMMENT = 26; 72 private static final int ISA_SGML_COMMENT_DASH = 27; 73 private static final int ISA_REF = 28; private static final int ISI_REF_NAME = 29; private static final int ISA_REF_HASH = 30; private static final int ISI_REF_DEC = 31; private static final int ISA_REF_X = 32; private static final int ISI_REF_HEX = 33; private static final int ISI_TAG_SLASH = 34; 81 public HTMLSyntax() { 82 tokenContextPath = HTMLTokenContext.contextPath; 83 } 84 85 private final boolean isAZ( char ch ) { 86 return( (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ); 87 } 88 89 private final boolean isName( char ch ) { 90 return Character.isLetterOrDigit(ch) || 91 ch == '-' || ch == '_' || ch == '.' || ch == ':'; 92 97 } 98 99 109 110 private final boolean isWS( char ch ) { 111 return Character.isWhitespace(ch); 112 } 115 116 protected TokenID parseToken() { 117 char actChar; 118 119 while(offset < stopOffset) { 120 actChar = buffer[offset]; 121 switch( state ) { 124 case INIT: switch( actChar ) { 126 case '<': 127 state = ISA_LT; 128 break; 129 case '&': 130 state = ISA_REF; 131 subState = ISI_TEXT; 132 break; 133 default: 134 state = ISI_TEXT; 135 break; 136 } 137 break; 138 139 case ISI_TEXT: switch( actChar ) { 141 case '<': 142 case '&': 143 state = INIT; 144 return HTMLTokenContext.TEXT; 145 } 146 break; 147 148 case ISI_ERROR: offset++; 150 state = INIT; 151 return HTMLTokenContext.ERROR; 152 153 case ISA_LT: if( isAZ( actChar ) ) { state = ISI_TAG; 156 return HTMLTokenContext.TAG_OPEN_SYMBOL; 157 } 158 switch( actChar ) { 159 case '/': state = ISA_SLASH; 161 offset++; 162 return HTMLTokenContext.TAG_OPEN_SYMBOL; 163 case '>': offset++; 165 state = INIT; 166 return HTMLTokenContext.TAG_CLOSE_SYMBOL; 167 case '!': 168 state = ISA_SGML_ESCAPE; 169 break; 170 default: state = ISI_TEXT; 172 continue; } 174 break; 175 176 case ISA_SLASH: if( isAZ( actChar ) ) { state = ISI_ENDTAG; 179 break; 180 } 181 switch( actChar ) { 182 case '>': offset++; 184 state = INIT; 185 return HTMLTokenContext.TAG_CLOSE_SYMBOL; 186 default: state = ISI_TEXT; 188 continue; } 190 192 case ISI_ENDTAG: if( isName( actChar ) ) break; state = ISP_ENDTAG_X; 195 return HTMLTokenContext.TAG_CLOSE; 196 197 198 case ISP_ENDTAG_X: if( isWS( actChar ) ) { 200 state = ISP_ENDTAG_WS; 201 break; 202 } 203 switch( actChar ) { 204 case '>': offset++; 206 state = INIT; 207 return HTMLTokenContext.TAG_CLOSE_SYMBOL; 208 case '<': state = INIT; 210 continue; 211 default: 212 state = ISI_ERROR; 213 continue; } 215 217 case ISP_ENDTAG_WS: if( isWS( actChar ) ) break; state = ISP_ENDTAG_X; 220 return HTMLTokenContext.WS; 221 222 223 case ISI_TAG: if( isName( actChar ) ) break; state = ISP_TAG_X; 226 return HTMLTokenContext.TAG_OPEN; 227 228 case ISP_TAG_X: if( isWS( actChar ) ) { 230 state = ISP_TAG_WS; 231 break; 232 } 233 if( isAZ( actChar ) ) { 234 state = ISI_ARG; 235 break; 236 } 237 switch( actChar ) { 238 case '/': 239 offset++; 240 state = ISI_TAG_SLASH; 241 continue; 242 case '>': 243 offset++; 244 state = INIT; 245 return HTMLTokenContext.TAG_CLOSE_SYMBOL; 246 case '<': 247 state = INIT; 248 continue; default: 250 state = ISI_ERROR; 251 continue; 252 } 253 255 case ISP_TAG_WS: if( isWS( actChar ) ) break; state = ISP_TAG_X; 258 return HTMLTokenContext.WS; 259 260 case ISI_TAG_SLASH: 261 switch( actChar ) { 262 case '>': 263 offset++; 264 state = INIT; 265 return HTMLTokenContext.TAG_CLOSE_SYMBOL; 266 default: 267 state = ISI_ERROR; 268 continue; 269 } 270 271 case ISI_ARG: if( isName( actChar ) ) break; state = ISP_ARG_X; 274 return HTMLTokenContext.ARGUMENT; 275 276 case ISP_ARG_X: 277 if( isWS( actChar ) ) { 278 state = ISP_ARG_WS; 279 break; 280 } 281 if( isAZ( actChar ) ) { 282 state = ISI_ARG; 283 break; 284 } 285 switch( actChar ) { 286 case '/': 287 case '>': 288 offset++; 289 state = INIT; 290 return HTMLTokenContext.TAG_OPEN; 291 case '<': 292 state = INIT; 293 continue; case '=': 295 offset++; 296 state = ISP_EQ; 297 return HTMLTokenContext.OPERATOR; 298 default: 299 state = ISI_ERROR; 300 continue; 301 } 302 304 case ISP_ARG_WS: 305 if( isWS( actChar ) ) break; state = ISP_ARG_X; 307 return HTMLTokenContext.WS; 308 309 case ISP_EQ: 310 if( isWS( actChar ) ) { 311 state = ISP_EQ_WS; 312 break; 313 } 314 switch( actChar ) { 315 case '\'': 316 state = ISI_VAL_QUOT; 317 break; 318 case '"': 319 state = ISI_VAL_DQUOT; 320 break; 321 case '>': 322 offset++; 323 state = INIT; 324 return HTMLTokenContext.TAG_OPEN; 325 default: 326 state = ISI_VAL; break; 328 } 329 break; 330 331 case ISP_EQ_WS: 332 if( isWS( actChar ) ) break; state = ISP_EQ; 334 return HTMLTokenContext.WS; 335 336 337 case ISI_VAL: 338 if( !isWS( actChar ) 339 && !(actChar == '/' || actChar == '>' || actChar == '<')) break; state = ISP_TAG_X; 341 return HTMLTokenContext.VALUE; 342 343 case ISI_VAL_QUOT: 344 switch( actChar ) { 345 case '\'': 346 offset++; 347 state = ISP_TAG_X; 348 return HTMLTokenContext.VALUE; 349 case '&': 350 if( offset == tokenOffset ) { 351 subState = state; 352 state = ISA_REF; 353 break; 354 } else { 355 return HTMLTokenContext.VALUE; 356 } 357 } 358 break; 360 case ISI_VAL_DQUOT: 361 switch( actChar ) { 362 case '"': 363 offset++; 364 state = ISP_TAG_X; 365 return HTMLTokenContext.VALUE; 366 case '&': 367 if( offset == tokenOffset ) { 368 subState = state; 369 state = ISA_REF; 370 break; 371 } else { 372 return HTMLTokenContext.VALUE; 373 } 374 } 375 break; 377 378 379 case ISA_SGML_ESCAPE: if( isAZ(actChar) ) { 381 state = ISI_SGML_DECL; 382 break; 383 } 384 switch( actChar ) { 385 case '-': 386 state = ISA_SGML_DASH; 387 break; 388 default: 389 state = ISI_TEXT; 390 continue; 391 } 392 break; 393 394 case ISA_SGML_DASH: switch( actChar ) { 396 case '-': 397 state = ISI_HTML_COMMENT; 398 break; 399 default: 400 state = ISI_TEXT; 401 continue; 402 } 403 break; 404 405 case ISI_HTML_COMMENT: switch( actChar ) { 407 case '-': 408 state = ISA_HTML_COMMENT_DASH; 409 break; 410 case '\n': 412 offset++; 413 return HTMLTokenContext.BLOCK_COMMENT; 416 } 417 break; 418 419 case ISA_HTML_COMMENT_DASH: 420 switch( actChar ) { 421 case '-': 422 state = ISI_HTML_COMMENT_WS; 423 break; 424 default: 425 state = ISI_HTML_COMMENT; 426 continue; 427 } 428 break; 429 430 case ISI_HTML_COMMENT_WS: if( isWS( actChar ) ) break; switch( actChar ) { 433 case '>': 434 offset++; 435 state = INIT; 436 return HTMLTokenContext.BLOCK_COMMENT; 437 default: 438 state = ISI_HTML_COMMENT; 439 continue; 440 } 441 443 case ISI_SGML_DECL: 444 switch( actChar ) { 445 case '>': 446 offset++; 447 state = INIT; 448 return HTMLTokenContext.DECLARATION; 449 case '-': 450 if( offset == tokenOffset ) { 451 state = ISA_SGML_DECL_DASH; 452 break; 453 } else { 454 return HTMLTokenContext.DECLARATION; 455 } 456 } 457 break; 458 459 case ISA_SGML_DECL_DASH: 460 if( actChar == '-' ) { 461 state = ISI_SGML_COMMENT; 462 break; 463 } else { 464 state = ISI_SGML_DECL; 465 continue; 466 } 467 468 case ISI_SGML_COMMENT: 469 switch( actChar ) { 470 case '-': 471 state = ISA_SGML_COMMENT_DASH; 472 break; 473 } 474 break; 475 476 case ISA_SGML_COMMENT_DASH: 477 if( actChar == '-' ) { 478 offset++; 479 state = ISI_SGML_DECL; 480 return HTMLTokenContext.SGML_COMMENT; 481 } else { 482 state = ISI_SGML_COMMENT; 483 continue; 484 } 485 486 487 case ISA_REF: 488 if( isAZ( actChar ) ) { 489 state = ISI_REF_NAME; 490 break; 491 } 492 if( actChar == '#' ) { 493 state = ISA_REF_HASH; 494 break; 495 } 496 state = subState; 497 continue; 498 499 case ISI_REF_NAME: 500 if( isName( actChar ) ) break; 501 if( actChar == ';' ) offset++; 502 state = subState; 503 return HTMLTokenContext.CHARACTER; 504 505 case ISA_REF_HASH: 506 if( actChar >= '0' && actChar <= '9' ) { 507 state = ISI_REF_DEC; 508 break; 509 } 510 if( actChar == 'x' || actChar == 'X' ) { 511 state = ISA_REF_X; 512 break; 513 } 514 if( isAZ( actChar ) ) { 515 offset++; 516 state = subState; 517 return HTMLTokenContext.ERROR; 518 } 519 state = subState; 520 continue; 521 522 case ISI_REF_DEC: 523 if( actChar >= '0' && actChar <= '9' ) break; 524 if( actChar == ';' ) offset++; 525 state = subState; 526 return HTMLTokenContext.CHARACTER; 527 528 case ISA_REF_X: 529 if( (actChar >= '0' && actChar <= '9') || 530 (actChar >= 'a' && actChar <= 'f') || 531 (actChar >= 'A' && actChar <= 'F') 532 ) { 533 state = ISI_REF_HEX; 534 break; 535 } 536 state = subState; 537 return HTMLTokenContext.ERROR; 539 case ISI_REF_HEX: 540 if( (actChar >= '0' && actChar <= '9') || 541 (actChar >= 'a' && actChar <= 'f') || 542 (actChar >= 'A' && actChar <= 'F') 543 ) break; 544 if( actChar == ';' ) offset++; 545 state = subState; 546 return HTMLTokenContext.CHARACTER; 547 } 548 549 550 offset = ++offset; 551 } 553 557 if( lastBuffer ) { 558 switch( state ) { 559 case INIT: 560 case ISI_TEXT: 561 case ISA_LT: 562 case ISA_SLASH: 563 case ISA_SGML_ESCAPE: 564 case ISA_SGML_DASH: 565 case ISI_TAG_SLASH: 566 return HTMLTokenContext.TEXT; 567 568 case ISA_REF: 569 case ISA_REF_HASH: 570 if( subState == ISI_TEXT ) return HTMLTokenContext.TEXT; 571 else return HTMLTokenContext.VALUE; 572 573 case ISI_HTML_COMMENT: 574 case ISA_HTML_COMMENT_DASH: 575 case ISI_HTML_COMMENT_WS: 576 return HTMLTokenContext.BLOCK_COMMENT; 577 578 case ISI_TAG: 579 return HTMLTokenContext.TAG_OPEN; 580 case ISI_ENDTAG: 581 return HTMLTokenContext.TAG_CLOSE; 582 583 case ISI_ARG: 584 return HTMLTokenContext.ARGUMENT; 585 586 case ISI_ERROR: 587 return HTMLTokenContext.ERROR; 588 589 case ISP_ARG_WS: 590 case ISP_TAG_WS: 591 case ISP_ENDTAG_WS: 592 case ISP_EQ_WS: 593 return HTMLTokenContext.WS; 594 595 case ISP_ARG_X: 596 case ISP_TAG_X: 597 case ISP_ENDTAG_X: 598 case ISP_EQ: 599 return HTMLTokenContext.WS; 600 601 case ISI_VAL: 602 case ISI_VAL_QUOT: 603 case ISI_VAL_DQUOT: 604 return HTMLTokenContext.VALUE; 605 606 case ISI_SGML_DECL: 607 case ISA_SGML_DECL_DASH: 608 return HTMLTokenContext.DECLARATION; 609 610 case ISI_SGML_COMMENT: 611 case ISA_SGML_COMMENT_DASH: 612 return HTMLTokenContext.SGML_COMMENT; 613 614 case ISI_REF_NAME: 615 case ISI_REF_DEC: 616 case ISA_REF_X: 617 case ISI_REF_HEX: 618 return HTMLTokenContext.CHARACTER; 619 } 620 } 621 622 return null; 623 } 624 625 public String getStateName(int stateNumber) { 626 switch(stateNumber) { 627 case INIT: 628 return "INIT"; case ISI_TEXT: 630 return "ISI_TEXT"; case ISA_LT: 632 return "ISA_LT"; case ISA_SLASH: 634 return "ISA_SLASH"; case ISA_SGML_ESCAPE: 636 return "ISA_SGML_ESCAPE"; case ISA_SGML_DASH: 638 return "ISA_SGML_DASH"; case ISI_HTML_COMMENT: 640 return "ISI_HTML_COMMENT"; case ISA_HTML_COMMENT_DASH: 642 return "ISA_HTML_COMMENT_DASH"; case ISI_HTML_COMMENT_WS: 644 return "ISI_HTML_COMMENT_WS"; case ISI_TAG: 646 return "ISI_TAG"; case ISI_ENDTAG: 648 return "ISI_ENDTAG"; case ISI_ARG: 650 return "ISI_ARG"; case ISI_ERROR: 652 return "ISI_ERROR"; case ISP_ARG_WS: 654 return "ISP_ARG_WS"; case ISP_TAG_WS: 656 return "ISP_TAG_WS"; case ISP_ENDTAG_WS: 658 return "ISP_ENDTAG_WS"; case ISP_ARG_X: 660 return "ISP_ARG_X"; case ISP_TAG_X: 662 return "ISP_TAG_X"; case ISP_ENDTAG_X: 664 return "ISP_ENDTAG_X"; case ISP_EQ: 666 return "ISP_EQ"; case ISI_VAL: 668 return "ISI_VAL"; case ISI_VAL_QUOT: 670 return "ISI_VAL_QUOT"; case ISI_VAL_DQUOT: 672 return "ISI_VAL_DQUOT"; case ISI_SGML_DECL: 674 return "ISI_SGML_DECL"; case ISA_SGML_DECL_DASH: 676 return "ISA_SGML_DECL_DASH"; case ISI_SGML_COMMENT: 678 return "ISI_SGML_COMMENT"; case ISA_SGML_COMMENT_DASH: 680 return "ISA_SGML_COMMENT_DASH"; case ISA_REF: 682 return "ISA_REF"; case ISI_REF_NAME: 684 return "ISI_REF_NAME"; case ISA_REF_HASH: 686 return "ISA_REF_HASH"; case ISI_REF_DEC: 688 return "ISI_REF_DEC"; case ISA_REF_X: 690 return "ISA_REF_X"; case ISI_REF_HEX: 692 return "ISI_REF_HEX"; default: 694 return super.getStateName(stateNumber); 695 } 696 } 697 698 704 public void loadState(StateInfo stateInfo) { 705 super.loadState( stateInfo ); 706 subState = ((HTMLStateInfo)stateInfo).getSubState(); 707 } 708 709 710 public void storeState(StateInfo stateInfo) { 711 super.storeState( stateInfo ); 712 ((HTMLStateInfo)stateInfo).setSubState( subState ); 713 } 714 715 716 public int compareState(StateInfo stateInfo) { 717 if( super.compareState( stateInfo ) == DIFFERENT_STATE ) return DIFFERENT_STATE; 718 return ( ((HTMLStateInfo)stateInfo).getSubState() == subState) ? EQUAL_STATE : DIFFERENT_STATE; 719 } 720 721 722 public StateInfo createStateInfo() { 723 return new HTMLStateInfo(); 724 } 725 726 727 728 public static class HTMLStateInfo extends Syntax.BaseStateInfo { 729 730 731 private int subState; 732 733 public int getSubState() { 734 return subState; 735 } 736 737 public void setSubState(int subState) { 738 this.subState = subState; 739 } 740 741 public String toString(Syntax syntax) { 742 return super.toString(syntax) + ", subState=" + (syntax == null ? "" : syntax.getStateName(getSubState())); } 744 745 } 746 747 748 } 749 | Popular Tags |