1 5 6 package org.w3c.tidy; 7 8 33 34 public class ParserImpl { 35 36 38 private static void parseTag(Lexer lexer, Node node, short mode) 39 { 40 44 53 54 if (!((node.tag.model & Dict.CM_INLINE) != 0)) 55 lexer.insertspace = false; 56 57 if ((node.tag.model & Dict.CM_EMPTY) != 0) 58 { 59 lexer.waswhite = false; 60 return; 61 } 62 63 if (node.tag.parser == null || node.type == Node.StartEndTag) 64 return; 65 66 node.tag.parser.parse(lexer, node, mode); 67 } 68 69 private static void moveToHead(Lexer lexer, Node element, Node node) 70 { 71 Node head; 72 TagTable tt = lexer.configuration.tt; 73 74 75 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 76 { 77 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); 78 79 while (element.tag != tt.tagHtml) 80 element = element.parent; 81 82 for (head = element.content; head != null; head = head.next) 83 { 84 if (head.tag == tt.tagHead) 85 { 86 Node.insertNodeAtEnd(head, node); 87 break; 88 } 89 } 90 91 if (node.tag.parser != null) 92 parseTag(lexer, node, Lexer.IgnoreWhitespace); 93 } 94 else 95 { 96 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 97 } 98 } 99 100 public static class ParseHTML implements Parser { 101 102 public void parse( Lexer lexer, Node html, short mode ) 103 { 104 Node node, head; 105 Node frameset = null; 106 Node noframes = null; 107 108 lexer.configuration.XmlTags = false; 109 lexer.seenBodyEndTag = 0; 110 TagTable tt = lexer.configuration.tt; 111 112 for (;;) 113 { 114 node = lexer.getToken(Lexer.IgnoreWhitespace); 115 116 if (node == null) 117 { 118 node = lexer.inferredTag("head"); 119 break; 120 } 121 122 if (node.tag == tt.tagHead) 123 break; 124 125 if (node.tag == html.tag && node.type == Node.EndTag) 126 { 127 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); 128 continue; 129 } 130 131 132 if (Node.insertMisc(html, node)) 133 continue; 134 135 lexer.ungetToken(); 136 node = lexer.inferredTag("head"); 137 break; 138 } 139 140 head = node; 141 Node.insertNodeAtEnd(html, head); 142 getParseHead().parse(lexer, head, mode); 143 144 for (;;) 145 { 146 node = lexer.getToken(Lexer.IgnoreWhitespace); 147 148 if (node == null) 149 { 150 if (frameset == null) 151 node = lexer.inferredTag("body"); 152 153 return; 154 } 155 156 157 if (node.tag == html.tag) 158 { 159 if (node.type != Node.StartTag && frameset == null) 160 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); 161 162 continue; 163 } 164 165 166 if (Node.insertMisc(html, node)) 167 continue; 168 169 170 if (node.tag == tt.tagBody) 171 { 172 if (node.type != Node.StartTag) 173 { 174 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); 175 continue; 176 } 177 178 if (frameset != null) 179 { 180 lexer.ungetToken(); 181 182 if (noframes == null) 183 { 184 noframes = lexer.inferredTag("noframes"); 185 Node.insertNodeAtEnd(frameset, noframes); 186 Report.warning(lexer, html, noframes, Report.INSERTING_TAG); 187 } 188 189 parseTag(lexer, noframes, mode); 190 continue; 191 } 192 193 break; 194 } 195 196 197 if (node.tag == tt.tagFrameset) 198 { 199 if (node.type != Node.StartTag) 200 { 201 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); 202 continue; 203 } 204 205 if (frameset != null) 206 Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET); 207 else 208 frameset = node; 209 210 Node.insertNodeAtEnd(html, node); 211 parseTag(lexer, node, mode); 212 213 217 218 for (node = frameset.content; node != null; node = node.next) 219 { 220 if (node.tag == tt.tagNoframes) 221 noframes = node; 222 } 223 continue; 224 } 225 226 227 if (node.tag == tt.tagNoframes) 228 { 229 if (node.type != Node.StartTag) 230 { 231 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); 232 continue; 233 } 234 235 if (frameset == null) 236 { 237 Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED); 238 node = lexer.inferredTag("body"); 239 break; 240 } 241 242 if (noframes == null) 243 { 244 noframes = node; 245 Node.insertNodeAtEnd(frameset, noframes); 246 } 247 248 parseTag(lexer, noframes, mode); 249 continue; 250 } 251 252 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 253 { 254 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) 255 { 256 moveToHead(lexer, html, node); 257 continue; 258 } 259 } 260 261 lexer.ungetToken(); 262 263 264 265 if (frameset != null) 266 { 267 if (noframes == null) 268 { 269 noframes = lexer.inferredTag("noframes"); 270 Node.insertNodeAtEnd(frameset, noframes); 271 } 272 else 273 Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT); 274 275 parseTag(lexer, noframes, mode); 276 continue; 277 } 278 279 node = lexer.inferredTag("body"); 280 break; 281 } 282 283 284 285 Node.insertNodeAtEnd(html, node); 286 parseTag(lexer, node, mode); 287 } 288 289 }; 290 291 public static class ParseHead implements Parser { 292 293 public void parse( Lexer lexer, Node head, short mode ) 294 { 295 Node node; 296 int HasTitle = 0; 297 int HasBase = 0; 298 TagTable tt = lexer.configuration.tt; 299 300 while (true) 301 { 302 node = lexer.getToken(Lexer.IgnoreWhitespace); 303 if (node == null) break; 304 if (node.tag == head.tag && node.type == Node.EndTag) 305 { 306 head.closed = true; 307 break; 308 } 309 310 if (node.type == Node.TextNode) 311 { 312 lexer.ungetToken(); 313 break; 314 } 315 316 317 if (Node.insertMisc(head, node)) 318 continue; 319 320 if (node.type == Node.DocTypeTag) 321 { 322 Node.insertDocType(lexer, head, node); 323 continue; 324 } 325 326 327 if (node.tag == null) 328 { 329 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED); 330 continue; 331 } 332 333 if (!((node.tag.model & Dict.CM_HEAD) != 0)) 334 { 335 lexer.ungetToken(); 336 break; 337 } 338 339 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 340 { 341 if (node.tag == tt.tagTitle) 342 { 343 ++HasTitle; 344 345 if (HasTitle > 1) 346 Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS); 347 } 348 else if (node.tag == tt.tagBase) 349 { 350 ++HasBase; 351 352 if (HasBase > 1) 353 Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS); 354 } 355 else if (node.tag == tt.tagNoscript) 356 Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN); 357 358 Node.insertNodeAtEnd(head, node); 359 parseTag(lexer, node, Lexer.IgnoreWhitespace); 360 continue; 361 } 362 363 364 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED); 365 } 366 367 if (HasTitle == 0) 368 { 369 Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT); 370 Node.insertNodeAtEnd(head, lexer.inferredTag( "title")); 371 } 372 } 373 374 }; 375 376 public static class ParseTitle implements Parser { 377 378 public void parse( Lexer lexer, Node title, short mode ) 379 { 380 Node node; 381 382 while (true) 383 { 384 node = lexer.getToken(Lexer.MixedContent); 385 if (node == null) break; 386 if (node.tag == title.tag && node.type == Node.EndTag) 387 { 388 title.closed = true; 389 Node.trimSpaces(lexer, title); 390 return; 391 } 392 393 if (node.type == Node.TextNode) 394 { 395 396 if (title.content == null) 397 Node.trimInitialSpace(lexer, title, node); 398 399 if (node.start >= node.end) 400 { 401 continue; 402 } 403 404 Node.insertNodeAtEnd(title, node); 405 continue; 406 } 407 408 409 if (Node.insertMisc(title, node)) 410 continue; 411 412 413 if (node.tag == null) 414 { 415 Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED); 416 continue; 417 } 418 419 420 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE); 421 lexer.ungetToken(); 422 Node.trimSpaces(lexer, title); 423 return; 424 } 425 426 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR); 427 } 428 429 }; 430 431 public static class ParseScript implements Parser { 432 433 public void parse( Lexer lexer, Node script, short mode ) 434 { 435 441 442 Node node; 443 444 node = lexer.getCDATA( script); 445 446 if (node != null) 447 Node.insertNodeAtEnd(script, node); 448 } 449 450 }; 451 452 public static class ParseBody implements Parser { 453 454 public void parse( Lexer lexer, Node body, short mode ) 455 { 456 Node node; 457 boolean checkstack, iswhitenode; 458 459 mode = Lexer.IgnoreWhitespace; 460 checkstack = true; 461 TagTable tt = lexer.configuration.tt; 462 463 while (true) 464 { 465 node = lexer.getToken(mode); 466 if (node == null) break; 467 if (node.tag == body.tag && node.type == Node.EndTag) 468 { 469 body.closed = true; 470 Node.trimSpaces(lexer, body); 471 lexer.seenBodyEndTag = 1; 472 mode = Lexer.IgnoreWhitespace; 473 474 if (body.parent.tag == tt.tagNoframes) 475 break; 476 477 continue; 478 } 479 480 if (node.tag == tt.tagNoframes) 481 { 482 if (node.type == Node.StartTag) 483 { 484 Node.insertNodeAtEnd(body, node); 485 getParseBlock().parse(lexer, node, mode); 486 continue; 487 } 488 489 if (node.type == Node.EndTag && 490 body.parent.tag == tt.tagNoframes) 491 { 492 Node.trimSpaces(lexer, body); 493 lexer.ungetToken(); 494 break; 495 } 496 } 497 498 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset) 499 && body.parent.tag == tt.tagNoframes) 500 { 501 Node.trimSpaces(lexer, body); 502 lexer.ungetToken(); 503 break; 504 } 505 506 if (node.tag == tt.tagHtml) 507 { 508 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 509 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); 510 511 continue; 512 } 513 514 iswhitenode = false; 515 516 if (node.type == Node.TextNode && 517 node.end <= node.start + 1 && 518 node.textarray[node.start] == (byte)' ') 519 iswhitenode = true; 520 521 522 if (Node.insertMisc(body, node)) 523 continue; 524 525 if (lexer.seenBodyEndTag == 1 && !iswhitenode) 526 { 527 ++lexer.seenBodyEndTag; 528 Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY); 529 } 530 531 532 if (node.type == Node.TextNode) 533 { 534 if (iswhitenode && mode == Lexer.IgnoreWhitespace) 535 { 536 continue; 537 } 538 539 if (lexer.configuration.EncloseBodyText && !iswhitenode) 540 { 541 Node para; 542 543 lexer.ungetToken(); 544 para = lexer.inferredTag("p"); 545 Node.insertNodeAtEnd(body, para); 546 parseTag(lexer, para, mode); 547 mode = Lexer.MixedContent; 548 continue; 549 } 550 else 551 lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20); 552 553 if (checkstack) 554 { 555 checkstack = false; 556 557 if (lexer.inlineDup( node) > 0) 558 continue; 559 } 560 561 Node.insertNodeAtEnd(body, node); 562 mode = Lexer.MixedContent; 563 continue; 564 } 565 566 if (node.type == Node.DocTypeTag) 567 { 568 Node.insertDocType(lexer, body, node); 569 continue; 570 } 571 572 if (node.tag == null || node.tag == tt.tagParam) 573 { 574 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); 575 continue; 576 } 577 578 584 lexer.excludeBlocks = false; 585 586 if (!((node.tag.model & Dict.CM_BLOCK) != 0) && 587 !((node.tag.model & Dict.CM_INLINE) != 0)) 588 { 589 590 if (!((node.tag.model & Dict.CM_HEAD) != 0)) 591 Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN); 592 593 if ((node.tag.model & Dict.CM_HTML) != 0) 594 { 595 596 if (node.tag == tt.tagBody && body.implicit 597 && body.attributes == null) 598 { 599 body.attributes = node.attributes; 600 node.attributes = null; 601 } 602 603 continue; 604 } 605 606 if ((node.tag.model & Dict.CM_HEAD) != 0) 607 { 608 moveToHead(lexer, body, node); 609 continue; 610 } 611 612 if ((node.tag.model & Dict.CM_LIST) != 0) 613 { 614 lexer.ungetToken(); 615 node = lexer.inferredTag( "ul"); 616 Node.addClass(node, "noindent"); 617 lexer.excludeBlocks = true; 618 } 619 else if ((node.tag.model & Dict.CM_DEFLIST) != 0) 620 { 621 lexer.ungetToken(); 622 node = lexer.inferredTag( "dl"); 623 lexer.excludeBlocks = true; 624 } 625 else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0) 626 { 627 lexer.ungetToken(); 628 node = lexer.inferredTag( "table"); 629 lexer.excludeBlocks = true; 630 } 631 else 632 { 633 643 if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0)) 644 { 645 lexer.ungetToken(); 646 return; 647 } 648 649 650 continue; 651 } 652 } 653 654 if (node.type == Node.EndTag) 655 { 656 if (node.tag == tt.tagBr) 657 node.type = Node.StartTag; 658 else if (node.tag == tt.tagP) 659 { 660 Node.coerceNode(lexer, node, tt.tagBr); 661 Node.insertNodeAtEnd(body, node); 662 node = lexer.inferredTag("br"); 663 } 664 else if ((node.tag.model & Dict.CM_INLINE) != 0) 665 lexer.popInline(node); 666 } 667 668 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 669 { 670 if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0)) 671 { 672 673 674 if (node.tag == tt.tagImg) 675 lexer.versions &= ~Dict.VERS_HTML40_STRICT; 676 else 677 lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20); 678 679 if (checkstack && !node.implicit) 680 { 681 checkstack = false; 682 683 if (lexer.inlineDup( node) > 0) 684 continue; 685 } 686 687 mode = Lexer.MixedContent; 688 } 689 else 690 { 691 checkstack = true; 692 mode = Lexer.IgnoreWhitespace; 693 } 694 695 if (node.implicit) 696 Report.warning(lexer, body, node, Report.INSERTING_TAG); 697 698 Node.insertNodeAtEnd(body, node); 699 parseTag(lexer, node, mode); 700 continue; 701 } 702 703 704 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); 705 } 706 } 707 708 }; 709 710 public static class ParseFrameSet implements Parser { 711 712 public void parse( Lexer lexer, Node frameset, short mode ) 713 { 714 Node node; 715 TagTable tt = lexer.configuration.tt; 716 717 lexer.badAccess |= Report.USING_FRAMES; 718 719 while (true) 720 { 721 node = lexer.getToken(Lexer.IgnoreWhitespace); 722 if (node == null) break; 723 if (node.tag == frameset.tag && node.type == Node.EndTag) 724 { 725 frameset.closed = true; 726 Node.trimSpaces(lexer, frameset); 727 return; 728 } 729 730 731 if (Node.insertMisc(frameset, node)) 732 continue; 733 734 if (node.tag == null) 735 { 736 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED); 737 continue; 738 } 739 740 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 741 { 742 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) 743 { 744 moveToHead(lexer, frameset, node); 745 continue; 746 } 747 } 748 749 if (node.tag == tt.tagBody) 750 { 751 lexer.ungetToken(); 752 node = lexer.inferredTag("noframes"); 753 Report.warning(lexer, frameset, node, Report.INSERTING_TAG); 754 } 755 756 if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0) 757 { 758 Node.insertNodeAtEnd(frameset, node); 759 lexer.excludeBlocks = false; 760 parseTag(lexer, node, Lexer.MixedContent); 761 continue; 762 } 763 else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0) 764 { 765 Node.insertNodeAtEnd(frameset, node); 766 continue; 767 } 768 769 770 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED); 771 } 772 773 Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR); 774 } 775 776 }; 777 778 public static class ParseInline implements Parser { 779 780 public void parse( Lexer lexer, Node element, short mode ) 781 { 782 Node node, parent; 783 TagTable tt = lexer.configuration.tt; 784 785 if ((element.tag.model & Dict.CM_EMPTY) != 0) 786 return; 787 788 if (element.tag == tt.tagA) 789 { 790 if (element.attributes == null) 791 { 792 Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED); 793 Node.discardElement(element); 794 return; 795 } 796 } 797 798 806 if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt)) 807 lexer.inlineDup( null); 808 else if ((element.tag.model & Dict.CM_INLINE) != 0 && 809 element.tag != tt.tagA && element.tag != tt.tagSpan) 810 lexer.pushInline( element); 811 812 if (element.tag == tt.tagNobr) 813 lexer.badLayout |= Report.USING_NOBR; 814 else if (element.tag == tt.tagFont) 815 lexer.badLayout |= Report.USING_FONT; 816 817 818 if (mode != Lexer.Preformatted) 819 mode = Lexer.MixedContent; 820 821 while (true) 822 { 823 node = lexer.getToken(mode); 824 if (node == null) break; 825 826 if (node.tag == element.tag && node.type == Node.EndTag) 827 { 828 if ((element.tag.model & Dict.CM_INLINE) != 0 && 829 element.tag != tt.tagA) 830 lexer.popInline( node); 831 832 if (!((mode & Lexer.Preformatted) != 0)) 833 Node.trimSpaces(lexer, element); 834 839 if (element.tag == tt.tagFont && 840 element.content != null && 841 element.content == element.last) 842 { 843 Node child = element.content; 844 845 if (child.tag == tt.tagA) 846 { 847 child.parent = element.parent; 848 child.next = element.next; 849 child.prev = element.prev; 850 851 if (child.prev != null) 852 child.prev.next = child; 853 else 854 child.parent.content = child; 855 856 if (child.next != null) 857 child.next.prev = child; 858 else 859 child.parent.last = child; 860 861 element.next = null; 862 element.prev = null; 863 element.parent = child; 864 element.content = child.content; 865 element.last = child.last; 866 child.content = element; 867 child.last = element; 868 for (child = element.content; child != null; child = child.next) 869 child.parent = element; 870 } 871 } 872 element.closed = true; 873 Node.trimSpaces(lexer, element); 874 Node.trimEmptyElement(lexer, element); 875 return; 876 } 877 878 879 880 881 if (node.type == Node.StartTag 882 && node.tag == element.tag 883 && lexer.isPushed(node) 884 && !node.implicit 885 && !element.implicit 886 && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0) 887 && node.tag != tt.tagA 888 && node.tag != tt.tagFont 889 && node.tag != tt.tagBig 890 && node.tag != tt.tagSmall) 891 { 892 if (element.content != null && node.attributes == null) 893 { 894 Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG); 895 node.type = Node.EndTag; 896 lexer.ungetToken(); 897 continue; 898 } 899 900 Report.warning(lexer, element, node, Report.NESTED_EMPHASIS); 901 } 902 903 if (node.type == Node.TextNode) 904 { 905 906 if (element.content == null && 907 !((mode & Lexer.Preformatted) != 0)) 908 Node.trimSpaces(lexer, element); 909 910 if (node.start >= node.end) 911 { 912 continue; 913 } 914 915 Node.insertNodeAtEnd(element, node); 916 continue; 917 } 918 919 920 if (Node.insertMisc(element, node)) 921 continue; 922 923 924 if (node.tag == tt.tagHtml) 925 { 926 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 927 { 928 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 929 continue; 930 } 931 932 933 lexer.ungetToken(); 934 if (!((mode & Lexer.Preformatted) != 0)) 935 Node.trimSpaces(lexer, element); 936 Node.trimEmptyElement(lexer, element); 937 return; 938 } 939 940 941 if (node.tag == tt.tagP && 942 node.type == Node.StartTag && 943 ((mode & Lexer.Preformatted) != 0 || 944 element.tag == tt.tagDt || 945 element.isDescendantOf(tt.tagDt))) 946 { 947 node.tag = tt.tagBr; 948 node.element = "br"; 949 Node.trimSpaces(lexer, element); 950 Node.insertNodeAtEnd(element, node); 951 continue; 952 } 953 954 955 if (node.tag == null || node.tag == tt.tagParam) 956 { 957 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 958 continue; 959 } 960 961 if (node.tag == tt.tagBr && node.type == Node.EndTag) 962 node.type = Node.StartTag; 963 964 if (node.type == Node.EndTag) 965 { 966 967 if (node.tag == tt.tagBr) 968 node.type = Node.StartTag; 969 else if (node.tag == tt.tagP) 970 { 971 972 if (!element.isDescendantOf(tt.tagP)) 973 { 974 Node.coerceNode(lexer, node, tt.tagBr); 975 Node.trimSpaces(lexer, element); 976 Node.insertNodeAtEnd(element, node); 977 node = lexer.inferredTag("br"); 978 continue; 979 } 980 } 981 else if ((node.tag.model & Dict.CM_INLINE) != 0 982 && node.tag != tt.tagA 983 && !((node.tag.model & Dict.CM_OBJECT) != 0) 984 && (element.tag.model & Dict.CM_INLINE) != 0) 985 { 986 987 lexer.popInline( element); 988 989 if (element.tag != tt.tagA) 990 { 991 if (node.tag == tt.tagA && node.tag != element.tag) 992 { 993 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); 994 lexer.ungetToken(); 995 } 996 else 997 { 998 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG); 999 } 1000 1001 if (!((mode & Lexer.Preformatted) != 0)) 1002 Node.trimSpaces(lexer, element); 1003 Node.trimEmptyElement(lexer, element); 1004 return; 1005 } 1006 1007 1008 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1009 continue; 1010 } 1011 else if (lexer.exiled 1012 && node.tag.model != 0 1013 && (node.tag.model & Dict.CM_TABLE) != 0) 1014 { 1015 lexer.ungetToken(); 1016 Node.trimSpaces(lexer, element); 1017 Node.trimEmptyElement(lexer, element); 1018 return; 1019 } 1020 } 1021 1022 1023 if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0) 1024 { 1025 if (node.tag == element.tag) 1026 { 1027 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG); 1028 } 1029 else 1030 { 1031 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); 1032 lexer.ungetToken(); 1033 } 1034 if (!((mode & Lexer.Preformatted) != 0)) 1035 Node.trimSpaces(lexer, element); 1036 Node.trimEmptyElement(lexer, element); 1037 return; 1038 } 1039 1040 1044 if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node)) 1045 { 1046 1047 if (node.attributes == null) 1048 { 1049 node.type = Node.EndTag; 1050 Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG); 1051 lexer.popInline( node); 1052 lexer.ungetToken(); 1053 continue; 1054 } 1055 1056 lexer.ungetToken(); 1057 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); 1058 lexer.popInline( element); 1059 if (!((mode & Lexer.Preformatted) != 0)) 1060 Node.trimSpaces(lexer, element); 1061 Node.trimEmptyElement(lexer, element); 1062 return; 1063 } 1064 1065 if ((element.tag.model & Dict.CM_HEADING) != 0) 1066 { 1067 if (node.tag == tt.tagCenter || 1068 node.tag == tt.tagDiv) 1069 { 1070 if (node.type != Node.StartTag && 1071 node.type != Node.StartEndTag) 1072 { 1073 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1074 continue; 1075 } 1076 1077 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); 1078 1079 1080 if (element.content == null) 1081 { 1082 Node.insertNodeAsParent(element, node); 1083 continue; 1084 } 1085 1086 1087 Node.insertNodeAfterElement(element, node); 1088 1089 if (!((mode & Lexer.Preformatted) != 0)) 1090 Node.trimSpaces(lexer, element); 1091 1092 element = lexer.cloneNode(element); 1093 element.start = lexer.lexsize; 1094 element.end = lexer.lexsize; 1095 Node.insertNodeAtEnd(node, element); 1096 continue; 1097 } 1098 1099 if (node.tag == tt.tagHr) 1100 { 1101 if (node.type != Node.StartTag && 1102 node.type != Node.StartEndTag) 1103 { 1104 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1105 continue; 1106 } 1107 1108 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); 1109 1110 1111 if (element.content == null) 1112 { 1113 Node.insertNodeBeforeElement(element, node); 1114 continue; 1115 } 1116 1117 1118 Node.insertNodeAfterElement(element, node); 1119 1120 if (!((mode & Lexer.Preformatted) != 0)) 1121 Node.trimSpaces(lexer, element); 1122 1123 element = lexer.cloneNode(element); 1124 element.start = lexer.lexsize; 1125 element.end = lexer.lexsize; 1126 Node.insertNodeAfterElement(node, element); 1127 continue; 1128 } 1129 } 1130 1131 if (element.tag == tt.tagDt) 1132 { 1133 if (node.tag == tt.tagHr) 1134 { 1135 Node dd; 1136 1137 if (node.type != Node.StartTag && 1138 node.type != Node.StartEndTag) 1139 { 1140 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1141 continue; 1142 } 1143 1144 Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); 1145 dd = lexer.inferredTag("dd"); 1146 1147 1148 if (element.content == null) 1149 { 1150 Node.insertNodeBeforeElement(element, dd); 1151 Node.insertNodeAtEnd(dd, node); 1152 continue; 1153 } 1154 1155 1156 Node.insertNodeAfterElement(element, dd); 1157 Node.insertNodeAtEnd(dd, node); 1158 1159 if (!((mode & Lexer.Preformatted) != 0)) 1160 Node.trimSpaces(lexer, element); 1161 1162 element = lexer.cloneNode(element); 1163 element.start = lexer.lexsize; 1164 element.end = lexer.lexsize; 1165 Node.insertNodeAfterElement(dd, element); 1166 continue; 1167 } 1168 } 1169 1170 1171 1175 if (node.type == Node.EndTag) 1176 { 1177 for (parent = element.parent; 1178 parent != null; parent = parent.parent) 1179 { 1180 if (node.tag == parent.tag) 1181 { 1182 if (!((element.tag.model & Dict.CM_OPT) != 0) && 1183 !element.implicit) 1184 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); 1185 1186 if (element.tag == tt.tagA) 1187 lexer.popInline(element); 1188 1189 lexer.ungetToken(); 1190 1191 if (!((mode & Lexer.Preformatted) != 0)) 1192 Node.trimSpaces(lexer, element); 1193 1194 Node.trimEmptyElement(lexer, element); 1195 return; 1196 } 1197 } 1198 } 1199 1200 1201 if (!((node.tag.model & Dict.CM_INLINE) != 0)) 1202 { 1203 if (node.type != Node.StartTag) 1204 { 1205 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1206 continue; 1207 } 1208 1209 if (!((element.tag.model & Dict.CM_OPT) != 0)) 1210 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); 1211 1212 if ((node.tag.model & Dict.CM_HEAD) != 0 && 1213 !((node.tag.model & Dict.CM_BLOCK) != 0)) 1214 { 1215 moveToHead(lexer, element, node); 1216 continue; 1217 } 1218 1219 1223 if (element.tag == tt.tagA) 1224 { 1225 if (node.tag != null && 1226 !((node.tag.model & Dict.CM_HEADING) != 0)) 1227 lexer.popInline(element); 1228 else if (!(element.content != null)) 1229 { 1230 Node.discardElement(element); 1231 lexer.ungetToken(); 1232 return; 1233 } 1234 } 1235 1236 lexer.ungetToken(); 1237 1238 if (!((mode & Lexer.Preformatted) != 0)) 1239 Node.trimSpaces(lexer, element); 1240 1241 Node.trimEmptyElement(lexer, element); 1242 return; 1243 } 1244 1245 1246 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 1247 { 1248 if (node.implicit) 1249 Report.warning(lexer, element, node, Report.INSERTING_TAG); 1250 1251 1252 if (node.tag == tt.tagBr) 1253 Node.trimSpaces(lexer, element); 1254 1255 Node.insertNodeAtEnd(element, node); 1256 parseTag(lexer, node, mode); 1257 continue; 1258 } 1259 1260 1261 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1262 } 1263 1264 if (!((element.tag.model & Dict.CM_OPT) != 0)) 1265 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR); 1266 1267 Node.trimEmptyElement(lexer, element); 1268 } 1269 }; 1270 1271 public static class ParseList implements Parser { 1272 1273 public void parse( Lexer lexer, Node list, short mode ) 1274 { 1275 Node node; 1276 Node parent; 1277 TagTable tt = lexer.configuration.tt; 1278 1279 if ((list.tag.model & Dict.CM_EMPTY) != 0) 1280 return; 1281 1282 lexer.insert = -1; 1283 1284 while (true) 1285 { 1286 node = lexer.getToken(Lexer.IgnoreWhitespace); 1287 if (node == null) break; 1288 1289 if (node.tag == list.tag && node.type == Node.EndTag) 1290 { 1291 if ((list.tag.model & Dict.CM_OBSOLETE) != 0) 1292 Node.coerceNode(lexer, list, tt.tagUl); 1293 1294 list.closed = true; 1295 Node.trimEmptyElement(lexer, list); 1296 return; 1297 } 1298 1299 1300 if (Node.insertMisc(list, node)) 1301 continue; 1302 1303 if (node.type != Node.TextNode && node.tag == null) 1304 { 1305 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); 1306 continue; 1307 } 1308 1309 1313 if (node.type == Node.EndTag) 1314 { 1315 if (node.tag == tt.tagForm) 1316 { 1317 lexer.badForm = 1; 1318 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); 1319 continue; 1320 } 1321 1322 if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0) 1323 { 1324 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); 1325 lexer.popInline(node); 1326 continue; 1327 } 1328 1329 for (parent = list.parent; 1330 parent != null; parent = parent.parent) 1331 { 1332 if (node.tag == parent.tag) 1333 { 1334 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); 1335 lexer.ungetToken(); 1336 1337 if ((list.tag.model & Dict.CM_OBSOLETE) != 0) 1338 Node.coerceNode(lexer, list, tt.tagUl); 1339 1340 Node.trimEmptyElement(lexer, list); 1341 return; 1342 } 1343 } 1344 1345 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); 1346 continue; 1347 } 1348 1349 if (node.tag != tt.tagLi) 1350 { 1351 lexer.ungetToken(); 1352 1353 if (node.tag != null && 1354 (node.tag.model & Dict.CM_BLOCK) != 0 && 1355 lexer.excludeBlocks) 1356 { 1357 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); 1358 Node.trimEmptyElement(lexer, list); 1359 return; 1360 } 1361 1362 node = lexer.inferredTag("li"); 1363 node.addAttribute("style", "list-style: none"); 1364 Report.warning(lexer, list, node, Report.MISSING_STARTTAG); 1365 } 1366 1367 1368 Node.insertNodeAtEnd(list, node); 1369 parseTag(lexer, node, Lexer.IgnoreWhitespace); 1370 } 1371 1372 if ((list.tag.model & Dict.CM_OBSOLETE) != 0) 1373 Node.coerceNode(lexer, list, tt.tagUl); 1374 1375 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR); 1376 Node.trimEmptyElement(lexer, list); 1377 } 1378 1379 }; 1380 1381 public static class ParseDefList implements Parser { 1382 1383 public void parse( Lexer lexer, Node list, short mode ) 1384 { 1385 Node node, parent; 1386 TagTable tt = lexer.configuration.tt; 1387 1388 if ((list.tag.model & Dict.CM_EMPTY) != 0) 1389 return; 1390 1391 lexer.insert = -1; 1392 1393 while (true) 1394 { 1395 node = lexer.getToken(Lexer.IgnoreWhitespace); 1396 if (node == null) break; 1397 if (node.tag == list.tag && node.type == Node.EndTag) 1398 { 1399 list.closed = true; 1400 Node.trimEmptyElement(lexer, list); 1401 return; 1402 } 1403 1404 1405 if (Node.insertMisc(list, node)) 1406 continue; 1407 1408 if (node.type == Node.TextNode) 1409 { 1410 lexer.ungetToken(); 1411 node = lexer.inferredTag( "dt"); 1412 Report.warning(lexer, list, node, Report.MISSING_STARTTAG); 1413 } 1414 1415 if (node.tag == null) 1416 { 1417 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); 1418 continue; 1419 } 1420 1421 1425 if (node.type == Node.EndTag) 1426 { 1427 if (node.tag == tt.tagForm) 1428 { 1429 lexer.badForm = 1; 1430 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); 1431 continue; 1432 } 1433 1434 for (parent = list.parent; 1435 parent != null; parent = parent.parent) 1436 { 1437 if (node.tag == parent.tag) 1438 { 1439 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); 1440 1441 lexer.ungetToken(); 1442 Node.trimEmptyElement(lexer, list); 1443 return; 1444 } 1445 } 1446 } 1447 1448 1449 if (node.tag == tt.tagCenter) 1450 { 1451 if (list.content != null) 1452 Node.insertNodeAfterElement(list, node); 1453 else 1454 { 1455 Node.insertNodeBeforeElement(list, node); 1456 Node.discardElement(list); 1457 } 1458 1459 1460 parseTag(lexer, node, mode); 1461 1462 1463 list = lexer.inferredTag("dl"); 1464 Node.insertNodeAfterElement(node, list); 1465 continue; 1466 } 1467 1468 if (!(node.tag == tt.tagDt || node.tag == tt.tagDd)) 1469 { 1470 lexer.ungetToken(); 1471 1472 if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)) 1473 { 1474 Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN); 1475 Node.trimEmptyElement(lexer, list); 1476 return; 1477 } 1478 1479 1480 if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks) 1481 { 1482 Node.trimEmptyElement(lexer, list); 1483 return; 1484 } 1485 1486 node = lexer.inferredTag( "dd"); 1487 Report.warning(lexer, list, node, Report.MISSING_STARTTAG); 1488 } 1489 1490 if (node.type == Node.EndTag) 1491 { 1492 Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); 1493 continue; 1494 } 1495 1496 1497 Node.insertNodeAtEnd(list, node); 1498 parseTag(lexer, node, Lexer.IgnoreWhitespace); 1499 } 1500 1501 Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR); 1502 Node.trimEmptyElement(lexer, list); 1503 } 1504 1505 }; 1506 1507 public static class ParsePre implements Parser { 1508 1509 public void parse( Lexer lexer, Node pre, short mode ) 1510 { 1511 Node node, parent; 1512 TagTable tt = lexer.configuration.tt; 1513 1514 if ((pre.tag.model & Dict.CM_EMPTY) != 0) 1515 return; 1516 1517 if ((pre.tag.model & Dict.CM_OBSOLETE) != 0) 1518 Node.coerceNode(lexer, pre, tt.tagPre); 1519 1520 lexer.inlineDup( null); 1521 1522 while (true) 1523 { 1524 node = lexer.getToken(Lexer.Preformatted); 1525 if (node == null) break; 1526 if (node.tag == pre.tag && node.type == Node.EndTag) 1527 { 1528 Node.trimSpaces(lexer, pre); 1529 pre.closed = true; 1530 Node.trimEmptyElement(lexer, pre); 1531 return; 1532 } 1533 1534 if (node.tag == tt.tagHtml) 1535 { 1536 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 1537 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); 1538 1539 continue; 1540 } 1541 1542 if (node.type == Node.TextNode) 1543 { 1544 1545 if (pre.content == null) 1546 { 1547 if (node.textarray[node.start] == (byte)'\n') 1548 ++node.start; 1549 1550 if (node.start >= node.end) 1551 { 1552 continue; 1553 } 1554 } 1555 1556 Node.insertNodeAtEnd(pre, node); 1557 continue; 1558 } 1559 1560 1561 if (Node.insertMisc(pre, node)) 1562 continue; 1563 1564 1565 if (node.tag == null || node.tag == tt.tagParam) 1566 { 1567 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); 1568 continue; 1569 } 1570 1571 if (node.tag == tt.tagP) 1572 { 1573 if (node.type == Node.StartTag) 1574 { 1575 Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF); 1576 1577 1578 Node.trimSpaces(lexer, pre); 1579 1580 1581 Node.coerceNode(lexer, node, tt.tagBr); 1582 Node.insertNodeAtEnd(pre, node); 1583 } 1584 else 1585 { 1586 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); 1587 } 1588 continue; 1589 } 1590 1591 if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0)) 1592 { 1593 moveToHead(lexer, pre, node); 1594 continue; 1595 } 1596 1597 1601 if (node.type == Node.EndTag) 1602 { 1603 if (node.tag == tt.tagForm) 1604 { 1605 lexer.badForm = 1; 1606 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); 1607 continue; 1608 } 1609 1610 for (parent = pre.parent; 1611 parent != null; parent = parent.parent) 1612 { 1613 if (node.tag == parent.tag) 1614 { 1615 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE); 1616 1617 lexer.ungetToken(); 1618 Node.trimSpaces(lexer, pre); 1619 Node.trimEmptyElement(lexer, pre); 1620 return; 1621 } 1622 } 1623 } 1624 1625 1626 if (!((node.tag.model & Dict.CM_INLINE) != 0)) 1627 { 1628 if (node.type != Node.StartTag) 1629 { 1630 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); 1631 continue; 1632 } 1633 1634 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE); 1635 lexer.excludeBlocks = true; 1636 1637 1638 if ((node.tag.model & Dict.CM_LIST) != 0) 1639 { 1640 lexer.ungetToken(); 1641 node = lexer.inferredTag( "ul"); 1642 Node.addClass(node, "noindent"); 1643 } 1644 else if ((node.tag.model & Dict.CM_DEFLIST) != 0) 1645 { 1646 lexer.ungetToken(); 1647 node = lexer.inferredTag( "dl"); 1648 } 1649 else if ((node.tag.model & Dict.CM_TABLE) != 0) 1650 { 1651 lexer.ungetToken(); 1652 node = lexer.inferredTag( "table"); 1653 } 1654 1655 Node.insertNodeAfterElement(pre, node); 1656 pre = lexer.inferredTag( "pre"); 1657 Node.insertNodeAfterElement(node, pre); 1658 parseTag(lexer, node, Lexer.IgnoreWhitespace); 1659 lexer.excludeBlocks = false; 1660 continue; 1661 } 1662 1670 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 1671 { 1672 1673 if (node.tag == tt.tagBr) 1674 Node.trimSpaces(lexer, pre); 1675 1676 Node.insertNodeAtEnd(pre, node); 1677 parseTag(lexer, node, Lexer.Preformatted); 1678 continue; 1679 } 1680 1681 1682 Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED); 1683 } 1684 1685 Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR); 1686 Node.trimEmptyElement(lexer, pre); 1687 } 1688 1689 }; 1690 1691 public static class ParseBlock implements Parser { 1692 1693 public void parse( Lexer lexer, Node element, short mode ) 1694 1699 { 1700 Node node, parent; 1701 boolean checkstack; 1702 int istackbase = 0; 1703 TagTable tt = lexer.configuration.tt; 1704 1705 checkstack = true; 1706 1707 if ((element.tag.model & Dict.CM_EMPTY) != 0) 1708 return; 1709 1710 if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm)) 1711 Report.warning(lexer, element, null, Report.ILLEGAL_NESTING); 1712 1713 1721 if ((element.tag.model & Dict.CM_OBJECT) != 0) 1722 { 1723 istackbase = lexer.istackbase; 1724 lexer.istackbase = lexer.istack.size(); 1725 } 1726 1727 if (!((element.tag.model & Dict.CM_MIXED) != 0)) 1728 lexer.inlineDup( null); 1729 1730 mode = Lexer.IgnoreWhitespace; 1731 1732 while (true) 1733 { 1734 node = lexer.getToken(mode ); 1735 if (node == null) break; 1736 1737 if (node.type == Node.EndTag && node.tag != null && 1738 (node.tag == element.tag || element.was == node.tag)) 1739 { 1740 1741 if ((element.tag.model & Dict.CM_OBJECT) != 0) 1742 { 1743 1744 while (lexer.istack.size() > lexer.istackbase) 1745 lexer.popInline( null); 1746 lexer.istackbase = istackbase; 1747 } 1748 1749 element.closed = true; 1750 Node.trimSpaces(lexer, element); 1751 Node.trimEmptyElement(lexer, element); 1752 return; 1753 } 1754 1755 if (node.tag == tt.tagHtml || 1756 node.tag == tt.tagHead || 1757 node.tag == tt.tagBody) 1758 { 1759 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 1760 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1761 1762 continue; 1763 } 1764 1765 if (node.type == Node.EndTag) 1766 { 1767 if (node.tag == null) 1768 { 1769 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1770 1771 continue; 1772 } 1773 else if (node.tag == tt.tagBr) 1774 node.type = Node.StartTag; 1775 else if (node.tag == tt.tagP) 1776 { 1777 Node.coerceNode(lexer, node, tt.tagBr); 1778 Node.insertNodeAtEnd(element, node); 1779 node = lexer.inferredTag("br"); 1780 } 1781 else 1782 { 1783 1787 for (parent = element.parent; 1788 parent != null; parent = parent.parent) 1789 { 1790 if (node.tag == parent.tag) 1791 { 1792 if (!((element.tag.model & Dict.CM_OPT) != 0)) 1793 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); 1794 1795 lexer.ungetToken(); 1796 1797 if ((element.tag.model & Dict.CM_OBJECT) != 0) 1798 { 1799 1800 while (lexer.istack.size() > lexer.istackbase) 1801 lexer.popInline( null); 1802 lexer.istackbase = istackbase; 1803 } 1804 1805 Node.trimSpaces(lexer, element); 1806 Node.trimEmptyElement(lexer, element); 1807 return; 1808 } 1809 } 1810 1811 if (lexer.exiled 1812 && node.tag.model != 0 1813 && (node.tag.model & Dict.CM_TABLE) != 0) 1814 { 1815 lexer.ungetToken(); 1816 Node.trimSpaces(lexer, element); 1817 Node.trimEmptyElement(lexer, element); 1818 return; 1819 } 1820 } 1821 } 1822 1823 1824 if (node.type == Node.TextNode) 1825 { 1826 boolean iswhitenode = false; 1827 1828 if (node.type == Node.TextNode && 1829 node.end <= node.start + 1 && 1830 lexer.lexbuf[node.start] == (byte)' ') 1831 iswhitenode = true; 1832 1833 if (lexer.configuration.EncloseBlockText && !iswhitenode) 1834 { 1835 lexer.ungetToken(); 1836 node = lexer.inferredTag("p"); 1837 Node.insertNodeAtEnd(element, node); 1838 parseTag(lexer, node, Lexer.MixedContent); 1839 continue; 1840 } 1841 1842 if (checkstack) 1843 { 1844 checkstack = false; 1845 1846 if (!((element.tag.model & Dict.CM_MIXED) != 0)) 1847 { 1848 if (lexer.inlineDup( node) > 0) 1849 continue; 1850 } 1851 } 1852 1853 Node.insertNodeAtEnd(element, node); 1854 mode = Lexer.MixedContent; 1855 1859 lexer.versions &= ~Dict.VERS_HTML40_STRICT; 1860 continue; 1861 } 1862 1863 if (Node.insertMisc(element, node)) 1864 continue; 1865 1866 1867 if (node.tag == tt.tagParam) 1868 { 1869 if (((element.tag.model & Dict.CM_PARAM) != 0) && 1870 (node.type == Node.StartTag || node.type == Node.StartEndTag)) 1871 { 1872 Node.insertNodeAtEnd(element, node); 1873 continue; 1874 } 1875 1876 1877 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1878 continue; 1879 } 1880 1881 1882 if (node.tag == tt.tagArea) 1883 { 1884 if ((element.tag == tt.tagMap) && 1885 (node.type == Node.StartTag || node.type == Node.StartEndTag)) 1886 { 1887 Node.insertNodeAtEnd(element, node); 1888 continue; 1889 } 1890 1891 1892 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1893 continue; 1894 } 1895 1896 1897 if (node.tag == null) 1898 { 1899 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1900 continue; 1901 } 1902 1903 1913 1914 if (!((node.tag.model & Dict.CM_INLINE) != 0)) 1915 { 1916 if (node.type != Node.StartTag && node.type != Node.StartEndTag) 1917 { 1918 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 1919 continue; 1920 } 1921 1922 if (element.tag == tt.tagTd || element.tag == tt.tagTh) 1923 { 1924 1925 1926 if ((node.tag.model & Dict.CM_HEAD) != 0) 1927 { 1928 moveToHead(lexer, element, node); 1929 continue; 1930 } 1931 1932 if ((node.tag.model & Dict.CM_LIST) != 0) 1933 { 1934 lexer.ungetToken(); 1935 node = lexer.inferredTag( "ul"); 1936 Node.addClass(node, "noindent"); 1937 lexer.excludeBlocks = true; 1938 } 1939 else if ((node.tag.model & Dict.CM_DEFLIST) != 0) 1940 { 1941 lexer.ungetToken(); 1942 node = lexer.inferredTag( "dl"); 1943 lexer.excludeBlocks = true; 1944 } 1945 1946 1947 if (!((node.tag.model & Dict.CM_BLOCK) != 0)) 1948 { 1949 lexer.ungetToken(); 1950 Node.trimSpaces(lexer, element); 1951 Node.trimEmptyElement(lexer, element); 1952 return; 1953 } 1954 } 1955 else if ((node.tag.model & Dict.CM_BLOCK) != 0) 1956 { 1957 if (lexer.excludeBlocks) 1958 { 1959 if (!((element.tag.model & Dict.CM_OPT) != 0)) 1960 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); 1961 1962 lexer.ungetToken(); 1963 1964 if ((element.tag.model & Dict.CM_OBJECT) != 0) 1965 lexer.istackbase = istackbase; 1966 1967 Node.trimSpaces(lexer, element); 1968 Node.trimEmptyElement(lexer, element); 1969 return; 1970 } 1971 } 1972 else 1973 { 1974 if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit) 1975 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); 1976 1977 if ((node.tag.model & Dict.CM_HEAD) != 0) 1978 { 1979 moveToHead(lexer, element, node); 1980 continue; 1981 } 1982 1983 lexer.ungetToken(); 1984 1985 if ((node.tag.model & Dict.CM_LIST) != 0) 1986 { 1987 if (element.parent != null && element.parent.tag != null && 1988 element.parent.tag.parser == getParseList()) 1989 { 1990 Node.trimSpaces(lexer, element); 1991 Node.trimEmptyElement(lexer, element); 1992 return; 1993 } 1994 1995 node = lexer.inferredTag("ul"); 1996 Node.addClass(node, "noindent"); 1997 } 1998 else if ((node.tag.model & Dict.CM_DEFLIST) != 0) 1999 { 2000 if (element.parent.tag == tt.tagDl) 2001 { 2002 Node.trimSpaces(lexer, element); 2003 Node.trimEmptyElement(lexer, element); 2004 return; 2005 } 2006 2007 node = lexer.inferredTag("dl"); 2008 } 2009 else if ((node.tag.model & Dict.CM_TABLE) != 0 || 2010 (node.tag.model & Dict.CM_ROW) != 0) 2011 { 2012 node = lexer.inferredTag("table"); 2013 } 2014 else if ((element.tag.model & Dict.CM_OBJECT) != 0) 2015 { 2016 2017 while (lexer.istack.size() > lexer.istackbase) 2018 lexer.popInline( null); 2019 lexer.istackbase = istackbase; 2020 Node.trimSpaces(lexer, element); 2021 Node.trimEmptyElement(lexer, element); 2022 return; 2023 2024 } 2025 else 2026 { 2027 Node.trimSpaces(lexer, element); 2028 Node.trimEmptyElement(lexer, element); 2029 return; 2030 } 2031 } 2032 } 2033 2034 2035 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 2036 { 2037 if ((node.tag.model & Dict.CM_INLINE) != 0) 2038 { 2039 if (checkstack && !node.implicit) 2040 { 2041 checkstack = false; 2042 2043 if (lexer.inlineDup( node) > 0) 2044 continue; 2045 } 2046 2047 mode = Lexer.MixedContent; 2048 } 2049 else 2050 { 2051 checkstack = true; 2052 mode = Lexer.IgnoreWhitespace; 2053 } 2054 2055 2056 if (node.tag == tt.tagBr) 2057 Node.trimSpaces(lexer, element); 2058 2059 Node.insertNodeAtEnd(element, node); 2060 2061 if (node.implicit) 2062 Report.warning(lexer, element, node, Report.INSERTING_TAG); 2063 2064 parseTag(lexer, node, Lexer.IgnoreWhitespace ); 2065 continue; 2066 } 2067 2068 2069 if (node.type == Node.EndTag) 2070 lexer.popInline( node); 2071 2072 Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); 2073 } 2074 2075 if (!((element.tag.model & Dict.CM_OPT) != 0)) 2076 Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR); 2077 2078 if ((element.tag.model & Dict.CM_OBJECT) != 0) 2079 { 2080 2081 while (lexer.istack.size() > lexer.istackbase) 2082 lexer.popInline( null); 2083 lexer.istackbase = istackbase; 2084 } 2085 2086 Node.trimSpaces(lexer, element); 2087 Node.trimEmptyElement(lexer, element); 2088 } 2089 2090 }; 2091 2092 public static class ParseTableTag implements Parser { 2093 2094 public void parse( Lexer lexer, Node table, short mode ) 2095 { 2096 Node node, parent; 2097 int istackbase; 2098 TagTable tt = lexer.configuration.tt; 2099 2100 lexer.deferDup(); 2101 istackbase = lexer.istackbase; 2102 lexer.istackbase = lexer.istack.size(); 2103 2104 while (true) 2105 { 2106 node = lexer.getToken(Lexer.IgnoreWhitespace); 2107 if (node == null) break; 2108 if (node.tag == table.tag && node.type == Node.EndTag) 2109 { 2110 lexer.istackbase = istackbase; 2111 table.closed = true; 2112 Node.trimEmptyElement(lexer, table); 2113 return; 2114 } 2115 2116 2117 if (Node.insertMisc(table, node)) 2118 continue; 2119 2120 2121 if (node.tag == null && node.type != Node.TextNode) 2122 { 2123 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); 2124 continue; 2125 } 2126 2127 2128 2129 if (node.type != Node.EndTag) 2130 { 2131 if (node.tag == tt.tagTd || 2132 node.tag == tt.tagTh || 2133 node.tag == tt.tagTable) 2134 { 2135 lexer.ungetToken(); 2136 node = lexer.inferredTag( "tr"); 2137 Report.warning(lexer, table, node, Report.MISSING_STARTTAG); 2138 } 2139 else if (node.type == Node.TextNode 2140 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) 2141 { 2142 Node.insertNodeBeforeElement(table, node); 2143 Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN); 2144 lexer.exiled = true; 2145 2146 2152 if (false) 2154 parseTag(lexer, node, Lexer.IgnoreWhitespace); 2155 2156 lexer.exiled = false; 2157 continue; 2158 } 2159 else if ((node.tag.model & Dict.CM_HEAD) != 0) 2160 { 2161 moveToHead(lexer, table, node); 2162 continue; 2163 } 2164 } 2165 2166 2170 if (node.type == Node.EndTag) 2171 { 2172 if (node.tag == tt.tagForm) 2173 { 2174 lexer.badForm = 1; 2175 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); 2176 continue; 2177 } 2178 2179 if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0) 2180 { 2181 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); 2182 continue; 2183 } 2184 2185 for (parent = table.parent; 2186 parent != null; parent = parent.parent) 2187 { 2188 if (node.tag == parent.tag) 2189 { 2190 Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE); 2191 lexer.ungetToken(); 2192 lexer.istackbase = istackbase; 2193 Node.trimEmptyElement(lexer, table); 2194 return; 2195 } 2196 } 2197 } 2198 2199 if (!((node.tag.model & Dict.CM_TABLE) != 0)) 2200 { 2201 lexer.ungetToken(); 2202 Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN); 2203 lexer.istackbase = istackbase; 2204 Node.trimEmptyElement(lexer, table); 2205 return; 2206 } 2207 2208 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 2209 { 2210 Node.insertNodeAtEnd(table, node);; 2211 parseTag(lexer, node, Lexer.IgnoreWhitespace); 2212 continue; 2213 } 2214 2215 2216 Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); 2217 } 2218 2219 Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR); 2220 Node.trimEmptyElement(lexer, table); 2221 lexer.istackbase = istackbase; 2222 } 2223 2224 }; 2225 2226 public static class ParseColGroup implements Parser { 2227 2228 public void parse( Lexer lexer, Node colgroup, short mode ) 2229 { 2230 Node node, parent; 2231 TagTable tt = lexer.configuration.tt; 2232 2233 if ((colgroup.tag.model & Dict.CM_EMPTY) != 0) 2234 return; 2235 2236 while (true) 2237 { 2238 node = lexer.getToken(Lexer.IgnoreWhitespace); 2239 if (node == null) break; 2240 if (node.tag == colgroup.tag && node.type == Node.EndTag) 2241 { 2242 colgroup.closed = true; 2243 return; 2244 } 2245 2246 2250 if (node.type == Node.EndTag) 2251 { 2252 if (node.tag == tt.tagForm) 2253 { 2254 lexer.badForm = 1; 2255 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); 2256 continue; 2257 } 2258 2259 for (parent = colgroup.parent; 2260 parent != null; parent = parent.parent) 2261 { 2262 2263 if (node.tag == parent.tag) 2264 { 2265 lexer.ungetToken(); 2266 return; 2267 } 2268 } 2269 } 2270 2271 if (node.type == Node.TextNode) 2272 { 2273 lexer.ungetToken(); 2274 return; 2275 } 2276 2277 2278 if (Node.insertMisc(colgroup, node)) 2279 continue; 2280 2281 2282 if (node.tag == null) 2283 { 2284 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); 2285 continue; 2286 } 2287 2288 if (node.tag != tt.tagCol) 2289 { 2290 lexer.ungetToken(); 2291 return; 2292 } 2293 2294 if (node.type == Node.EndTag) 2295 { 2296 Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); 2297 continue; 2298 } 2299 2300 2301 Node.insertNodeAtEnd(colgroup, node); 2302 parseTag(lexer, node, Lexer.IgnoreWhitespace); 2303 } 2304 } 2305 2306 }; 2307 2308 public static class ParseRowGroup implements Parser { 2309 2310 public void parse( Lexer lexer, Node rowgroup, short mode ) 2311 { 2312 Node node, parent; 2313 TagTable tt = lexer.configuration.tt; 2314 2315 if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0) 2316 return; 2317 2318 while (true) 2319 { 2320 node = lexer.getToken(Lexer.IgnoreWhitespace); 2321 if (node == null) break; 2322 if (node.tag == rowgroup.tag) 2323 { 2324 if (node.type == Node.EndTag) 2325 { 2326 rowgroup.closed = true; 2327 Node.trimEmptyElement(lexer, rowgroup); 2328 return; 2329 } 2330 2331 lexer.ungetToken(); 2332 return; 2333 } 2334 2335 2336 if (node.tag == tt.tagTable && node.type == Node.EndTag) 2337 { 2338 lexer.ungetToken(); 2339 Node.trimEmptyElement(lexer, rowgroup); 2340 return; 2341 } 2342 2343 2344 if (Node.insertMisc(rowgroup, node)) 2345 continue; 2346 2347 2348 if (node.tag == null && node.type != Node.TextNode) 2349 { 2350 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); 2351 continue; 2352 } 2353 2354 2359 2360 if (node.type != Node.EndTag) 2361 { 2362 if (node.tag == tt.tagTd || node.tag == tt.tagTh) 2363 { 2364 lexer.ungetToken(); 2365 node = lexer.inferredTag("tr"); 2366 Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG); 2367 } 2368 else if (node.type == Node.TextNode 2369 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) 2370 { 2371 Node.moveBeforeTable(rowgroup, node, tt); 2372 Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN); 2373 lexer.exiled = true; 2374 2375 if (node.type != Node.TextNode) 2376 parseTag(lexer, node, Lexer.IgnoreWhitespace); 2377 2378 lexer.exiled = false; 2379 continue; 2380 } 2381 else if ((node.tag.model & Dict.CM_HEAD) != 0) 2382 { 2383 Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN); 2384 moveToHead(lexer, rowgroup, node); 2385 continue; 2386 } 2387 } 2388 2389 2393 if (node.type == Node.EndTag) 2394 { 2395 if (node.tag == tt.tagForm) 2396 { 2397 lexer.badForm = 1; 2398 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); 2399 continue; 2400 } 2401 2402 if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh) 2403 { 2404 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); 2405 continue; 2406 } 2407 2408 for (parent = rowgroup.parent; 2409 parent != null; parent = parent.parent) 2410 { 2411 if (node.tag == parent.tag) 2412 { 2413 lexer.ungetToken(); 2414 Node.trimEmptyElement(lexer, rowgroup); 2415 return; 2416 } 2417 } 2418 } 2419 2420 2424 if ((node.tag.model & Dict.CM_ROWGRP) != 0) 2425 { 2426 if (node.type != Node.EndTag) 2427 lexer.ungetToken(); 2428 2429 Node.trimEmptyElement(lexer, rowgroup); 2430 return; 2431 } 2432 2433 if (node.type == Node.EndTag) 2434 { 2435 Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); 2436 continue; 2437 } 2438 2439 if (!(node.tag == tt.tagTr)) 2440 { 2441 node = lexer.inferredTag( "tr"); 2442 Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG); 2443 lexer.ungetToken(); 2444 } 2445 2446 2447 Node.insertNodeAtEnd(rowgroup, node); 2448 parseTag(lexer, node, Lexer.IgnoreWhitespace); 2449 } 2450 2451 Node.trimEmptyElement(lexer, rowgroup); 2452 } 2453 2454 }; 2455 2456 public static class ParseRow implements Parser { 2457 2458 public void parse( Lexer lexer, Node row, short mode ) 2459 { 2460 Node node, parent; 2461 boolean exclude_state; 2462 TagTable tt = lexer.configuration.tt; 2463 2464 if ((row.tag.model & Dict.CM_EMPTY) != 0) 2465 return; 2466 2467 while (true) 2468 { 2469 node = lexer.getToken(Lexer.IgnoreWhitespace); 2470 if (node == null) break; 2471 if (node.tag == row.tag) 2472 { 2473 if (node.type == Node.EndTag) 2474 { 2475 row.closed = true; 2476 Node.fixEmptyRow(lexer, row); 2477 return; 2478 } 2479 2480 lexer.ungetToken(); 2481 Node.fixEmptyRow(lexer, row); 2482 return; 2483 } 2484 2485 2489 if (node.type == Node.EndTag) 2490 { 2491 if (node.tag == tt.tagForm) 2492 { 2493 lexer.badForm = 1; 2494 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); 2495 continue; 2496 } 2497 2498 if (node.tag == tt.tagTd || node.tag == tt.tagTh) 2499 { 2500 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); 2501 continue; 2502 } 2503 2504 for (parent = row.parent; 2505 parent != null; parent = parent.parent) 2506 { 2507 if (node.tag == parent.tag) 2508 { 2509 lexer.ungetToken(); 2510 Node.trimEmptyElement(lexer, row); 2511 return; 2512 } 2513 } 2514 } 2515 2516 2517 if (Node.insertMisc(row, node)) 2518 continue; 2519 2520 2521 if (node.tag == null && node.type != Node.TextNode) 2522 { 2523 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); 2524 continue; 2525 } 2526 2527 2528 if (node.tag == tt.tagTable) 2529 { 2530 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); 2531 continue; 2532 } 2533 2534 2535 if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0) 2536 { 2537 lexer.ungetToken(); 2538 Node.trimEmptyElement(lexer, row); 2539 return; 2540 } 2541 2542 if (node.type == Node.EndTag) 2543 { 2544 Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); 2545 continue; 2546 } 2547 2548 2552 2553 if (node.type != Node.EndTag) 2554 { 2555 if (node.tag == tt.tagForm) 2556 { 2557 lexer.ungetToken(); 2558 node = lexer.inferredTag("td"); 2559 Report.warning(lexer, row, node, Report.MISSING_STARTTAG); 2560 } 2561 else if (node.type == Node.TextNode 2562 || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) 2563 { 2564 Node.moveBeforeTable(row, node, tt); 2565 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); 2566 lexer.exiled = true; 2567 2568 if (node.type != Node.TextNode) 2569 parseTag(lexer, node, Lexer.IgnoreWhitespace); 2570 2571 lexer.exiled = false; 2572 continue; 2573 } 2574 else if ((node.tag.model & Dict.CM_HEAD) != 0) 2575 { 2576 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); 2577 moveToHead(lexer, row, node); 2578 continue; 2579 } 2580 } 2581 2582 if (!(node.tag == tt.tagTd || node.tag == tt.tagTh)) 2583 { 2584 Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); 2585 continue; 2586 } 2587 2588 2589 Node.insertNodeAtEnd(row, node); 2590 exclude_state = lexer.excludeBlocks; 2591 lexer.excludeBlocks = false; 2592 parseTag(lexer, node, Lexer.IgnoreWhitespace); 2593 lexer.excludeBlocks = exclude_state; 2594 2595 2596 2597 while (lexer.istack.size() > lexer.istackbase) 2598 lexer.popInline( null); 2599 } 2600 2601 Node.trimEmptyElement(lexer, row); 2602 } 2603 2604 }; 2605 2606 public static class ParseNoFrames implements Parser { 2607 2608 public void parse( Lexer lexer, Node noframes, short mode ) 2609 { 2610 Node node; 2611 boolean checkstack; 2612 TagTable tt = lexer.configuration.tt; 2613 2614 lexer.badAccess |= Report.USING_NOFRAMES; 2615 mode = Lexer.IgnoreWhitespace; 2616 checkstack = true; 2617 2618 while (true) 2619 { 2620 node = lexer.getToken(mode); 2621 if (node == null) break; 2622 if (node.tag == noframes.tag && node.type == Node.EndTag) 2623 { 2624 noframes.closed = true; 2625 Node.trimSpaces(lexer, noframes); 2626 return; 2627 } 2628 2629 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)) 2630 { 2631 Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE); 2632 Node.trimSpaces(lexer, noframes); 2633 lexer.ungetToken(); 2634 return; 2635 } 2636 2637 if (node.tag == tt.tagHtml) 2638 { 2639 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 2640 Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); 2641 2642 continue; 2643 } 2644 2645 2646 if (Node.insertMisc(noframes, node)) 2647 continue; 2648 2649 if (node.tag == tt.tagBody && node.type == Node.StartTag) 2650 { 2651 Node.insertNodeAtEnd(noframes, node); 2652 parseTag(lexer, node, Lexer.IgnoreWhitespace ); 2653 continue; 2654 } 2655 2656 2657 if (node.type == Node.TextNode || node.tag != null) 2658 { 2659 lexer.ungetToken(); 2660 node = lexer.inferredTag("body"); 2661 if (lexer.configuration.XmlOut) 2662 Report.warning(lexer, noframes, node, Report.INSERTING_TAG); 2663 Node.insertNodeAtEnd(noframes, node); 2664 parseTag(lexer, node, Lexer.IgnoreWhitespace ); 2665 continue; 2666 } 2667 2668 Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); 2669 } 2670 2671 Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR); 2672 } 2673 2674 }; 2675 2676 public static class ParseSelect implements Parser { 2677 2678 public void parse( Lexer lexer, Node field, short mode ) 2679 { 2680 Node node; 2681 TagTable tt = lexer.configuration.tt; 2682 2683 lexer.insert = -1; 2684 2685 while (true) 2686 { 2687 node = lexer.getToken(Lexer.IgnoreWhitespace); 2688 if (node == null) break; 2689 if (node.tag == field.tag && node.type == Node.EndTag) 2690 { 2691 field.closed = true; 2692 Node.trimSpaces(lexer, field); 2693 return; 2694 } 2695 2696 2697 if (Node.insertMisc(field, node)) 2698 continue; 2699 2700 if (node.type == Node.StartTag && 2701 (node.tag == tt.tagOption || 2702 node.tag == tt.tagOptgroup || 2703 node.tag == tt.tagScript)) 2704 { 2705 Node.insertNodeAtEnd(field, node); 2706 parseTag(lexer, node, Lexer.IgnoreWhitespace); 2707 continue; 2708 } 2709 2710 2711 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); 2712 } 2713 2714 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR); 2715 } 2716 2717 }; 2718 2719 public static class ParseText implements Parser { 2720 2721 public void parse( Lexer lexer, Node field, short mode ) 2722 { 2723 Node node; 2724 TagTable tt = lexer.configuration.tt; 2725 2726 lexer.insert = -1; 2727 2728 if (field.tag == tt.tagTextarea) 2729 mode = Lexer.Preformatted; 2730 2731 while (true) 2732 { 2733 node = lexer.getToken(mode); 2734 if (node == null) break; 2735 if (node.tag == field.tag && node.type == Node.EndTag) 2736 { 2737 field.closed = true; 2738 Node.trimSpaces(lexer, field); 2739 return; 2740 } 2741 2742 2743 if (Node.insertMisc(field, node)) 2744 continue; 2745 2746 if (node.type == Node.TextNode) 2747 { 2748 2749 if (field.content == null && !((mode & Lexer.Preformatted) != 0)) 2750 Node.trimSpaces(lexer, field); 2751 2752 if (node.start >= node.end) 2753 { 2754 continue; 2755 } 2756 2757 Node.insertNodeAtEnd(field, node); 2758 continue; 2759 } 2760 2761 if (node.tag == tt.tagFont) 2762 { 2763 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); 2764 continue; 2765 } 2766 2767 2768 if (!((field.tag.model & Dict.CM_OPT) != 0)) 2769 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE); 2770 2771 lexer.ungetToken(); 2772 Node.trimSpaces(lexer, field); 2773 return; 2774 } 2775 2776 if (!((field.tag.model & Dict.CM_OPT) != 0)) 2777 Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR); 2778 } 2779 2780 }; 2781 2782 public static class ParseOptGroup implements Parser { 2783 2784 public void parse( Lexer lexer, Node field, short mode ) 2785 { 2786 Node node; 2787 TagTable tt = lexer.configuration.tt; 2788 2789 lexer.insert = -1; 2790 2791 while (true) 2792 { 2793 node = lexer.getToken(Lexer.IgnoreWhitespace); 2794 if (node == null) break; 2795 if (node.tag == field.tag && node.type == Node.EndTag) 2796 { 2797 field.closed = true; 2798 Node.trimSpaces(lexer, field); 2799 return; 2800 } 2801 2802 2803 if (Node.insertMisc(field, node)) 2804 continue; 2805 2806 if (node.type == Node.StartTag && 2807 (node.tag == tt.tagOption || node.tag == tt.tagOptgroup)) 2808 { 2809 if (node.tag == tt.tagOptgroup) 2810 Report.warning(lexer, field, node, Report.CANT_BE_NESTED); 2811 2812 Node.insertNodeAtEnd(field, node); 2813 parseTag(lexer, node, Lexer.MixedContent); 2814 continue; 2815 } 2816 2817 2818 Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); 2819 } 2820 } 2821 2822 }; 2823 2824 public static Parser getParseHTML() 2825 { 2826 return _parseHTML; 2827 } 2828 2829 public static Parser getParseHead() 2830 { 2831 return _parseHead; 2832 } 2833 2834 public static Parser getParseTitle() 2835 { 2836 return _parseTitle; 2837 } 2838 2839 public static Parser getParseScript() 2840 { 2841 return _parseScript; 2842 } 2843 2844 public static Parser getParseBody() 2845 { 2846 return _parseBody; 2847 } 2848 2849 public static Parser getParseFrameSet() 2850 { 2851 return _parseFrameSet; 2852 } 2853 2854 public static Parser getParseInline() 2855 { 2856 return _parseInline; 2857 } 2858 2859 public static Parser getParseList() 2860 { 2861 return _parseList; 2862 } 2863 2864 public static Parser getParseDefList() 2865 { 2866 return _parseDefList; 2867 } 2868 2869 public static Parser getParsePre() 2870 { 2871 return _parsePre; 2872 } 2873 2874 public static Parser getParseBlock() 2875 { 2876 return _parseBlock; 2877 } 2878 2879 public static Parser getParseTableTag() 2880 { 2881 return _parseTableTag; 2882 } 2883 2884 public static Parser getParseColGroup() 2885 { 2886 return _parseColGroup; 2887 } 2888 2889 public static Parser getParseRowGroup() 2890 { 2891 return _parseRowGroup; 2892 } 2893 2894 public static Parser getParseRow() 2895 { 2896 return _parseRow; 2897 } 2898 2899 public static Parser getParseNoFrames() 2900 { 2901 return _parseNoFrames; 2902 } 2903 2904 public static Parser getParseSelect() 2905 { 2906 return _parseSelect; 2907 } 2908 2909 public static Parser getParseText() 2910 { 2911 return _parseText; 2912 } 2913 2914 public static Parser getParseOptGroup() 2915 { 2916 return _parseOptGroup; 2917 } 2918 2919 2920 private static Parser _parseHTML = new ParseHTML(); 2921 private static Parser _parseHead = new ParseHead(); 2922 private static Parser _parseTitle = new ParseTitle(); 2923 private static Parser _parseScript = new ParseScript(); 2924 private static Parser _parseBody = new ParseBody(); 2925 private static Parser _parseFrameSet = new ParseFrameSet(); 2926 private static Parser _parseInline = new ParseInline(); 2927 private static Parser _parseList = new ParseList(); 2928 private static Parser _parseDefList = new ParseDefList(); 2929 private static Parser _parsePre = new ParsePre(); 2930 private static Parser _parseBlock = new ParseBlock(); 2931 private static Parser _parseTableTag = new ParseTableTag(); 2932 private static Parser _parseColGroup = new ParseColGroup(); 2933 private static Parser _parseRowGroup = new ParseRowGroup(); 2934 private static Parser _parseRow = new ParseRow(); 2935 private static Parser _parseNoFrames = new ParseNoFrames(); 2936 private static Parser _parseSelect = new ParseSelect(); 2937 private static Parser _parseText = new ParseText(); 2938 private static Parser _parseOptGroup = new ParseOptGroup(); 2939 2940 2943 public static Node parseDocument(Lexer lexer) 2944 { 2945 Node node, document, html; 2946 Node doctype = null; 2947 TagTable tt = lexer.configuration.tt; 2948 2949 document = lexer.newNode(); 2950 document.type = Node.RootNode; 2951 2952 while (true) 2953 { 2954 node = lexer.getToken(Lexer.IgnoreWhitespace); 2955 if (node == null) break; 2956 2957 2958 if (Node.insertMisc(document, node)) 2959 continue; 2960 2961 if (node.type == Node.DocTypeTag) 2962 { 2963 if (doctype == null) 2964 { 2965 Node.insertNodeAtEnd(document, node); 2966 doctype = node; 2967 } 2968 else 2969 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); 2970 continue; 2971 } 2972 2973 if (node.type == Node.EndTag) 2974 { 2975 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); continue; 2977 } 2978 2979 if (node.type != Node.StartTag || node.tag != tt.tagHtml) 2980 { 2981 lexer.ungetToken(); 2982 html = lexer.inferredTag("html"); 2983 } 2984 else 2985 html = node; 2986 2987 Node.insertNodeAtEnd(document, html); 2988 getParseHTML().parse(lexer, html, (short)0); break; 2990 } 2991 2992 return document; 2993 } 2994 2995 3012 3013 public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt) 3014 { 3015 AttVal attribute; 3016 3017 3018 for (attribute = element.attributes; attribute != null; attribute = attribute.next) 3019 { 3020 if (attribute.attribute.equals("xml:space")) 3021 { 3022 if (attribute.value.equals("preserve")) 3023 return true; 3024 3025 return false; 3026 } 3027 } 3028 3029 3030 if (Lexer.wstrcasecmp(element.element, "pre") == 0 3031 || Lexer.wstrcasecmp(element.element, "script") == 0 3032 || Lexer.wstrcasecmp(element.element, "style") == 0) 3033 return true; 3034 3035 if ( (tt != null) && (tt.findParser(element) == getParsePre()) ) 3036 return true; 3037 3038 3039 if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0) 3040 return true; 3041 3042 return false; 3043 } 3044 3045 3048 public static void parseXMLElement(Lexer lexer, Node element, short mode) 3049 { 3050 Node node; 3051 3052 3053 3054 if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0) 3055 return; 3056 3057 3058 3059 if (XMLPreserveWhiteSpace(element, lexer.configuration.tt)) 3060 mode = Lexer.Preformatted; 3061 3062 while (true) 3063 { 3064 node = lexer.getToken(mode); 3065 if (node == null) break; 3066 if (node.type == Node.EndTag && node.element.equals(element.element)) 3067 { 3068 element.closed = true; 3069 break; 3070 } 3071 3072 3073 if (node.type == Node.EndTag) 3074 { 3075 Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG); 3076 continue; 3077 } 3078 3079 3080 if (node.type == Node.StartTag) 3081 parseXMLElement(lexer, node, mode); 3082 3083 Node.insertNodeAtEnd(element, node); 3084 } 3085 3086 3090 3091 node = element.content; 3092 3093 if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted) 3094 { 3095 if (node.textarray[node.start] == (byte)' ') 3096 { 3097 node.start++; 3098 3099 if (node.start >= node.end) 3100 Node.discardElement(node); 3101 } 3102 } 3103 3104 3108 3109 node = element.last; 3110 3111 if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted) 3112 { 3113 if (node.textarray[node.end - 1] == (byte)' ') 3114 { 3115 node.end--; 3116 3117 if (node.start >= node.end) 3118 Node.discardElement(node); 3119 } 3120 } 3121 } 3122 3123 public static Node parseXMLDocument(Lexer lexer) 3124 { 3125 Node node, document, doctype; 3126 3127 document = lexer.newNode(); 3128 document.type = Node.RootNode; 3129 doctype = null; 3130 lexer.configuration.XmlTags = true; 3131 3132 while (true) 3133 { 3134 node = lexer.getToken(Lexer.IgnoreWhitespace); 3135 if (node == null) break; 3136 3137 if (node.type == Node.EndTag) 3138 { 3139 Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG); 3140 continue; 3141 } 3142 3143 3144 if (Node.insertMisc(document, node)) 3145 continue; 3146 3147 if (node.type == Node.DocTypeTag) 3148 { 3149 if (doctype == null) 3150 { 3151 Node.insertNodeAtEnd(document, node); 3152 doctype = node; 3153 } 3154 else 3155 Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); continue; 3157 } 3158 3159 3160 if (node.type == Node.StartTag) 3161 { 3162 Node.insertNodeAtEnd(document, node); 3163 parseXMLElement(lexer, node, Lexer.IgnoreWhitespace); 3164 } 3165 3166 } 3167 3168if (false) { 3170 node = document.findDocType(); 3171 3172 if (node != null) 3173 Node.discardElement(node); 3174} 3176 if (doctype != null && !lexer.checkDocTypeKeyWords(doctype)) 3177 Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE); 3178 3179 3180 if (lexer.configuration.XmlPi) 3181 lexer.fixXMLPI(document); 3182 3183 return document; 3184 } 3185 3186 public static boolean isJavaScript(Node node) 3187 { 3188 boolean result = false; 3189 AttVal attr; 3190 3191 if (node.attributes == null) 3192 return true; 3193 3194 for (attr = node.attributes; attr != null; attr = attr.next) 3195 { 3196 if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0 3197 || Lexer.wstrcasecmp(attr.attribute, "type") == 0) 3198 && Lexer.wsubstr(attr.value, "javascript")) 3199 result = true; 3200 } 3201 3202 return result; 3203 } 3204 3205} 3206 | Popular Tags |