1 5 6 package org.w3c.tidy; 7 8 33 34 57 58 import java.io.PrintWriter ; 59 import java.util.Stack ; 60 import java.util.Vector ; 61 62 public class Lexer { 63 64 65 public StreamIn in; 66 public PrintWriter errout; 67 public short badAccess; 68 public short badLayout; 69 public short badChars; 70 public short badForm; 71 public short warnings; 72 public short errors; 73 public int lines; 74 public int columns; 75 public boolean waswhite; 76 public boolean pushed; 77 public boolean insertspace; 78 public boolean excludeBlocks; 79 public boolean exiled; 80 public boolean isvoyager; 81 public short versions; 82 public int doctype; 83 public boolean badDoctype; 84 public int txtstart; 85 public int txtend; 86 public short state; 87 public Node token; 88 89 98 public byte[] lexbuf; 99 public int lexlength; 100 public int lexsize; 101 102 103 public Node inode; 104 public int insert; 105 public Stack istack; 106 public int istackbase; 107 108 public Style styles; 109 110 public Configuration configuration; 111 protected int seenBodyEndTag; 112 private Vector nodeList; 113 114 public Lexer(StreamIn in, Configuration configuration) 115 { 116 this.in = in; 117 this.lines = 1; 118 this.columns = 1; 119 this.state = LEX_CONTENT; 120 this.badAccess = 0; 121 this.badLayout = 0; 122 this.badChars = 0; 123 this.badForm = 0; 124 this.warnings = 0; 125 this.errors = 0; 126 this.waswhite = false; 127 this.pushed = false; 128 this.insertspace = false; 129 this.exiled = false; 130 this.isvoyager = false; 131 this.versions = Dict.VERS_EVERYTHING; 132 this.doctype = Dict.VERS_UNKNOWN; 133 this.badDoctype = false; 134 this.txtstart = 0; 135 this.txtend = 0; 136 this.token = null; 137 this.lexbuf = null; 138 this.lexlength = 0; 139 this.lexsize = 0; 140 this.inode = null; 141 this.insert = -1; 142 this.istack = new Stack (); 143 this.istackbase = 0; 144 this.styles = null; 145 this.configuration = configuration; 146 this.seenBodyEndTag = 0; 147 this.nodeList = new Vector (); 148 } 149 
150 public Node newNode() 151 { 152 Node node = new Node(); 153 nodeList.addElement(node); 154 return node; 155 } 156 157 public Node newNode(short type, byte[] textarray, int start, int end) 158 { 159 Node node = new Node(type, textarray, start, end); 160 nodeList.addElement(node); 161 return node; 162 } 163 164 public Node newNode(short type, byte[] textarray, int start, int end, String element) 165 { 166 Node node = new Node(type, textarray, start, end, element, configuration.tt); 167 nodeList.addElement(node); 168 return node; 169 } 170 171 public Node cloneNode(Node node) 172 { 173 Node cnode = (Node)node.clone(); 174 nodeList.addElement(cnode); 175 for (AttVal att = cnode.attributes; att != null; att = att.next) { 176 if (att.asp != null) 177 nodeList.addElement(att.asp); 178 if (att.php != null) 179 nodeList.addElement(att.php); 180 } 181 return cnode; 182 } 183 184 public AttVal cloneAttributes(AttVal attrs) 185 { 186 AttVal cattrs = (AttVal)attrs.clone(); 187 for (AttVal att = cattrs; att != null; att = att.next) { 188 if (att.asp != null) 189 nodeList.addElement(att.asp); 190 if (att.php != null) 191 nodeList.addElement(att.php); 192 } 193 return cattrs; 194 } 195 196 protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray) 197 { 198 Node node; 199 for (int i = 0; i < nodeList.size(); i++) { 200 node = (Node)(nodeList.elementAt(i)); 201 if (node.textarray == oldtextarray) 202 node.textarray = newtextarray; 203 } 204 } 205 206 207 public Node newLineNode() 208 { 209 Node node = newNode(); 210 211 node.textarray = this.lexbuf; 212 node.start = this.lexsize; 213 addCharToLexer((int)'\n'); 214 node.end = this.lexsize; 215 return node; 216 } 217 218 222 public static byte[] getBytes(String str) { 223 try { 224 return str.getBytes("UTF8"); 225 } catch (java.io.UnsupportedEncodingException e) { 226 throw new Error ("string to UTF-8 conversion failed: " + e.getMessage()); 227 } 228 } 229 230 public static String getString(byte[] bytes, int 
offset, int length) { 231 try { 232 return new String (bytes, offset, length, "UTF8"); 233 } catch (java.io.UnsupportedEncodingException e) { 234 throw new Error ("UTF-8 to string conversion failed: " + e.getMessage()); 235 } 236 } 237 238 public boolean endOfInput() 239 { 240 return this.in.isEndOfStream(); 241 } 242 243 public void addByte(int c) 244 { 245 if (this.lexsize + 1 >= this.lexlength) 246 { 247 while (this.lexsize + 1 >= this.lexlength) 248 { 249 if (this.lexlength == 0) 250 this.lexlength = 8192; 251 else 252 this.lexlength = this.lexlength * 2; 253 } 254 255 byte[] temp = this.lexbuf; 256 this.lexbuf = new byte[ this.lexlength ]; 257 if (temp != null) 258 { 259 System.arraycopy( temp, 0, this.lexbuf, 0, temp.length ); 260 updateNodeTextArrays(temp, this.lexbuf); 261 } 262 } 263 264 this.lexbuf[this.lexsize++] = (byte)c; 265 this.lexbuf[this.lexsize] = (byte)'\0'; 266 } 267 268 public void changeChar(byte c) 269 { 270 if (this.lexsize > 0) 271 { 272 this.lexbuf[this.lexsize-1] = c; 273 } 274 } 275 276 277 public void addCharToLexer(int c) 278 { 279 if (c < 128) 280 addByte(c); 281 else if (c <= 0x7FF) 282 { 283 addByte(0xC0 | (c >> 6)); 284 addByte(0x80 | (c & 0x3F)); 285 } 286 else if (c <= 0xFFFF) 287 { 288 addByte(0xE0 | (c >> 12)); 289 addByte(0x80 | ((c >> 6) & 0x3F)); 290 addByte(0x80 | (c & 0x3F)); 291 } 292 else if (c <= 0x1FFFFF) 293 { 294 addByte(0xF0 | (c >> 18)); 295 addByte(0x80 | ((c >> 12) & 0x3F)); 296 addByte(0x80 | ((c >> 6) & 0x3F)); 297 addByte(0x80 | (c & 0x3F)); 298 } 299 else 300 { 301 addByte(0xF8 | (c >> 24)); 302 addByte(0x80 | ((c >> 18) & 0x3F)); 303 addByte(0x80 | ((c >> 12) & 0x3F)); 304 addByte(0x80 | ((c >> 6) & 0x3F)); 305 addByte(0x80 | (c & 0x3F)); 306 } 307 } 308 309 public void addStringToLexer(String str) 310 { 311 for ( int i = 0; i < str.length(); i++ ) { 312 addCharToLexer( (int)str.charAt(i) ); 313 } 314 } 315 316 327 public void parseEntity(short mode) 328 { 329 short map; 330 int start; 331 boolean first = 
true; 332 boolean semicolon = false; 333 boolean numeric = false; 334 int c, ch, startcol; 335 String str; 336 337 start = this.lexsize - 1; 338 startcol = this.in.curcol - 1; 339 340 while (true) 341 { 342 c = this.in.readChar(); 343 if (c == StreamIn.EndOfStream) break; 344 if (c == ';') 345 { 346 semicolon = true; 347 break; 348 } 349 350 if (first && c == '#') 351 { 352 addCharToLexer(c); 353 first = false; 354 numeric = true; 355 continue; 356 } 357 358 first = false; 359 map = MAP((char)c); 360 361 365 if (numeric && ((c == 'x') || ((map & DIGIT) != 0))) 366 { 367 addCharToLexer(c); 368 continue; 369 } 370 if (!numeric && ((map & NAMECHAR) != 0)) 371 { 372 addCharToLexer(c); 373 continue; 374 } 375 376 377 378 this.in.ungetChar(c); 379 break; 380 } 381 382 str = getString( this.lexbuf, start, this.lexsize - start ); 383 ch = EntityTable.getDefaultEntityTable().entityCode( str ); 384 385 386 if (ch <= 0) 387 { 388 389 this.lines = this.in.curline; 390 this.columns = startcol; 391 392 if (this.lexsize > start +1 ) 393 { 394 Report.entityError(this, Report.UNKNOWN_ENTITY, str, ch); 395 396 if (semicolon) 397 addCharToLexer(';'); 398 } 399 else 400 { 401 Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch); 402 } 403 } 404 else 405 { 406 if (c != ';') 407 { 408 409 this.lines = this.in.curline; 410 this.columns = startcol; 411 Report.entityError(this, Report.MISSING_SEMICOLON, str, c); 412 } 413 414 this.lexsize = start; 415 416 if (ch == 160 && (mode & Preformatted) != 0) 417 ch = ' '; 418 419 addCharToLexer(ch); 420 421 if (ch == '&' && !this.configuration.QuoteAmpersand) 422 { 423 addCharToLexer('a'); 424 addCharToLexer('m'); 425 addCharToLexer('p'); 426 addCharToLexer(';'); 427 } 428 } 429 } 430 431 public char parseTagName() 432 { 433 short map; 434 int c; 435 436 437 438 c = this.lexbuf[this.txtstart]; 439 map = MAP((char)c); 440 441 if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) 442 { 443 c += (int)((int)'a' - (int)'A'); 444 
this.lexbuf[this.txtstart] = (byte)c; 445 } 446 447 while (true) 448 { 449 c = this.in.readChar(); 450 if (c == StreamIn.EndOfStream) break; 451 map = MAP((char)c); 452 453 if ((map & NAMECHAR) == 0) 454 break; 455 456 457 458 if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) 459 c += (int)((int)'a' - (int)'A'); 460 461 addCharToLexer(c); 462 } 463 464 this.txtend = this.lexsize; 465 return (char)c; 466 } 467 468 public void addStringLiteral(String str) 469 { 470 for ( int i = 0; i < str.length(); i++ ) { 471 addCharToLexer( (int)str.charAt(i) ); 472 } 473 } 474 475 476 public short HTMLVersion() 477 { 478 short versions; 479 480 versions = this.versions; 481 482 if ((versions & Dict.VERS_HTML20) != 0) 483 return Dict.VERS_HTML20; 484 485 if ((versions & Dict.VERS_HTML32) != 0) 486 return Dict.VERS_HTML32; 487 488 if ((versions & Dict.VERS_HTML40_STRICT) != 0) 489 return Dict.VERS_HTML40_STRICT; 490 491 if ((versions & Dict.VERS_HTML40_LOOSE) != 0) 492 return Dict.VERS_HTML40_LOOSE; 493 494 if ((versions & Dict.VERS_FRAMES) != 0) 495 return Dict.VERS_FRAMES; 496 497 return Dict.VERS_UNKNOWN; 498 } 499 500 public String HTMLVersionName() 501 { 502 short guessed; 503 int j; 504 505 guessed = apparentVersion(); 506 507 for (j = 0; j < W3CVersion.length; ++j) 508 { 509 if (guessed == W3CVersion[j].code) 510 { 511 if (this.isvoyager) 512 return W3CVersion[j].voyagerName; 513 514 return W3CVersion[j].name; 515 } 516 } 517 518 return null; 519 } 520 521 522 public boolean addGenerator(Node root) 523 { 524 AttVal attval; 525 Node node; 526 Node head = root.findHEAD(configuration.tt); 527 528 if (head != null) 529 { 530 for (node = head.content; node != null; node = node.next) 531 { 532 if (node.tag == configuration.tt.tagMeta) 533 { 534 attval = node.getAttrByName("name"); 535 536 if (attval != null && attval.value != null && 537 Lexer.wstrcasecmp(attval.value, "generator") == 0) 538 { 539 attval = node.getAttrByName("content"); 540 541 if (attval != null && 
attval.value != null && 542 attval.value.length() >= 9 && 543 Lexer.wstrcasecmp(attval.value.substring(0, 9), "HTML Tidy") == 0) 544 { 545 return false; 546 } 547 } 548 } 549 } 550 551 node = this.inferredTag("meta"); 552 node.addAttribute("content", "HTML Tidy, see www.w3.org"); 553 node.addAttribute("name", "generator"); 554 Node.insertNodeAtStart(head, node); 555 return true; 556 } 557 558 return false; 559 } 560 561 562 563 564 private static boolean findBadSubString(String s, String p, int len) 565 { 566 int n = s.length(); 567 int i = 0; 568 String ps; 569 570 while (n < len) 571 { 572 ps = p.substring(i, i + n); 573 if (wstrcasecmp(s, ps) == 0) 574 return (!ps.equals(s.substring(0, n))); 575 576 ++i; 577 --len; 578 } 579 580 return false; 581 } 582 583 public boolean checkDocTypeKeyWords(Node doctype) 584 { 585 int len = doctype.end - doctype.start; 586 String s = getString(this.lexbuf, doctype.start, len); 587 588 return !( 589 findBadSubString("SYSTEM", s, len) || 590 findBadSubString("PUBLIC", s, len) || 591 findBadSubString("//DTD", s, len) || 592 findBadSubString("//W3C", s, len) || 593 findBadSubString("//EN", s, len) 594 ); 595 } 596 597 598 public short findGivenVersion(Node doctype) 599 { 600 String p, s; 601 int i, j; 602 int len; 603 String str1; 604 String str2; 605 606 607 str1 = getString(this.lexbuf, doctype.start, 5); 608 if (wstrcasecmp(str1, "html ") != 0) 609 return 0; 610 611 if (!checkDocTypeKeyWords(doctype)) 612 Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE); 613 614 615 str1 = getString(this.lexbuf, doctype.start + 5, 7); 616 if (wstrcasecmp(str1, "SYSTEM ") == 0) 617 { 618 619 if (!str1.substring(0, 6).equals("SYSTEM")) 620 System.arraycopy( getBytes("SYSTEM"), 0, 621 this.lexbuf, doctype.start + 5, 6 ); 622 return 0; 623 } 624 625 if (wstrcasecmp(str1, "PUBLIC ") == 0) 626 { 627 if (!str1.substring(0, 6).equals("PUBLIC")) 628 System.arraycopy( getBytes("PUBLIC "), 0, 629 this.lexbuf, doctype.start + 5, 6 ); 630 } 
631 else 632 this.badDoctype = true; 633 634 for (i = doctype.start; i < doctype.end; ++i) 635 { 636 if (this.lexbuf[i] == (byte)'"') 637 { 638 str1 = getString( this.lexbuf, i + 1, 12 ); 639 str2 = getString( this.lexbuf, i + 1, 13 ); 640 if (str1.equals("-//W3C//DTD ")) 641 { 642 643 for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j); 644 len = j - i - 13; 645 p = getString( this.lexbuf, i + 13, len ); 646 647 for (j = 1; j < W3CVersion.length; ++j) 648 { 649 s = W3CVersion[j].name; 650 if (len == s.length() && s.equals(p)) 651 return W3CVersion[j].code; 652 } 653 654 655 } 656 else if (str2.equals("-//IETF//DTD ")) 657 { 658 659 for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j); 660 len = j - i - 14; 661 662 p = getString( this.lexbuf, i + 14, len ); 663 s = W3CVersion[0].name; 664 if (len == s.length() && s.equals(p)) 665 return W3CVersion[0].code; 666 667 668 } 669 break; 670 } 671 } 672 673 return 0; 674 } 675 676 public void fixHTMLNameSpace(Node root, String profile) 677 { 678 Node node; 679 AttVal prev, attr; 680 681 for (node = root.content; 682 node != null && node.tag != configuration.tt.tagHtml; node = node.next); 683 684 if (node != null) 685 { 686 prev = null; 687 688 for (attr = node.attributes; attr != null; attr = attr.next) 689 { 690 if (attr.attribute.equals("xmlns")) 691 break; 692 693 prev = attr; 694 } 695 696 if (attr != null) 697 { 698 if (!attr.value.equals(profile)) 699 { 700 Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE); 701 attr.value = profile; 702 } 703 } 704 else 705 { 706 attr = new AttVal( node.attributes, null, (int)'"', 707 "xmlns", profile ); 708 attr.dict = 709 AttributeTable.getDefaultAttributeTable().findAttribute( attr ); 710 node.attributes = attr; 711 } 712 } 713 } 714 715 public boolean setXHTMLDocType(Node root) 716 { 717 String fpi = " "; 718 String sysid = ""; 719 String namespace = XHTML_NAMESPACE; 720 Node doctype; 721 722 doctype = root.findDocType(); 723 724 
if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) 725 { 726 if (doctype != null) 727 Node.discardElement(doctype); 728 return true; 729 } 730 731 if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) 732 { 733 734 if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) 735 { 736 fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; 737 sysid = voyager_strict; 738 } 739 else if ((this.versions & Dict.VERS_LOOSE) != 0) 740 { 741 fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; 742 sysid = voyager_loose; 743 } 744 else if ((this.versions & Dict.VERS_FRAMES) != 0) 745 { 746 fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN"; 747 sysid = voyager_frameset; 748 } 749 else 750 { 751 fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; 752 sysid = voyager_loose; 753 } 754 } 755 else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) 756 { 757 fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; 758 sysid = voyager_strict; 759 } 760 else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) 761 { 762 fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; 763 sysid = voyager_loose; 764 } 765 766 fixHTMLNameSpace(root, namespace); 767 768 if (doctype == null) 769 { 770 doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0); 771 doctype.next = root.content; 772 doctype.parent = root; 773 doctype.prev = null; 774 root.content = doctype; 775 } 776 777 if (configuration.docTypeMode == Configuration.DOCTYPE_USER && 778 configuration.docTypeStr != null) 779 { 780 fpi = configuration.docTypeStr; 781 sysid = ""; 782 } 783 784 this.txtstart = this.lexsize; 785 this.txtend = this.lexsize; 786 787 788 addStringLiteral("html PUBLIC "); 789 790 791 if (fpi.charAt(0) == '"') 792 addStringLiteral(fpi); 793 else 794 { 795 addStringLiteral("\""); 796 addStringLiteral(fpi); 797 addStringLiteral("\""); 798 } 799 800 if (sysid.length() + 6 >= this.configuration.wraplen) 801 addStringLiteral("\n\""); 802 else 803 addStringLiteral("\n \""); 804 805 806 addStringLiteral(sysid); 807 addStringLiteral("\""); 808 
809 this.txtend = this.lexsize; 810 811 doctype.start = this.txtstart; 812 doctype.end = this.txtend; 813 814 return false; 815 } 816 817 public short apparentVersion() 818 { 819 switch (this.doctype) 820 { 821 case Dict.VERS_UNKNOWN: 822 return HTMLVersion(); 823 824 case Dict.VERS_HTML20: 825 if ((this.versions & Dict.VERS_HTML20) != 0) 826 return Dict.VERS_HTML20; 827 828 break; 829 830 case Dict.VERS_HTML32: 831 if ((this.versions & Dict.VERS_HTML32) != 0) 832 return Dict.VERS_HTML32; 833 834 break; 835 836 case Dict.VERS_HTML40_STRICT: 837 if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) 838 return Dict.VERS_HTML40_STRICT; 839 840 break; 841 842 case Dict.VERS_HTML40_LOOSE: 843 if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0) 844 return Dict.VERS_HTML40_LOOSE; 845 846 break; 847 848 case Dict.VERS_FRAMES: 849 if ((this.versions & Dict.VERS_FRAMES) != 0) 850 return Dict.VERS_FRAMES; 851 852 break; 853 } 854 855 Report.warning(this, null, null, Report.INCONSISTENT_VERSION); 856 return this.HTMLVersion(); 857 } 858 859 860 public boolean fixDocType(Node root) 861 { 862 Node doctype; 863 int guessed = Dict.VERS_HTML40_STRICT, i; 864 865 if (this.badDoctype) 866 Report.warning(this, null, null, Report.MALFORMED_DOCTYPE); 867 868 if (configuration.XmlOut) 869 return true; 870 871 doctype = root.findDocType(); 872 873 if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) 874 { 875 if (doctype != null) 876 Node.discardElement(doctype); 877 return true; 878 } 879 880 if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) 881 { 882 Node.discardElement(doctype); 883 doctype = null; 884 guessed = Dict.VERS_HTML40_STRICT; 885 } 886 else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) 887 { 888 Node.discardElement(doctype); 889 doctype = null; 890 guessed = Dict.VERS_HTML40_LOOSE; 891 } 892 else if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) 893 { 894 if (doctype != null) 895 { 896 if (this.doctype == Dict.VERS_UNKNOWN) 897 
return false; 898 899 switch (this.doctype) 900 { 901 case Dict.VERS_UNKNOWN: 902 return false; 903 904 case Dict.VERS_HTML20: 905 if ((this.versions & Dict.VERS_HTML20) != 0) 906 return true; 907 908 break; 909 910 case Dict.VERS_HTML32: 911 if ((this.versions & Dict.VERS_HTML32) != 0) 912 return true; 913 914 break; 915 916 case Dict.VERS_HTML40_STRICT: 917 if ((this.versions & Dict.VERS_HTML40_STRICT) != 0) 918 return true; 919 920 break; 921 922 case Dict.VERS_HTML40_LOOSE: 923 if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0) 924 return true; 925 926 break; 927 928 case Dict.VERS_FRAMES: 929 if ((this.versions & Dict.VERS_FRAMES) != 0) 930 return true; 931 932 break; 933 } 934 935 936 } 937 938 939 guessed = HTMLVersion(); 940 } 941 942 if (guessed == Dict.VERS_UNKNOWN) 943 return false; 944 945 946 if (this.configuration.XmlOut || this.configuration.XmlTags || this.isvoyager) 947 { 948 if (doctype != null) 949 Node.discardElement(doctype); 950 951 for (i = 0; i < W3CVersion.length; ++i) 952 { 953 if (guessed == W3CVersion[i].code) 954 { 955 fixHTMLNameSpace(root, W3CVersion[i].profile); 956 break; 957 } 958 } 959 960 return true; 961 } 962 963 if (doctype == null) 964 { 965 doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0); 966 doctype.next = root.content; 967 doctype.parent = root; 968 doctype.prev = null; 969 root.content = doctype; 970 } 971 972 this.txtstart = this.lexsize; 973 this.txtend = this.lexsize; 974 975 976 addStringLiteral("html PUBLIC "); 977 978 if (configuration.docTypeMode == Configuration.DOCTYPE_USER && 979 configuration.docTypeStr != null) 980 addStringLiteral(configuration.docTypeStr); 981 else if (guessed == Dict.VERS_HTML20) 982 addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\""); 983 else 984 { 985 addStringLiteral("\"-//W3C//DTD "); 986 987 for (i = 0; i < W3CVersion.length; ++i) 988 { 989 if (guessed == W3CVersion[i].code) 990 { 991 addStringLiteral(W3CVersion[i].name); 992 break; 993 } 994 } 995 996 addStringLiteral("//EN\""); 
997 } 998 999 this.txtend = this.lexsize; 1000 1001 doctype.start = this.txtstart; 1002 doctype.end = this.txtend; 1003 1004 return true; 1005 } 1006 1007 1008 public boolean fixXMLPI(Node root) 1009 { 1010 Node xml; 1011 int s; 1012 1013 if( root.content != null && root.content.type == Node.ProcInsTag) 1014 { 1015 s = root.content.start; 1016 1017 if (this.lexbuf[s] == (byte)'x' && 1018 this.lexbuf[s+1] == (byte)'m' && 1019 this.lexbuf[s+2] == (byte)'l') 1020 return true; 1021 } 1022 1023 xml = newNode(Node.ProcInsTag, this.lexbuf, 0, 0); 1024 xml.next = root.content; 1025 1026 if (root.content != null) 1027 { 1028 root.content.prev = xml; 1029 xml.next = root.content; 1030 } 1031 1032 root.content = xml; 1033 1034 this.txtstart = this.lexsize; 1035 this.txtend = this.lexsize; 1036 addStringLiteral("xml version=\"1.0\""); 1037 if (this.configuration.CharEncoding == Configuration.LATIN1) 1038 addStringLiteral(" encoding=\"ISO-8859-1\""); 1039 this.txtend = this.lexsize; 1040 1041 xml.start = this.txtstart; 1042 xml.end = this.txtend; 1043 return false; 1044 } 1045 1046 public Node inferredTag(String name) 1047 { 1048 Node node; 1049 1050 node = newNode(Node.StartTag, 1051 this.lexbuf, 1052 this.txtstart, 1053 this.txtend, 1054 name); 1055 node.implicit = true; 1056 return node; 1057 } 1058 1059 public static boolean expectsContent(Node node) 1060 { 1061 if (node.type != Node.StartTag) 1062 return false; 1063 1064 1065 if (node.tag == null) 1066 return true; 1067 1068 if ((node.tag.model & Dict.CM_EMPTY) != 0) 1069 return false; 1070 1071 return true; 1072 } 1073 1074 1079 public Node getCDATA(Node container) 1080 { 1081 int c, lastc, start, len, i; 1082 String str; 1083 boolean endtag = false; 1084 1085 this.lines = this.in.curline; 1086 this.columns = this.in.curcol; 1087 this.waswhite = false; 1088 this.txtstart = this.lexsize; 1089 this.txtend = this.lexsize; 1090 1091 lastc = (int)'\0'; 1092 start = -1; 1093 1094 while (true) 1095 { 1096 c = this.in.readChar(); 
1097 if (c == StreamIn.EndOfStream) break; 1098 1099 1100 if (c == (int)'/' && lastc == (int)'<') 1101 { 1102 if (endtag) 1103 { 1104 this.lines = this.in.curline; 1105 this.columns = this.in.curcol - 3; 1106 1107 Report.warning(this, null, null, Report.BAD_CDATA_CONTENT); 1108 } 1109 1110 start = this.lexsize + 1; 1111 endtag = true; 1112 } 1113 else if (c == (int)'>' && start >= 0) 1114 { 1115 len = this.lexsize - start; 1116 if (len == container.element.length()) 1117 { 1118 str = getString( this.lexbuf, start, len ); 1119 if (Lexer.wstrcasecmp(str, container.element) == 0) 1120 { 1121 this.txtend = start - 2; 1122 break; 1123 } 1124 } 1125 1126 this.lines = this.in.curline; 1127 this.columns = this.in.curcol - 3; 1128 1129 Report.warning(this, null, null, Report.BAD_CDATA_CONTENT); 1130 1131 1132 1133 if (ParserImpl.isJavaScript(container)) 1134 { 1135 for (i = this.lexsize; i > start-1; --i) 1136 this.lexbuf[i] = this.lexbuf[i-1]; 1137 1138 this.lexbuf[start-1] = (byte)'\\'; 1139 this.lexsize++; 1140 } 1141 1142 start = -1; 1143 } 1144 else if (c == (int)'\r') 1145 { 1146 c = this.in.readChar(); 1147 1148 if (c != (int)'\n') 1149 this.in.ungetChar(c); 1150 1151 c = (int)'\n'; 1152 } 1153 1154 addCharToLexer((int)c); 1155 this.txtend = this.lexsize; 1156 lastc = c; 1157 } 1158 1159 if (c == StreamIn.EndOfStream) 1160 Report.warning(this, container, null, Report.MISSING_ENDTAG_FOR); 1161 1162 if (this.txtend > this.txtstart) 1163 { 1164 this.token = newNode(Node.TextNode, 1165 this.lexbuf, 1166 this.txtstart, 1167 this.txtend); 1168 return this.token; 1169 } 1170 1171 return null; 1172 } 1173 1174 public void ungetToken() 1175 { 1176 this.pushed = true; 1177 } 1178 1179 public static final short IgnoreWhitespace = 0; 1180 public static final short MixedContent = 1; 1181 public static final short Preformatted = 2; 1182 public static final short IgnoreMarkup = 3; 1183 1184 1191 1192 public Node getToken(short mode) 1193 { 1194 short map; 1195 int c = 0; 1196 int 
lastc; 1197 int badcomment = 0; 1198 MutableBoolean isempty = new MutableBoolean(); 1199 AttVal attributes; 1200 1201 if (this.pushed) 1202 { 1203 1204 if (this.token.type != Node.TextNode || 1205 (this.insert == -1 && this.inode == null)) 1206 { 1207 this.pushed = false; 1208 return this.token; 1209 } 1210 } 1211 1212 1214 1215 if (this.insert != -1 || this.inode != null) 1216 return insertedToken(); 1217 1218 this.lines = this.in.curline; 1219 this.columns = this.in.curcol; 1220 this.waswhite = false; 1221 1222 this.txtstart = this.lexsize; 1223 this.txtend = this.lexsize; 1224 1225 while (true) 1226 { 1227 c = this.in.readChar(); 1228 if (c == StreamIn.EndOfStream) break; 1229 if (this.insertspace && mode != IgnoreWhitespace) 1230 { 1231 addCharToLexer(' '); 1232 this.waswhite = true; 1233 this.insertspace = false; 1234 } 1235 1236 1237 1238 if (c == '\r') 1239 { 1240 c = this.in.readChar(); 1241 1242 if (c != '\n') 1243 this.in.ungetChar(c); 1244 1245 c = '\n'; 1246 } 1247 1248 addCharToLexer(c); 1249 1250 switch (this.state) 1251 { 1252 case LEX_CONTENT: 1253 map = MAP((char)c); 1254 1255 1260 if (((map & WHITE) != 0) && (mode == IgnoreWhitespace) 1261 && this.lexsize == this.txtstart + 1) 1262 { 1263 --this.lexsize; 1264 this.waswhite = false; 1265 this.lines = this.in.curline; 1266 this.columns = this.in.curcol; 1267 continue; 1268 } 1269 1270 if (c == '<') 1271 { 1272 this.state = LEX_GT; 1273 continue; 1274 } 1275 1276 if ((map & WHITE) != 0) 1277 { 1278 1279 if (this.waswhite) 1280 { 1281 if (mode != Preformatted && mode != IgnoreMarkup) 1282 { 1283 --this.lexsize; 1284 this.lines = this.in.curline; 1285 this.columns = this.in.curcol; 1286 } 1287 } 1288 else 1289 { 1290 this.waswhite = true; 1291 lastc = c; 1292 1293 if (mode != Preformatted && mode != IgnoreMarkup && c != ' ') 1294 changeChar((byte)' '); 1295 } 1296 1297 continue; 1298 } 1299 else if (c == '&' && mode != IgnoreMarkup) 1300 parseEntity(mode); 1301 1302 1303 if (mode == IgnoreWhitespace) 
1304 mode = MixedContent; 1305 1306 this.waswhite = false; 1307 continue; 1308 1309 case LEX_GT: 1310 1311 1312 if (c == '/') 1313 { 1314 c = this.in.readChar(); 1315 if (c == StreamIn.EndOfStream) 1316 { 1317 this.in.ungetChar(c); 1318 continue; 1319 } 1320 1321 addCharToLexer(c); 1322 map = MAP((char)c); 1323 1324 if ((map & LETTER) != 0) 1325 { 1326 this.lexsize -= 3; 1327 this.txtend = this.lexsize; 1328 this.in.ungetChar(c); 1329 this.state = LEX_ENDTAG; 1330 this.lexbuf[this.lexsize] = (byte)'\0'; 1331 this.in.curcol -= 2; 1332 1333 1334 if (this.txtend > this.txtstart) 1335 { 1336 1337 if (mode == IgnoreWhitespace && this.lexbuf[this.lexsize - 1] == (byte)' ') 1338 { 1339 this.lexsize -= 1; 1340 this.txtend = this.lexsize; 1341 } 1342 1343 this.token = newNode(Node.TextNode, 1344 this.lexbuf, 1345 this.txtstart, 1346 this.txtend); 1347 return this.token; 1348 } 1349 1350 continue; 1351 } 1352 1353 1354 this.waswhite = false; 1355 this.state = LEX_CONTENT; 1356 continue; 1357 } 1358 1359 if (mode == IgnoreMarkup) 1360 { 1361 1362 this.waswhite = false; 1363 this.state = LEX_CONTENT; 1364 continue; 1365 } 1366 1367 1371 if (c == '!') 1372 { 1373 c = this.in.readChar(); 1374 1375 if (c == '-') 1376 { 1377 c = this.in.readChar(); 1378 1379 if (c == '-') 1380 { 1381 this.state = LEX_COMMENT; 1382 this.lexsize -= 2; 1383 this.txtend = this.lexsize; 1384 1385 1386 if (this.txtend > this.txtstart) 1387 { 1388 this.token = newNode(Node.TextNode, 1389 this.lexbuf, 1390 this.txtstart, 1391 this.txtend); 1392 return this.token; 1393 } 1394 1395 this.txtstart = this.lexsize; 1396 continue; 1397 } 1398 1399 Report.warning(this, null, null, Report.MALFORMED_COMMENT); 1400 } 1401 else if (c == 'd' || c == 'D') 1402 { 1403 this.state = LEX_DOCTYPE; 1404 this.lexsize -= 2; 1405 this.txtend = this.lexsize; 1406 mode = IgnoreWhitespace; 1407 1408 1409 1410 for (;;) 1411 { 1412 c = this.in.readChar(); 1413 1414 if (c == StreamIn.EndOfStream || c == '>') 1415 { 1416 
this.in.ungetChar(c); 1417 break; 1418 } 1419 1420 map = MAP((char)c); 1421 1422 if ((map & WHITE) == 0) 1423 continue; 1424 1425 1426 1427 for (;;) 1428 { 1429 c = this.in.readChar(); 1430 1431 if (c == StreamIn.EndOfStream || c == '>') 1432 { 1433 this.in.ungetChar(c); 1434 break; 1435 } 1436 1437 map = MAP((char)c); 1438 1439 if ((map & WHITE) != 0) 1440 continue; 1441 1442 this.in.ungetChar(c); 1443 break; 1444 } 1445 1446 break; 1447 } 1448 1449 1450 if (this.txtend > this.txtstart) 1451 { 1452 this.token = newNode(Node.TextNode, 1453 this.lexbuf, 1454 this.txtstart, 1455 this.txtend); 1456 return this.token; 1457 } 1458 1459 this.txtstart = this.lexsize; 1460 continue; 1461 } 1462 else if (c == '[') 1463 { 1464 1465 this.lexsize -= 2; 1466 this.state = LEX_SECTION; 1467 this.txtend = this.lexsize; 1468 1469 1470 if (this.txtend > this.txtstart) 1471 { 1472 this.token = newNode(Node.TextNode, 1473 this.lexbuf, 1474 this.txtstart, 1475 this.txtend); 1476 return this.token; 1477 } 1478 1479 this.txtstart = this.lexsize; 1480 continue; 1481 } 1482 1483 1484 while (true) 1485 { 1486 c = this.in.readChar(); 1487 if (c == '>') break; 1488 if (c == -1) 1489 { 1490 this.in.ungetChar(c); 1491 break; 1492 } 1493 } 1494 1495 this.lexsize -= 2; 1496 this.lexbuf[this.lexsize] = (byte)'\0'; 1497 this.state = LEX_CONTENT; 1498 continue; 1499 } 1500 1501 1504 1505 if (c == '?') 1506 { 1507 this.lexsize -= 2; 1508 this.state = LEX_PROCINSTR; 1509 this.txtend = this.lexsize; 1510 1511 1512 if (this.txtend > this.txtstart) 1513 { 1514 this.token = newNode(Node.TextNode, 1515 this.lexbuf, 1516 this.txtstart, 1517 this.txtend); 1518 return this.token; 1519 } 1520 1521 this.txtstart = this.lexsize; 1522 continue; 1523 } 1524 1525 1526 if (c == '%') 1527 { 1528 this.lexsize -= 2; 1529 this.state = LEX_ASP; 1530 this.txtend = this.lexsize; 1531 1532 1533 if (this.txtend > this.txtstart) 1534 { 1535 this.token = newNode(Node.TextNode, 1536 this.lexbuf, 1537 this.txtstart, 1538 
this.txtend); 1539 return this.token; 1540 } 1541 1542 this.txtstart = this.lexsize; 1543 continue; 1544 } 1545 1546 1547 if (c == '#') 1548 { 1549 this.lexsize -= 2; 1550 this.state = LEX_JSTE; 1551 this.txtend = this.lexsize; 1552 1553 1554 if (this.txtend > this.txtstart) 1555 { 1556 this.token = newNode(Node.TextNode, 1557 this.lexbuf, 1558 this.txtstart, 1559 this.txtend); 1560 return this.token; 1561 } 1562 1563 this.txtstart = this.lexsize; 1564 continue; 1565 } 1566 1567 map = MAP((char)c); 1568 1569 1570 if ((map & LETTER) != 0) 1571 { 1572 this.in.ungetChar(c); 1573 this.lexsize -= 2; 1574 this.txtend = this.lexsize; 1575 this.state = LEX_STARTTAG; 1576 1577 1578 if (this.txtend > this.txtstart) 1579 { 1580 this.token = newNode(Node.TextNode, 1581 this.lexbuf, 1582 this.txtstart, 1583 this.txtend); 1584 return this.token; 1585 } 1586 1587 continue; 1588 } 1589 1590 1591 this.state = LEX_CONTENT; 1592 this.waswhite = false; 1593 continue; 1594 1595 case LEX_ENDTAG: 1596 this.txtstart = this.lexsize - 1; 1597 this.in.curcol += 2; 1598 c = parseTagName(); 1599 this.token = newNode(Node.EndTag, 1600 this.lexbuf, 1601 this.txtstart, 1602 this.txtend, 1603 getString(this.lexbuf, 1604 this.txtstart, 1605 this.txtend - this.txtstart)); 1606 this.lexsize = this.txtstart; 1607 this.txtend = this.txtstart; 1608 1609 1610 while (c != '>') 1611 { 1612 c = this.in.readChar(); 1613 1614 if (c == StreamIn.EndOfStream) 1615 break; 1616 } 1617 1618 if (c == StreamIn.EndOfStream) 1619 { 1620 this.in.ungetChar(c); 1621 continue; 1622 } 1623 1624 this.state = LEX_CONTENT; 1625 this.waswhite = false; 1626 return this.token; 1627 1628 case LEX_STARTTAG: 1629 this.txtstart = this.lexsize - 1; 1630 c = parseTagName(); 1631 isempty.value = false; 1632 attributes = null; 1633 this.token = newNode((isempty.value ? 
Node.StartEndTag : Node.StartTag), 1634 this.lexbuf, 1635 this.txtstart, 1636 this.txtend, 1637 getString(this.lexbuf, 1638 this.txtstart, 1639 this.txtend - this.txtstart)); 1640 1641 1642 if (c != '>') 1643 { 1644 if (c == '/') 1645 this.in.ungetChar(c); 1646 1647 attributes = parseAttrs(isempty); 1648 } 1649 1650 if (isempty.value) 1651 this.token.type = Node.StartEndTag; 1652 1653 this.token.attributes = attributes; 1654 this.lexsize = this.txtstart; 1655 this.txtend = this.txtstart; 1656 1657 1658 1659 1660 1661 if (expectsContent(this.token) || 1662 this.token.tag == configuration.tt.tagBr) 1663 { 1664 1665 c = this.in.readChar(); 1666 1667 if (c == '\r') 1668 { 1669 c = this.in.readChar(); 1670 1671 if (c != '\n') 1672 this.in.ungetChar(c); 1673 } 1674 else if (c != '\n' && c != '\f') 1675 this.in.ungetChar(c); 1676 1677 this.waswhite = true; 1678 } 1679 else 1680 this.waswhite = false; 1681 1682 this.state = LEX_CONTENT; 1683 1684 if (this.token.tag == null) 1685 Report.error(this, null, this.token, Report.UNKNOWN_ELEMENT); 1686 else if (!this.configuration.XmlTags) 1687 { 1688 this.versions &= this.token.tag.versions; 1689 1690 if ((this.token.tag.versions & Dict.VERS_PROPRIETARY) != 0) 1691 { 1692 if (!this.configuration.MakeClean && (this.token.tag == configuration.tt.tagNobr || 1693 this.token.tag == configuration.tt.tagWbr)) 1694 Report.warning(this, null, this.token, Report.PROPRIETARY_ELEMENT); 1695 } 1696 1697 if (this.token.tag.chkattrs != null) 1698 { 1699 this.token.checkUniqueAttributes(this); 1700 this.token.tag.chkattrs.check(this, this.token); 1701 } 1702 else 1703 this.token.checkAttributes(this); 1704 } 1705 1706 return this.token; 1707 1708 case LEX_COMMENT: 1709 1710 if (c != '-') 1711 continue; 1712 1713 c = this.in.readChar(); 1714 addCharToLexer(c); 1715 1716 if (c != '-') 1717 continue; 1718 1719 end_comment: while (true) { 1720 c = this.in.readChar(); 1721 1722 if (c == '>') 1723 { 1724 if (badcomment != 0) 1725 Report.warning(this, 
null, null, Report.MALFORMED_COMMENT); 1726 1727 this.txtend = this.lexsize - 2; this.lexbuf[this.lexsize] = (byte)'\0'; 1729 this.state = LEX_CONTENT; 1730 this.waswhite = false; 1731 this.token = newNode(Node.CommentTag, 1732 this.lexbuf, 1733 this.txtstart, 1734 this.txtend); 1735 1736 1737 1738 c = this.in.readChar(); 1739 1740 if (c == '\r') 1741 { 1742 c = this.in.readChar(); 1743 1744 if (c != '\n') 1745 this.token.linebreak = true; 1746 } 1747 1748 if (c == '\n') 1749 this.token.linebreak = true; 1750 else 1751 this.in.ungetChar(c); 1752 1753 return this.token; 1754 } 1755 1756 1757 if (badcomment == 0) 1758 { 1759 this.lines = this.in.curline; 1760 this.columns = this.in.curcol - 3; 1761 } 1762 1763 badcomment++; 1764 if (this.configuration.FixComments) 1765 this.lexbuf[this.lexsize - 2] = (byte)'='; 1766 1767 addCharToLexer(c); 1768 1769 1770 if (c != '-') 1771 break end_comment; 1772 1773 } 1774 1775 this.lexbuf[this.lexsize - 2] = (byte)'='; 1776 continue; 1777 1778 case LEX_DOCTYPE: 1779 map = MAP((char)c); 1780 1781 if ((map & WHITE) != 0) 1782 { 1783 if (this.waswhite) 1784 this.lexsize -= 1; 1785 1786 this.waswhite = true; 1787 } 1788 else 1789 this.waswhite = false; 1790 1791 if (c != '>') 1792 continue; 1793 1794 this.lexsize -= 1; 1795 this.txtend = this.lexsize; 1796 this.lexbuf[this.lexsize] = (byte)'\0'; 1797 this.state = LEX_CONTENT; 1798 this.waswhite = false; 1799 this.token = newNode(Node.DocTypeTag, 1800 this.lexbuf, 1801 this.txtstart, 1802 this.txtend); 1803 1804 this.doctype = findGivenVersion(this.token); 1805 return this.token; 1806 1807 case LEX_PROCINSTR: 1808 1809 1810 if (this.lexsize - this.txtstart == 3) 1811 { 1812 if ((getString(this.lexbuf, this.txtstart, 3)).equals("php")) 1813 { 1814 this.state = LEX_PHP; 1815 continue; 1816 } 1817 } 1818 1819 if (this.configuration.XmlPIs) 1820 { 1821 if (c != '?') 1822 continue; 1823 1824 1825 c = this.in.readChar(); 1826 1827 if (c == StreamIn.EndOfStream) 1828 { 1829 
Report.warning(this, null, null, Report.UNEXPECTED_END_OF_FILE); 1830 this.in.ungetChar(c); 1831 continue; 1832 } 1833 1834 addCharToLexer(c); 1835 } 1836 1837 if (c != '>') 1838 continue; 1839 1840 this.lexsize -= 1; 1841 this.txtend = this.lexsize; 1842 this.lexbuf[this.lexsize] = (byte)'\0'; 1843 this.state = LEX_CONTENT; 1844 this.waswhite = false; 1845 this.token = newNode(Node.ProcInsTag, 1846 this.lexbuf, 1847 this.txtstart, 1848 this.txtend); 1849 return this.token; 1850 1851 case LEX_ASP: 1852 if (c != '%') 1853 continue; 1854 1855 1856 c = this.in.readChar(); 1857 1858 1859 if (c != '>') 1860 { 1861 this.in.ungetChar(c); 1862 continue; 1863 } 1864 1865 this.lexsize -= 1; 1866 this.txtend = this.lexsize; 1867 this.lexbuf[this.lexsize] = (byte)'\0'; 1868 this.state = LEX_CONTENT; 1869 this.waswhite = false; 1870 this.token = newNode(Node.AspTag, 1871 this.lexbuf, 1872 this.txtstart, 1873 this.txtend); 1874 return this.token; 1875 1876 case LEX_JSTE: 1877 if (c != '#') 1878 continue; 1879 1880 1881 c = this.in.readChar(); 1882 1883 1884 if (c != '>') 1885 { 1886 this.in.ungetChar(c); 1887 continue; 1888 } 1889 1890 this.lexsize -= 1; 1891 this.txtend = this.lexsize; 1892 this.lexbuf[this.lexsize] = (byte)'\0'; 1893 this.state = LEX_CONTENT; 1894 this.waswhite = false; 1895 this.token = newNode(Node.JsteTag, 1896 this.lexbuf, 1897 this.txtstart, 1898 this.txtend); 1899 return this.token; 1900 1901 case LEX_PHP: 1902 if (c != '?') 1903 continue; 1904 1905 1906 c = this.in.readChar(); 1907 1908 if (c != '>') 1909 { 1910 this.in.ungetChar(c); 1911 continue; 1912 } 1913 1914 this.lexsize -= 1; 1915 this.txtend = this.lexsize; 1916 this.lexbuf[this.lexsize] = (byte)'\0'; 1917 this.state = LEX_CONTENT; 1918 this.waswhite = false; 1919 this.token = newNode(Node.PhpTag, 1920 this.lexbuf, 1921 this.txtstart, 1922 this.txtend); 1923 return this.token; 1924 1925 case LEX_SECTION: 1926 if (c == '[') 1927 { 1928 if (this.lexsize == (this.txtstart + 6) && 1929 
(getString(this.lexbuf, this.txtstart, 6)).equals("CDATA[")) 1930 { 1931 this.state = LEX_CDATA; 1932 this.lexsize -= 6; 1933 continue; 1934 } 1935 } 1936 1937 if (c != ']') 1938 continue; 1939 1940 1941 c = this.in.readChar(); 1942 1943 if (c != '>') 1944 { 1945 this.in.ungetChar(c); 1946 continue; 1947 } 1948 1949 this.lexsize -= 1; 1950 this.txtend = this.lexsize; 1951 this.lexbuf[this.lexsize] = (byte)'\0'; 1952 this.state = LEX_CONTENT; 1953 this.waswhite = false; 1954 this.token = newNode(Node.SectionTag, 1955 this.lexbuf, 1956 this.txtstart, 1957 this.txtend); 1958 return this.token; 1959 1960 case LEX_CDATA: 1961 if (c != ']') 1962 continue; 1963 1964 1965 c = this.in.readChar(); 1966 1967 if (c != ']') 1968 { 1969 this.in.ungetChar(c); 1970 continue; 1971 } 1972 1973 1974 c = this.in.readChar(); 1975 1976 if (c != '>') 1977 { 1978 this.in.ungetChar(c); 1979 continue; 1980 } 1981 1982 this.lexsize -= 1; 1983 this.txtend = this.lexsize; 1984 this.lexbuf[this.lexsize] = (byte)'\0'; 1985 this.state = LEX_CONTENT; 1986 this.waswhite = false; 1987 this.token = newNode(Node.CDATATag, 1988 this.lexbuf, 1989 this.txtstart, 1990 this.txtend); 1991 return this.token; 1992 } 1993 } 1994 1995 if (this.state == LEX_CONTENT) 1996 { 1997 this.txtend = this.lexsize; 1998 1999 if (this.txtend > this.txtstart) 2000 { 2001 this.in.ungetChar(c); 2002 2003 if (this.lexbuf[this.lexsize - 1] == (byte)' ') 2004 { 2005 this.lexsize -= 1; 2006 this.txtend = this.lexsize; 2007 } 2008 2009 this.token = newNode(Node.TextNode, 2010 this.lexbuf, 2011 this.txtstart, 2012 this.txtend); 2013 return this.token; 2014 } 2015 } 2016 else if (this.state == LEX_COMMENT) 2017 { 2018 if (c == StreamIn.EndOfStream) 2019 Report.warning(this, null, null, Report.MALFORMED_COMMENT); 2020 2021 this.txtend = this.lexsize; 2022 this.lexbuf[this.lexsize] = (byte)'\0'; 2023 this.state = LEX_CONTENT; 2024 this.waswhite = false; 2025 this.token = newNode(Node.CommentTag, 2026 this.lexbuf, 2027 this.txtstart, 
2028 this.txtend); 2029 return this.token; 2030 } 2031 2032 return null; 2033 } 2034 2035 2051 2052 public Node parseAsp() 2053 { 2054 int c; 2055 Node asp = null; 2056 2057 this.txtstart = this.lexsize; 2058 2059 for (;;) 2060 { 2061 c = this.in.readChar(); 2062 addCharToLexer(c); 2063 2064 2065 if (c != '%') 2066 continue; 2067 2068 c = this.in.readChar(); 2069 addCharToLexer(c); 2070 2071 if (c == '>') 2072 break; 2073 } 2074 2075 this.lexsize -= 2; 2076 this.txtend = this.lexsize; 2077 2078 if (this.txtend > this.txtstart) 2079 asp = newNode(Node.AspTag, 2080 this.lexbuf, 2081 this.txtstart, 2082 this.txtend); 2083 2084 this.txtstart = this.txtend; 2085 return asp; 2086 } 2087 2088 2092 public Node parsePhp() 2093 { 2094 int c; 2095 Node php = null; 2096 2097 this.txtstart = this.lexsize; 2098 2099 for (;;) 2100 { 2101 c = this.in.readChar(); 2102 addCharToLexer(c); 2103 2104 2105 if (c != '?') 2106 continue; 2107 2108 c = this.in.readChar(); 2109 addCharToLexer(c); 2110 2111 if (c == '>') 2112 break; 2113 } 2114 2115 this.lexsize -= 2; 2116 this.txtend = this.lexsize; 2117 2118 if (this.txtend > this.txtstart) 2119 php = newNode(Node.PhpTag, 2120 this.lexbuf, 2121 this.txtstart, 2122 this.txtend); 2123 2124 this.txtstart = this.txtend; 2125 return php; 2126 } 2127 2128 2129 public String parseAttribute(MutableBoolean isempty, MutableObject asp, 2130 MutableObject php) 2131 { 2132 int start = 0; 2133 short map; 2135 String attr; 2136 int c = 0; 2137 2138 asp.setObject(null); 2139 php.setObject(null); 2140 2141 2142 for (;;) 2143 { 2144 c = this.in.readChar(); 2145 2146 if (c == '/') 2147 { 2148 c = this.in.readChar(); 2149 2150 if (c == '>') 2151 { 2152 isempty.value = true; 2153 return null; 2154 } 2155 2156 this.in.ungetChar(c); 2157 c = '/'; 2158 break; 2159 } 2160 2161 if (c == '>') 2162 return null; 2163 2164 if (c =='<') 2165 { 2166 c = this.in.readChar(); 2167 2168 if (c == '%') 2169 { 2170 asp.setObject(parseAsp()); 2171 return null; 2172 } 2173 else if 
(c == '?') 2174 { 2175 php.setObject(parsePhp()); 2176 return null; 2177 } 2178 2179 this.in.ungetChar(c); 2180 Report.attrError(this, this.token, null, Report.UNEXPECTED_GT); 2181 return null; 2182 } 2183 2184 if (c == '"' || c == '\'') 2185 { 2186 Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK); 2187 continue; 2188 } 2189 2190 if (c == StreamIn.EndOfStream) 2191 { 2192 Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE); 2193 this.in.ungetChar(c); 2194 return null; 2195 } 2196 2197 map = MAP((char)c); 2198 2199 if ((map & WHITE) == 0) 2200 break; 2201 } 2202 2203 start = this.lexsize; 2204 2205 for (;;) 2206 { 2207 2208 if (c == '=' || c == '>') 2209 { 2210 this.in.ungetChar(c); 2211 break; 2212 } 2213 2214 if (c == '<' || c == StreamIn.EndOfStream) 2215 { 2216 this.in.ungetChar(c); 2217 break; 2218 } 2219 2220 map = MAP((char)c); 2221 2222 if ((map & WHITE) != 0) 2223 break; 2224 2225 2226 2227 2228 if (!this.configuration.XmlTags && (map & UPPERCASE) != 0) 2229 c += (int)('a' - 'A'); 2230 2231 addCharToLexer(c); 2233 2234 c = this.in.readChar(); 2235 } 2236 2237 int len = this.lexsize - start; 2240 attr = (len > 0 ? getString(this.lexbuf, start, len) : null); 2241 this.lexsize = start; 2242 2243 return attr; 2244 } 2245 2246 2251 public int parseServerInstruction() 2252 { 2253 int c, map, delim = '"'; 2254 boolean isrule = false; 2255 2256 c = this.in.readChar(); 2257 addCharToLexer(c); 2258 2259 2260 if (c == '%' || c == '?' 
|| c == '@') 2261 isrule = true; 2262 2263 for (;;) 2264 { 2265 c = this.in.readChar(); 2266 2267 if (c == StreamIn.EndOfStream) 2268 break; 2269 2270 if (c == '>') 2271 { 2272 if (isrule) 2273 addCharToLexer(c); 2274 else 2275 this.in.ungetChar(c); 2276 2277 break; 2278 } 2279 2280 2281 2282 if (!isrule) 2283 { 2284 map = MAP((char)c); 2285 2286 if ((map & WHITE) != 0) 2287 break; 2288 } 2289 2290 addCharToLexer(c); 2291 2292 if (c == '"') 2293 { 2294 do 2295 { 2296 c = this.in.readChar(); 2297 addCharToLexer(c); 2298 } 2299 while (c != '"'); 2300 delim = '\''; 2301 continue; 2302 } 2303 2304 if (c == '\'') 2305 { 2306 do 2307 { 2308 c = this.in.readChar(); 2309 addCharToLexer(c); 2310 } 2311 while (c != '\''); 2312 } 2313 } 2314 2315 return delim; 2316 } 2317 2318 2319 2320 2321 public String parseValue(String name, boolean foldCase, 2322 MutableBoolean isempty, MutableInteger pdelim) 2323 { 2324 int len = 0; 2325 int start; 2326 short map; 2327 boolean seen_gt = false; 2328 boolean munge = true; 2329 int c = 0; 2330 int lastc, delim, quotewarning; 2331 String value; 2332 2333 delim = 0; 2334 pdelim.value = (int)'"'; 2335 2336 2341 if (configuration.LiteralAttribs) 2342 munge = false; 2343 2344 2345 2346 for (;;) 2347 { 2348 c = this.in.readChar(); 2349 2350 if (c == StreamIn.EndOfStream) 2351 { 2352 this.in.ungetChar(c); 2353 break; 2354 } 2355 2356 map = MAP((char)c); 2357 2358 if ((map & WHITE) == 0) 2359 break; 2360 } 2361 2362 2367 2368 if (c != '=') 2369 { 2370 this.in.ungetChar(c); 2371 return null; 2372 } 2373 2374 2375 2376 for (;;) 2377 { 2378 c = this.in.readChar(); 2379 2380 if (c == StreamIn.EndOfStream) 2381 { 2382 this.in.ungetChar(c); 2383 break; 2384 } 2385 2386 map = MAP((char)c); 2387 2388 if ((map & WHITE) == 0) 2389 break; 2390 } 2391 2392 2393 2394 if (c == '"' || c == '\'') 2395 delim = c; 2396 else if (c == '<') 2397 { 2398 start = this.lexsize; 2399 addCharToLexer(c); 2400 pdelim.value = parseServerInstruction(); 2401 len = this.lexsize - 
start; 2402 this.lexsize = start; 2403 return (len > 0 ? getString(this.lexbuf, start, len) : null); 2404 } 2405 else 2406 this.in.ungetChar(c); 2407 2408 2412 2413 quotewarning = 0; 2414 start = this.lexsize; 2415 c = '\0'; 2416 2417 for (;;) 2418 { 2419 lastc = c; 2420 c = this.in.readChar(); 2421 2422 if (c == StreamIn.EndOfStream) 2423 { 2424 Report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE); 2425 this.in.ungetChar(c); 2426 break; 2427 } 2428 2429 if (delim == (char)0) 2430 { 2431 if (c == '>') 2432 { 2433 this.in.ungetChar(c); 2434 break; 2435 } 2436 2437 if (c == '"' || c == '\'') 2438 { 2439 Report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK); 2440 break; 2441 } 2442 2443 if (c == '<') 2444 { 2445 2446 Report.attrError(this, this.token, null, Report.UNEXPECTED_GT); 2447 2448 } 2449 2450 2456 if (c == '/') 2457 { 2458 2459 c = this.in.readChar(); 2460 2461 if (c == '>' && 2462 !AttributeTable.getDefaultAttributeTable().isUrl(name)) 2463 { 2464 isempty.value = true; 2465 this.in.ungetChar(c); 2466 break; 2467 } 2468 2469 2470 this.in.ungetChar(c); 2471 c = '/'; 2472 } 2473 } 2474 else 2475 { 2476 if (c == delim) 2477 break; 2478 2479 2480 2481 if (c == '\r') 2482 { 2483 c = this.in.readChar(); 2484 if (c != '\n') 2485 this.in.ungetChar(c); 2486 2487 c = '\n'; 2488 } 2489 2490 if (c == '\n' || c == '<' || c == '>') 2491 ++quotewarning; 2492 2493 if (c == '>') 2494 seen_gt = true; 2495 } 2496 2497 if (c == '&') 2498 { 2499 addCharToLexer(c); 2500 parseEntity((short)0); 2501 continue; 2502 } 2503 2504 2508 if (c == '\\') 2509 { 2510 c = this.in.readChar(); 2511 2512 if (c != '\n') 2513 { 2514 this.in.ungetChar(c); 2515 c = '\\'; 2516 } 2517 } 2518 2519 map = MAP((char)c); 2520 2521 if ((map & WHITE) != 0) 2522 { 2523 if (delim == (char)0) 2524 break; 2525 2526 if (munge) 2527 { 2528 c = ' '; 2529 2530 if (lastc == ' ') 2531 continue; 2532 } 2533 } 2534 else if (foldCase && (map & UPPERCASE) != 0) 2535 c += (int)('a' - 'A'); 
2536 2537 addCharToLexer(c); 2538 } 2539 2540 if (quotewarning > 10 && seen_gt && munge) 2541 { 2542 2549 if (!AttributeTable.getDefaultAttributeTable().isScript(name) && 2550 !(AttributeTable.getDefaultAttributeTable().isUrl(name) && 2551 (getString(this.lexbuf, start, 11)).equals("javascript:"))) 2552 Report.error(this, null, null, Report.SUSPECTED_MISSING_QUOTE); 2553 } 2554 2555 len = this.lexsize - start; 2556 this.lexsize = start; 2557 2558 if (len > 0 || delim != 0) 2559 value = getString(this.lexbuf, start, len); 2560 else 2561 value = null; 2562 2563 2564 if (delim != 0) 2565 pdelim.value = delim; 2566 else 2567 pdelim.value = (int)'"'; 2568 2569 return value; 2570 } 2571 2572 2573 public static boolean isValidAttrName(String attr) 2574 { 2575 short map; 2576 char c; 2577 int i; 2578 2579 2580 c = attr.charAt(0); 2581 map = MAP(c); 2582 2583 if (!((map & LETTER) != 0)) 2584 return false; 2585 2586 2587 for( i = 1; i < attr.length(); i++) 2588 { 2589 c = attr.charAt(i); 2590 map = MAP(c); 2591 2592 if((map & NAMECHAR) != 0) 2593 continue; 2594 2595 return false; 2596 } 2597 2598 return true; 2599 } 2600 2601 2602 2603 public AttVal parseAttrs(MutableBoolean isempty) 2604 { 2605 AttVal av, list; 2606 String attribute, value; 2607 MutableInteger delim = new MutableInteger(); 2608 MutableObject asp = new MutableObject(); 2609 MutableObject php = new MutableObject(); 2610 2611 list = null; 2612 2613 for (; !endOfInput();) 2614 { 2615 attribute = parseAttribute(isempty, asp, php); 2616 2617 if (attribute == null) 2618 { 2619 2620 if (asp.getObject() != null) 2621 { 2622 av = new AttVal(list, null, (Node)asp.getObject(), null, 2623 '\0', null, null ); 2624 list = av; 2625 continue; 2626 } 2627 2628 2629 if (php.getObject() != null) 2630 { 2631 av = new AttVal(list, null, null, (Node)php.getObject(), 2632 '\0', null, null ); 2633 list = av; 2634 continue; 2635 } 2636 2637 break; 2638 } 2639 2640 value = parseValue(attribute, false, isempty, delim); 2641 2642 if 
(attribute != null && isValidAttrName(attribute)) 2643 { 2644 av = new AttVal( list, null, null, null, 2645 delim.value, attribute, value ); 2646 av.dict = 2647 AttributeTable.getDefaultAttributeTable().findAttribute(av); 2648 list = av; 2649 } 2650 else 2651 { 2652 av = new AttVal( null, null, null, null, 2653 0, attribute, value ); 2654 Report.attrError(this, this.token, value, Report.BAD_ATTRIBUTE_VALUE); 2655 } 2656 } 2657 2658 return list; 2659 } 2660 2661 2677 public void pushInline( Node node ) 2678 { 2679 IStack is; 2680 2681 if (node.implicit) 2682 return; 2683 2684 if (node.tag == null) 2685 return; 2686 2687 if ((node.tag.model & Dict.CM_INLINE) == 0 ) 2688 return; 2689 2690 if ((node.tag.model & Dict.CM_OBJECT) != 0) 2691 return; 2692 2693 if (node.tag != configuration.tt.tagFont && isPushed(node)) 2694 return; 2695 2696 is = new IStack(); 2698 is.tag = node.tag; 2699 is.element = node.element; 2700 if (node.attributes != null) 2701 is.attributes = cloneAttributes(node.attributes); 2702 this.istack.push( is ); 2703 } 2704 2705 2706 public void popInline( Node node ) 2707 { 2708 AttVal av; 2709 IStack is; 2710 2711 if (node != null) { 2712 2713 if (node.tag == null) 2714 return; 2715 2716 if ((node.tag.model & Dict.CM_INLINE) == 0) 2717 return; 2718 2719 if ((node.tag.model & Dict.CM_OBJECT) != 0) 2720 return; 2721 2722 if (node.tag == configuration.tt.tagA) { 2724 2725 while (this.istack.size() > 0) { 2726 is = (IStack)this.istack.pop(); 2727 if (is.tag == configuration.tt.tagA) { 2728 break; 2729 } 2730 } 2731 2732 if (this.insert >= this.istack.size()) 2733 this.insert = -1; 2734 return; 2735 } 2736 } 2737 2738 if (this.istack.size() > 0) { 2739 is = (IStack)this.istack.pop(); 2740 if (this.insert >= this.istack.size()) 2741 this.insert = -1; 2742 } 2743 } 2744 2745 public boolean isPushed( Node node ) 2746 { 2747 int i; 2748 IStack is; 2749 2750 for (i = this.istack.size() - 1; i >= 0; --i) { 2751 is = (IStack)this.istack.elementAt(i); 2752 if 
(is.tag == node.tag) 2753 return true; 2754 } 2755 2756 return false; 2757 } 2758 2759 2776 public int inlineDup( Node node ) 2777 { 2778 int n; 2779 2780 n = this.istack.size() - this.istackbase; 2781 if ( n > 0 ) { 2782 this.insert = this.istackbase; 2783 this.inode = node; 2784 } 2785 2786 return n; 2787 } 2788 2789 public Node insertedToken() 2790 { 2791 Node node; 2792 IStack is; 2793 int n; 2794 2795 if (this.insert == -1) { 2797 node = this.inode; 2798 this.inode = null; 2799 return node; 2800 } 2801 2802 2805 if (this.inode == null) { 2806 this.lines = this.in.curline; 2807 this.columns = this.in.curcol; 2808 } 2809 2810 node = newNode(Node.StartTag, 2811 this.lexbuf, 2812 this.txtstart, 2813 this.txtend); node.implicit = true; 2816 is = (IStack)this.istack.elementAt( this.insert ); 2817 node.element = is.element; 2818 node.tag = is.tag; 2819 if (is.attributes != null) 2820 node.attributes = cloneAttributes(is.attributes); 2821 2822 n = this.insert; 2824 2825 if (++n < this.istack.size() ) { 2827 this.insert = n; 2828 } else { 2829 this.insert = -1; 2830 } 2831 2832 return node; 2833 } 2834 2835 2836 public static int wstrcasecmp(String s1, String s2) 2837 { 2838 return (s1.equalsIgnoreCase(s2) ? 0 : 1); 2839 } 2840 2841 public static int wstrcaselexcmp(String s1, String s2) 2842 { 2843 char c; 2844 int i = 0; 2845 2846 while ( i < s1.length() && i < s2.length() ) { 2847 c = s1.charAt(i); 2848 if ( toLower(c) != toLower( s2.charAt(i) ) ) { 2849 break; 2850 } 2851 i += 1; 2852 } 2853 if ( i == s1.length() && i == s2.length() ) { 2854 return 0; 2855 } else if ( i == s1.length() ) { 2856 return -1; 2857 } else if ( i == s2.length() ) { 2858 return 1; 2859 } else { 2860 return ( s1.charAt(i) > s2.charAt(i) ? 
1 : -1 ); 2861 } 2862 } 2863 2864 public static boolean wsubstr(String s1, String s2) 2865 { 2866 int i; 2867 int len1 = s1.length(); 2868 int len2 = s2.length(); 2869 2870 for (i = 0; i <= len1 - len2; ++i) 2871 { 2872 if (s2.equalsIgnoreCase(s1.substring(i))) 2873 return true; 2874 } 2875 2876 return false; 2877 } 2878 2879 public boolean canPrune(Node element) 2880 { 2881 if (element.type == Node.TextNode) 2882 return true; 2883 2884 if (element.content != null) 2885 return false; 2886 2887 if (element.tag == configuration.tt.tagA && element.attributes != null) 2888 return false; 2889 2890 if (element.tag == configuration.tt.tagP && !this.configuration.DropEmptyParas) 2891 return false; 2892 2893 if (element.tag == null) 2894 return false; 2895 2896 if ((element.tag.model & Dict.CM_ROW) != 0) 2897 return false; 2898 2899 if (element.tag == configuration.tt.tagApplet) 2900 return false; 2901 2902 if (element.tag == configuration.tt.tagObject) 2903 return false; 2904 2905 if (element.attributes != null && 2906 (element.getAttrByName("id") != null || 2907 element.getAttrByName("name") != null) ) 2908 return false; 2909 2910 return true; 2911 } 2912 2913 2914 public void fixId(Node node) 2915 { 2916 AttVal name = node.getAttrByName("name"); 2917 AttVal id = node.getAttrByName("id"); 2918 2919 if (name != null) 2920 { 2921 if (id != null) 2922 { 2923 if (!id.value.equals(name.value)) 2924 Report.attrError(this, node, "name", Report.ID_NAME_MISMATCH); 2925 } 2926 else if (this.configuration.XmlOut) 2927 node.addAttribute("id", name.value); 2928 } 2929 } 2930 2931 2935 public void deferDup() 2936 { 2937 this.insert = -1; 2938 this.inode = null; 2939 } 2940 2941 2942 2943 2944 private static final short DIGIT = 1; 2945 private static final short LETTER = 2; 2946 private static final short NAMECHAR = 4; 2947 private static final short WHITE = 8; 2948 private static final short NEWLINE = 16; 2949 private static final short LOWERCASE = 32; 2950 private static final short 
/* character classes stored as bit flags in lexmap */
private static final short DIGIT     = (short)(1 << 0);
private static final short LETTER    = (short)(1 << 1);
private static final short NAMECHAR  = (short)(1 << 2);
private static final short WHITE     = (short)(1 << 3);
private static final short NEWLINE   = (short)(1 << 4);
private static final short LOWERCASE = (short)(1 << 5);
private static final short UPPERCASE = (short)(1 << 6);

/* lexer states */
private static final short LEX_CONTENT    = 0;
private static final short LEX_GT         = 1;
private static final short LEX_ENDTAG     = 2;
private static final short LEX_STARTTAG   = 3;
private static final short LEX_COMMENT    = 4;
private static final short LEX_DOCTYPE    = 5;
private static final short LEX_PROCINSTR  = 6;
private static final short LEX_ENDCOMMENT = 7;
private static final short LEX_CDATA      = 8;
private static final short LEX_SECTION    = 9;
private static final short LEX_ASP        = 10;
private static final short LEX_JSTE       = 11;
private static final short LEX_PHP        = 12;

/* class flags for the character codes 0..127 */
private static short[] lexmap = new short[128];

/**
 * Adds the given class flags to every character of str in lexmap.
 */
private static void mapStr(String str, short code)
{
    char[] members = str.toCharArray();

    for (int k = 0; k < members.length; k++)
        lexmap[members[k]] |= code;
}

static {
    mapStr("\r\n\f", (short)(NEWLINE|WHITE));
    mapStr(" \t", WHITE);
    mapStr("-.:_", NAMECHAR);
    mapStr("0123456789", (short)(DIGIT|NAMECHAR));
    mapStr("abcdefghijklmnopqrstuvwxyz", (short)(LOWERCASE|LETTER|NAMECHAR));
    mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short)(UPPERCASE|LETTER|NAMECHAR));
}

/**
 * Returns the class flags of a character; characters at or above code
 * 128 have none.
 */
private static short MAP( char c )
{
    if (c >= 128)
        return 0;

    return lexmap[c];
}

/** True when the character is classified WHITE. */
private static boolean isWhite(char c)
{
    return (MAP(c) & WHITE) != 0;
}

/** True when the character is classified DIGIT. */
private static boolean isDigit(char c)
{
    return (MAP(c) & DIGIT) != 0;
}

/** True when the character is classified LETTER. */
private static boolean isLetter(char c)
{
    return (MAP(c) & LETTER) != 0;
}

/** Folds A-Z to a-z; any other character is returned unchanged. */
private static char toLower(char c)
{
    boolean upper = (MAP(c) & UPPERCASE) != 0;

    return upper ? (char)( (int)c + (int)'a' - (int)'A' ) : c;
}

/** Folds a-z to A-Z; any other character is returned unchanged. */
private static char toUpper(char c)
{
    boolean lower = (MAP(c) & LOWERCASE) != 0;

    return lower ? (char)( (int)c + (int)'A' - (int)'a' ) : c;
}

/**
 * Folds the case of an ASCII letter.
 *
 * @param c       character to fold
 * @param tocaps  true to fold to upper case, false to fold to lower case
 * @param xmlTags when true no folding is done at all
 * @return the folded character, or c itself when nothing applies
 */
public static char foldCase(char c, boolean tocaps, boolean xmlTags)
{
    if (xmlTags)
        return c;

    return tocaps ? toUpper(c) : toLower(c);
}

/** One row of the W3C doctype table. */
private static class W3CVersionInfo
{
    String name;
    String voyagerName;
    String profile;
    short code;

    public W3CVersionInfo( String name,
                           String voyagerName,
                           String profile,
                           short code )
    {
        this.name = name;
        this.voyagerName = voyagerName;
        this.profile = profile;
        this.code = code;
    }
}

/* the 3 URIs for the XHTML 1.0 DTDs */
private static final String voyager_loose    = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
private static final String voyager_strict   = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd";

private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
3090 private static Lexer.W3CVersionInfo[] W3CVersion = 3091 { 3092 new W3CVersionInfo("HTML 4.01", 3093 "XHTML 1.0 Strict", 3094 voyager_strict, 3095 Dict.VERS_HTML40_STRICT), 3096 new W3CVersionInfo("HTML 4.01 Transitional", 3097 "XHTML 1.0 Transitional", 3098 voyager_loose, 3099 Dict.VERS_HTML40_LOOSE), 3100 new W3CVersionInfo("HTML 4.01 Frameset", 3101 "XHTML 1.0 Frameset", 3102 voyager_frameset, 3103 Dict.VERS_FRAMES), 3104 new W3CVersionInfo("HTML 4.0", 3105 "XHTML 1.0 Strict", 3106 voyager_strict, 3107 Dict.VERS_HTML40_STRICT), 3108 new W3CVersionInfo("HTML 4.0 Transitional", 3109 "XHTML 1.0 Transitional", 3110 voyager_loose, 3111 Dict.VERS_HTML40_LOOSE), 3112 new W3CVersionInfo("HTML 4.0 Frameset", 3113 "XHTML 1.0 Frameset", 3114 voyager_frameset, 3115 Dict.VERS_FRAMES), 3116 new W3CVersionInfo("HTML 3.2", 3117 "XHTML 1.0 Transitional", 3118 voyager_loose, 3119 Dict.VERS_HTML32), 3120 new W3CVersionInfo("HTML 2.0", 3121 "XHTML 1.0 Strict", 3122 voyager_strict, 3123 Dict.VERS_HTML20) 3124 }; 3125 3126} 3127 | Popular Tags |