1 5 6 package org.w3c.tidy; 7 8 33 34 69 70 public class Clean { 71 72 private int classNum = 1; 73 74 private TagTable tt; 75 76 public Clean(TagTable tt) 77 { 78 this.tt = tt; 79 } 80 81 private StyleProp insertProperty(StyleProp props, String name, 82 String value) 83 { 84 StyleProp first, prev, prop; 85 int cmp; 86 87 prev = null; 88 first = props; 89 90 while (props != null) 91 { 92 cmp = props.name.compareTo(name); 93 94 if (cmp == 0) 95 { 96 97 return first; 98 } 99 100 if (cmp > 0) { 102 103 104 prop = new StyleProp(name, value, props); 105 106 if (prev != null) 107 prev.next = prop; 108 else 109 first = prop; 110 111 return first; 112 } 113 114 prev = props; 115 props = props.next; 116 } 117 118 prop = new StyleProp(name, value); 119 120 if (prev != null) 121 prev.next = prop; 122 else 123 first = prop; 124 125 return first; 126 } 127 128 135 private StyleProp createProps(StyleProp prop, String style) 136 { 137 int name_end; 138 int value_end; 139 int value_start = 0; 140 int name_start = 0; 141 boolean more; 142 143 name_start = 0; 144 while (name_start < style.length()) 145 { 146 while (name_start < style.length() && 147 style.charAt(name_start) == ' ') 148 ++name_start; 149 150 name_end = name_start; 151 152 while (name_end < style.length()) 153 { 154 if (style.charAt(name_end) == ':') 155 { 156 value_start = name_end + 1; 157 break; 158 } 159 160 ++name_end; 161 } 162 163 if (name_end >= style.length() || style.charAt(name_end) != ':') 164 break; 165 166 while (value_start < style.length() && 167 style.charAt(value_start) == ' ') 168 ++value_start; 169 170 value_end = value_start; 171 more = false; 172 173 while (value_end < style.length()) 174 { 175 if (style.charAt(value_end) == ';') 176 { 177 more = true; 178 break; 179 } 180 181 ++value_end; 182 } 183 184 prop = insertProperty(prop, 185 style.substring(name_start, name_end), 186 style.substring(value_start, value_end)); 187 188 if (more) 189 { 190 name_start = value_end + 1; 191 continue; 192 } 193 194 break; 195 } 196 197 return prop; 198 } 199 200 private String createPropString(StyleProp props) 201 { 202 String style = ""; 203 int len; 204 StyleProp prop; 205 206 207 208 for (len = 0, prop = props; prop != null; prop = prop.next) 209 { 210 len += prop.name.length() + 2; 211 len += prop.value.length() + 2; 212 } 213 214 for (prop = props; prop != null; prop = prop.next) 215 { 216 style = style.concat(prop.name); 217 style = style.concat(": "); 218 219 style = style.concat(prop.value); 220 221 if (prop.next == null) 222 break; 223 224 style = style.concat("; "); 225 } 226 227 return style; 228 } 229 230 233 private String addProperty(String style, String property) 234 { 235 StyleProp prop; 236 237 prop = createProps(null, style); 238 prop = createProps(prop, property); 239 style = createPropString(prop); 240 return style; 241 } 242 243 private String gensymClass(String tag) 244 { 245 String str; 246 247 str = "c" + classNum; 248 classNum++; 249 return str; 250 } 251 252 private String findStyle(Lexer lexer, String tag, String properties) 253 { 254 Style style; 255 256 for (style = lexer.styles; style != null; style=style.next) 257 { 258 if (style.tag.equals(tag) && 259 style.properties.equals(properties)) 260 return style.tagClass; 261 } 262 263 style = new Style(tag, gensymClass(tag), properties, lexer.styles); 264 lexer.styles = style; 265 return style.tagClass; 266 } 267 268 276 private void style2Rule(Lexer lexer, Node node) 277 { 278 AttVal styleattr, classattr; 279 String classname; 280 281 styleattr = node.getAttrByName("style"); 282 283 if (styleattr != null) 284 { 285 classname = findStyle(lexer, node.element, styleattr.value); 286 classattr = node.getAttrByName("class"); 287 288 292 if (classattr != null) 293 { 294 classattr.value = classattr.value + " " + classname; 295 node.removeAttribute(styleattr); 296 } 297 else 298 { 299 styleattr.attribute = "class"; 300 styleattr.value = classname; 301 } 302 } 303 } 304 305 private void addColorRule(Lexer lexer, String selector, String color) 306 { 307 if (color != null) 308 { 309 lexer.addStringLiteral(selector); 310 lexer.addStringLiteral(" { color: "); 311 lexer.addStringLiteral(color); 312 lexer.addStringLiteral(" }\n"); 313 } 314 } 315 316 326 private void cleanBodyAttrs(Lexer lexer, Node body) 327 { 328 AttVal attr; 329 String bgurl = null; 330 String bgcolor = null; 331 String color = null; 332 333 attr = body.getAttrByName("background"); 334 335 if (attr != null) 336 { 337 bgurl = attr.value; 338 attr.value = null; 339 body.removeAttribute(attr); 340 } 341 342 attr = body.getAttrByName("bgcolor"); 343 344 if (attr != null) 345 { 346 bgcolor = attr.value; 347 attr.value = null; 348 body.removeAttribute(attr); 349 } 350 351 attr = body.getAttrByName("text"); 352 353 if (attr != null) 354 { 355 color = attr.value; 356 attr.value = null; 357 body.removeAttribute(attr); 358 } 359 360 if (bgurl != null || bgcolor != null || color != null) 361 { 362 lexer.addStringLiteral(" body {\n"); 363 364 if (bgurl != null) 365 { 366 lexer.addStringLiteral(" background-image: url("); 367 lexer.addStringLiteral(bgurl); 368 lexer.addStringLiteral(");\n"); 369 } 370 371 if (bgcolor != null) 372 { 373 lexer.addStringLiteral(" background-color: "); 374 lexer.addStringLiteral(bgcolor); 375 lexer.addStringLiteral(";\n"); 376 } 377 378 if (color != null) 379 { 380 lexer.addStringLiteral(" color: "); 381 lexer.addStringLiteral(color); 382 lexer.addStringLiteral(";\n"); 383 } 384 385 lexer.addStringLiteral(" }\n"); 386 } 387 388 attr = body.getAttrByName("link"); 389 390 if (attr != null) 391 { 392 addColorRule(lexer, " :link", attr.value); 393 body.removeAttribute(attr); 394 } 395 396 attr = body.getAttrByName("vlink"); 397 398 if (attr != null) 399 { 400 addColorRule(lexer, " :visited", attr.value); 401 body.removeAttribute(attr); 402 } 403 404 attr = body.getAttrByName("alink"); 405 406 if (attr != null) 407 { 408 addColorRule(lexer, " :active", attr.value); 409 body.removeAttribute(attr); 410 } 411 } 412 413 private boolean niceBody(Lexer lexer, Node doc) 414 { 415 Node body = doc.findBody(lexer.configuration.tt); 416 417 if (body != null) 418 { 419 if ( 420 body.getAttrByName("background") != null || 421 body.getAttrByName("bgcolor") != null || 422 body.getAttrByName("text") != null || 423 body.getAttrByName("link") != null || 424 body.getAttrByName("vlink") != null || 425 body.getAttrByName("alink") != null 426 ) 427 { 428 lexer.badLayout |= Report.USING_BODY; 429 return false; 430 } 431 } 432 433 return true; 434 } 435 436 437 private void createStyleElement(Lexer lexer, Node doc) 438 { 439 Node node, head, body; 440 Style style; 441 AttVal av; 442 443 if (lexer.styles == null && niceBody(lexer, doc)) 444 return; 445 446 node = lexer.newNode(Node.StartTag, null, 0, 0, "style"); 447 node.implicit = true; 448 449 450 av = new AttVal(null, null, '"', "type", "text/css"); 451 av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); 452 node.attributes = av; 453 454 body = doc.findBody(lexer.configuration.tt); 455 456 lexer.txtstart = lexer.lexsize; 457 458 if (body != null) 459 cleanBodyAttrs(lexer, body); 460 461 for (style = lexer.styles; style != null; style = style.next) 462 { 463 lexer.addCharToLexer(' '); 464 lexer.addStringLiteral(style.tag); 465 lexer.addCharToLexer('.'); 466 lexer.addStringLiteral(style.tagClass); 467 lexer.addCharToLexer(' '); 468 lexer.addCharToLexer('{'); 469 lexer.addStringLiteral(style.properties); 470 lexer.addCharToLexer('}'); 471 lexer.addCharToLexer('\n'); 472 } 473 474 lexer.txtend = lexer.lexsize; 475 476 Node.insertNodeAtEnd(node, 477 lexer.newNode(Node.TextNode, 478 lexer.lexbuf, 479 lexer.txtstart, 480 lexer.txtend)); 481 482 488 489 head = doc.findHEAD(lexer.configuration.tt); 490 491 if (head != null) 492 Node.insertNodeAtEnd(head, node); 493 } 494 495 496 private void fixNodeLinks(Node node) 497 { 498 Node child; 499 500 if (node.prev != null) 501 node.prev.next = node; 502 else 503 node.parent.content = node; 504 505 if (node.next != null) 506 node.next.prev = node; 507 else 508 node.parent.last = node; 509 510 for (child = node.content; child != null; child = child.next) 511 child.parent = node; 512 } 513 514 518 private void stripOnlyChild(Node node) 519 { 520 Node child; 521 522 child = node.content; 523 node.content = child.content; 524 node.last = child.last; 525 child.content = null; 526 527 for (child = node.content; child != null; child = child.next) 528 child.parent = node; 529 } 530 531 532 private void discardContainer(Node element, MutableObject pnode) 533 { 534 Node node; 535 Node parent = element.parent; 536 537 if (element.content != null) 538 { 539 element.last.next = element.next; 540 541 if (element.next != null) 542 { 543 element.next.prev = element.last; 544 element.last.next = element.next; 545 } 546 else 547 parent.last = element.last; 548 549 if (element.prev != null) 550 { 551 element.content.prev = element.prev; 552 element.prev.next = element.content; 553 } 554 else 555 parent.content = element.content; 556 557 for (node = element.content; node != null; node = node.next) 558 node.parent = parent; 559 560 pnode.setObject(element.content); 561 } 562 else 563 { 564 if (element.next != null) 565 element.next.prev = element.prev; 566 else 567 parent.last = element.prev; 568 569 if (element.prev != null) 570 element.prev.next = element.next; 571 else 572 parent.content = element.next; 573 574 pnode.setObject(element.next); 575 } 576 577 element.next = null; 578 element.content = null; 579 } 580 581 585 private void addStyleProperty(Node node, String property) 586 { 587 AttVal av; 588 589 for (av = node.attributes; av != null; av = av.next) 590 { 591 if (av.attribute.equals("style")) 592 break; 593 } 594 595 596 597 if (av != null) 598 { 599 String s; 600 601 s = addProperty(av.value, property); 602 av.value = s; 603 } 604 else 605 { 606 av = new AttVal(node.attributes, null, '"', "style", property); 607 av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); 608 node.attributes = av; 609 } 610 } 611 612 621 private String mergeProperties(String s1, String s2) 622 { 623 String s; 624 StyleProp prop; 625 626 prop = createProps(null, s1); 627 prop = createProps(prop, s2); 628 s = createPropString(prop); 629 return s; 630 } 631 632 private void mergeStyles(Node node, Node child) 633 { 634 AttVal av; 635 String s1, s2, style; 636 637 for (s2 = null, av = child.attributes; av != null; av = av.next) 638 { 639 if (av.attribute.equals("style")) 640 { 641 s2 = av.value; 642 break; 643 } 644 } 645 646 for (s1 = null, av = node.attributes; av != null; av = av.next) 647 { 648 if (av.attribute.equals("style")) 649 { 650 s1 = av.value; 651 break; 652 } 653 } 654 655 if (s1 != null) 656 { 657 if (s2 != null) 658 { 659 style = mergeProperties(s1, s2); 660 av.value = style; 661 } 662 } 663 else if (s2 != null) 664 { 665 av = new AttVal(node.attributes, null, '"', "style", s2); 666 av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); 667 node.attributes = av; 668 } 669 } 670 671 private String fontSize2Name(String size) 672 { 673 685 686 String [] sizes = 687 { 688 "60%", 689 "70%", 690 "80%", 691 null, 692 "120%", 693 "150%", 694 "200%" 695 }; 696 String buf; 697 698 if (size.length() > 0 && 699 '0' <= size.charAt(0) && size.charAt(0) <= '6') 700 { 701 int n = size.charAt(0) - '0'; 702 return sizes[n]; 703 } 704 705 if (size.length() > 0 && size.charAt(0) == '-') 706 { 707 if (size.length() > 1 && 708 '0' <= size.charAt(1) && size.charAt(1) <= '6') 709 { 710 int n = size.charAt(1) - '0'; 711 double x; 712 713 for (x = 1.0; n > 0; --n) 714 x *= 0.8; 715 716 x *= 100.0; 717 buf = "" + (int)x + "%"; 718 719 return buf; 720 } 721 722 return "smaller"; 723 } 724 725 if (size.length() > 1 && 726 '0' <= size.charAt(1) && size.charAt(1) <= '6') 727 { 728 int n = size.charAt(1) - '0'; 729 double x; 730 731 for (x = 1.0; n > 0; --n) 732 x *= 1.2; 733 734 x *= 100.0; 735 buf = "" + (int)x + "%"; 736 737 return buf; 738 } 739 740 return "larger"; 741 } 742 743 private void addFontFace(Node node, String face) 744 { 745 addStyleProperty(node, "font-family: " + face); 746 } 747 748 private void addFontSize(Node node, String size) 749 { 750 String value; 751 752 if (size.equals("6") && node.tag == tt.tagP) 753 { 754 node.element = "h1"; 755 tt.findTag(node); 756 return; 757 } 758 759 if (size.equals("5") && node.tag == tt.tagP) 760 { 761 node.element = "h2"; 762 tt.findTag(node); 763 return; 764 } 765 766 if (size.equals("4") && node.tag == tt.tagP) 767 { 768 node.element = "h3"; 769 tt.findTag(node); 770 return; 771 } 772 773 value = fontSize2Name(size); 774 775 if (value != null) 776 { 777 addStyleProperty(node, "font-size: " + value); 778 } 779 } 780 781 private void addFontColor(Node node, String color) 782 { 783 addStyleProperty(node, "color: " + color); 784 } 785 786 private void addAlign(Node node, String align) 787 { 788 789 addStyleProperty(node, "text-align: " + align.toLowerCase()); 790 } 791 792 796 private void addFontStyles(Node node, AttVal av) 797 { 798 while (av != null) 799 { 800 if (av.attribute.equals("face")) 801 addFontFace(node, av.value); 802 else if (av.attribute.equals("size")) 803 addFontSize(node, av.value); 804 else if (av.attribute.equals("color")) 805 addFontColor(node, av.value); 806 807 av = av.next; 808 } 809 } 810 811 815 private void textAlign(Lexer lexer, Node node) 816 { 817 AttVal av, prev; 818 819 prev = null; 820 821 for (av = node.attributes; av != null; av = av.next) 822 { 823 if (av.attribute.equals("align")) 824 { 825 if (prev != null) 826 prev.next = av.next; 827 else 828 node.attributes = av.next; 829 830 if (av.value != null) 831 { 832 addAlign(node, av.value); 833 } 834 835 break; 836 } 837 838 prev = av; 839 } 840 } 841 842 846 847 851 852 private boolean dir2Div(Lexer lexer, Node node, MutableObject pnode) 853 { 854 Node child; 855 856 if (node.tag == tt.tagDir || 857 node.tag == tt.tagUl || 858 node.tag == tt.tagOl) 859 { 860 child = node.content; 861 862 if (child == null) 863 return false; 864 865 866 867 if (child.next != null) 868 return false; 869 870 if (child.tag != tt.tagLi) 871 return false; 872 873 if (!child.implicit) 874 return false; 875 876 877 878 node.tag = tt.tagDiv; 879 node.element = "div"; 880 addStyleProperty(node, "margin-left: 2em"); 881 stripOnlyChild(node); 882 return true; 883 884 891 892 893 899 900 901 909 911 912 } 916 917 return false; 918 } 919 920 924 925 private boolean center2Div(Lexer lexer, Node node, MutableObject pnode) 926 { 927 if (node.tag == tt.tagCenter) 928 { 929 if (lexer.configuration.DropFontTags) 930 { 931 if (node.content != null) 932 { 933 Node last = node.last; 934 Node parent = node.parent; 935 936 discardContainer(node, pnode); 937 938 node = lexer.inferredTag("br"); 939 940 if (last.next != null) 941 last.next.prev = node; 942 943 node.next = last.next; 944 last.next = node; 945 node.prev = last; 946 947 if (parent.last == last) 948 parent.last = node; 949 950 node.parent = parent; 951 } 952 else 953 { 954 Node prev = node.prev; 955 Node next = node.next; 956 Node parent = node.parent; 957 discardContainer(node, pnode); 958 959 node = lexer.inferredTag("br"); 960 node.next = next; 961 node.prev = prev; 962 node.parent = parent; 963 964 if (next != null) 965 next.prev = node; 966 else 967 parent.last = node; 968 969 if (prev != null) 970 prev.next = node; 971 else 972 parent.content = node; 973 } 974 975 return true; 976 } 977 node.tag = tt.tagDiv; 978 node.element = "div"; 979 addStyleProperty(node, "text-align: center"); 980 return true; 981 } 982 983 return false; 984 } 985 986 993 private boolean mergeDivs(Lexer lexer, Node node, MutableObject pnode) 994 { 995 Node child; 996 997 if (node.tag != tt.tagDiv) 998 return false; 999 1000 child = node.content; 1001 1002 if (child == null) 1003 return false; 1004 1005 if (child.tag != tt.tagDiv) 1006 return false; 1007 1008 if (child.next != null) 1009 return false; 1010 1011 mergeStyles(node, child); 1012 stripOnlyChild(node); 1013 return true; 1014 } 1015 1016 1020 1021 private boolean nestedList(Lexer lexer, Node node, MutableObject pnode) 1022 { 1023 Node child, list; 1024 1025 if (node.tag == tt.tagUl || node.tag == tt.tagOl) 1026 { 1027 child = node.content; 1028 1029 if (child == null) 1030 return false; 1031 1032 1033 1034 if (child.next != null) 1035 return false; 1036 1037 list = child.content; 1038 1039 if (list == null) 1040 return false; 1041 1042 if (list.tag != node.tag) 1043 return false; 1044 1045 pnode.setObject(node.next); 1046 1047 1048 list.prev = node.prev; 1049 list.next = node.next; 1050 list.parent = node.parent; 1051 fixNodeLinks(list); 1052 1053 1054 child.content = null; 1055 node.content = null; 1056 node.next = null; 1057 1058 1063 1064 if (list.prev != null) 1065 { 1066 node = list; 1067 list = node.prev; 1068 1069 if (list.tag == tt.tagUl || list.tag == tt.tagOl) 1070 { 1071 list.next = node.next; 1072 1073 if (list.next != null) 1074 list.next.prev = list; 1075 1076 child = list.last; 1077 1078 node.parent = child; 1079 node.next = null; 1080 node.prev = child.last; 1081 fixNodeLinks(node); 1082 } 1083 } 1084 1085 cleanNode(lexer, node); 1086 return true; 1087 } 1088 1089 return false; 1090 } 1091 1092 1115 private boolean blockStyle(Lexer lexer, Node node, MutableObject pnode) 1116 { 1117 Node child; 1118 1119 if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) != 0) 1120 { 1121 if (node.tag != tt.tagTable 1122 && node.tag != tt.tagTr 1123 && node.tag != tt.tagLi) 1124 { 1125 1126 if (node.tag != tt.tagCaption) 1127 textAlign(lexer, node); 1128 1129 child = node.content; 1130 1131 if (child == null) 1132 return false; 1133 1134 1135 1136 if (child.next != null) 1137 return false; 1138 1139 if (child.tag == tt.tagB) 1140 { 1141 mergeStyles(node, child); 1142 addStyleProperty(node, "font-weight: bold"); 1143 stripOnlyChild(node); 1144 return true; 1145 } 1146 1147 if (child.tag == tt.tagI) 1148 { 1149 mergeStyles(node, child); 1150 addStyleProperty(node, "font-style: italic"); 1151 stripOnlyChild(node); 1152 return true; 1153 } 1154 1155 if (child.tag == tt.tagFont) 1156 { 1157 mergeStyles(node, child); 1158 addFontStyles(node, child.attributes); 1159 stripOnlyChild(node); 1160 return true; 1161 } 1162 } 1163 } 1164 1165 return false; 1166 } 1167 1168 1169 private boolean inlineStyle(Lexer lexer, Node node, MutableObject pnode) 1170 { 1171 Node child; 1172 1173 if (node.tag != tt.tagFont && (node.tag.model & (Dict.CM_INLINE|Dict.CM_ROW)) != 0) 1174 { 1175 child = node.content; 1176 1177 if (child == null) 1178 return false; 1179 1180 1181 1182 if (child.next != null) 1183 return false; 1184 1185 if (child.tag == tt.tagB && lexer.configuration.LogicalEmphasis) 1186 { 1187 mergeStyles(node, child); 1188 addStyleProperty(node, "font-weight: bold"); 1189 stripOnlyChild(node); 1190 return true; 1191 } 1192 1193 if (child.tag == tt.tagI && lexer.configuration.LogicalEmphasis) 1194 { 1195 mergeStyles(node, child); 1196 addStyleProperty(node, "font-style: italic"); 1197 stripOnlyChild(node); 1198 return true; 1199 } 1200 1201 if (child.tag == tt.tagFont) 1202 { 1203 mergeStyles(node, child); 1204 addFontStyles(node, child.attributes); 1205 stripOnlyChild(node); 1206 return true; 1207 } 1208 } 1209 1210 return false; 1211 } 1212 1213 1218 private boolean font2Span(Lexer lexer, Node node, MutableObject pnode) 1219 { 1220 AttVal av, style, next; 1221 1222 if (node.tag == tt.tagFont) 1223 { 1224 if (lexer.configuration.DropFontTags) 1225 { 1226 discardContainer(node, pnode); 1227 return false; 1228 } 1229 1230 1231 if (node.parent.content == node 1232 && node.next == null) 1233 return false; 1234 1235 addFontStyles(node, node.attributes); 1236 1237 1238 av = node.attributes; 1239 style = null; 1240 1241 while (av != null) 1242 { 1243 next = av.next; 1244 1245 if (av.attribute.equals("style")) 1246 { 1247 av.next = null; 1248 style = av; 1249 } 1250 1251 av = next; 1252 } 1253 1254 node.attributes = style; 1255 1256 node.tag = tt.tagSpan; 1257 node.element = "span"; 1258 1259 return true; 1260 } 1261 1262 return false; 1263 } 1264 1265 1268 private Node cleanNode(Lexer lexer, Node node) 1269 { 1270 Node next = null; 1271 MutableObject o = new MutableObject(); 1272 boolean b = false; 1273 1274 for (next = node; node.isElement(); node = next) 1275 { 1276 o.setObject(next); 1277 1278 b = dir2Div(lexer, node, o); 1279 next = (Node)o.getObject(); 1280 if (b) 1281 continue; 1282 1283 b = nestedList(lexer, node, o); 1284 next = (Node)o.getObject(); 1285 if (b) 1286 continue; 1287 1288 b = center2Div(lexer, node, o); 1289 next = (Node)o.getObject(); 1290 if (b) 1291 continue; 1292 1293 b = mergeDivs(lexer, node, o); 1294 next = (Node)o.getObject(); 1295 if (b) 1296 continue; 1297 1298 b = blockStyle(lexer, node, o); 1299 next = (Node)o.getObject(); 1300 if (b) 1301 continue; 1302 1303 b = inlineStyle(lexer, node, o); 1304 next = (Node)o.getObject(); 1305 if (b) 1306 continue; 1307 1308 b = font2Span(lexer, node, o); 1309 next = (Node)o.getObject(); 1310 if (b) 1311 continue; 1312 1313 break; 1314 } 1315 1316 return next; 1317 } 1318 1319 private Node createStyleProperties(Lexer lexer, Node node) 1320 { 1321 Node child; 1322 1323 if (node.content != null) 1324 { 1325 for (child = node.content; child != null; child = child.next) 1326 { 1327 child = createStyleProperties(lexer, child); 1328 } 1329 } 1330 1331 return cleanNode(lexer, node); 1332 } 1333 1334 private void defineStyleRules(Lexer lexer, Node node) 1335 { 1336 Node child; 1337 1338 if (node.content != null) 1339 { 1340 for (child = node.content; 1341 child != null; child = child.next) 1342 { 1343 defineStyleRules(lexer, child); 1344 } 1345 } 1346 1347 style2Rule(lexer, node); 1348 } 1349 1350 public void cleanTree(Lexer lexer, Node doc) 1351 { 1352 doc = createStyleProperties(lexer, doc); 1353 1354 if (!lexer.configuration.MakeClean) 1355 { 1356 defineStyleRules(lexer, doc); 1357 createStyleElement(lexer, doc); 1358 } 1359 } 1360 1361 1362 public void nestedEmphasis(Node node) 1363 { 1364 MutableObject o = new MutableObject(); 1365 Node next; 1366 1367 while (node != null) 1368 { 1369 next = node.next; 1370 1371 if ((node.tag == tt.tagB || node.tag == tt.tagI) 1372 && node.parent != null && node.parent.tag == node.tag) 1373 { 1374 1375 o.setObject(next); 1376 discardContainer(node, o); 1377 next = (Node)o.getObject(); 1378 node = next; 1379 continue; 1380 } 1381 1382 if (node.content != null) 1383 nestedEmphasis(node.content); 1384 1385 node = next; 1386 } 1387 } 1388 1389 1390 public void emFromI(Node node) 1391 { 1392 while (node != null) 1393 { 1394 if (node.tag == tt.tagI) 1395 { 1396 node.element = tt.tagEm.name; 1397 node.tag = tt.tagEm; 1398 } 1399 else if (node.tag == tt.tagB) 1400 { 1401 node.element = tt.tagStrong.name; 1402 node.tag = tt.tagStrong; 1403 } 1404 1405 if (node.content != null) 1406 emFromI(node.content); 1407 1408 node = node.next; 1409 } 1410 } 1411 1412 1419 public void list2BQ(Node node) 1420 { 1421 while (node != null) 1422 { 1423 if (node.content != null) 1424 list2BQ(node.content); 1425 1426 if (node.tag != null && node.tag.parser == ParserImpl.getParseList() && 1427 node.hasOneChild() && node.content.implicit) 1428 { 1429 stripOnlyChild(node); 1430 node.element = tt.tagBlockquote.name; 1431 node.tag = tt.tagBlockquote; 1432 node.implicit = true; 1433 } 1434 1435 node = node.next; 1436 } 1437 } 1438 1439 1444 public void bQ2Div(Node node) 1445 { 1446 int indent; 1447 String indent_buf; 1448 1449 while (node != null) 1450 { 1451 if (node.tag == tt.tagBlockquote && node.implicit) 1452 { 1453 indent = 1; 1454 1455 while(node.hasOneChild() && 1456 node.content.tag == tt.tagBlockquote && 1457 node.implicit) 1458 { 1459 ++indent; 1460 stripOnlyChild(node); 1461 } 1462 1463 if (node.content != null) 1464 bQ2Div(node.content); 1465 1466 indent_buf = "margin-left: " + 1467 (new Integer (2*indent)).toString() + "em"; 1468 1469 node.element = tt.tagDiv.name; 1470 node.tag = tt.tagDiv; 1471 node.addAttribute("style", indent_buf); 1472 } 1473 else if (node.content != null) 1474 bQ2Div(node.content); 1475 1476 1477 node = node.next; 1478 } 1479 } 1480 1481 1482 public Node pruneSection(Lexer lexer, Node node) 1483 { 1484 for (;;) 1485 { 1486 1487 node = Node.discardElement(node); 1488 1489 if (node == null) 1490 return null; 1491 1492 if (node.type == Node.SectionTag) 1493 { 1494 if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) 1495 { 1496 node = pruneSection(lexer, node); 1497 continue; 1498 } 1499 1500 if ((Lexer.getString(node.textarray, node.start, 5)).equals("endif")) 1501 { 1502 node = Node.discardElement(node); 1503 break; 1504 } 1505 } 1506 } 1507 1508 return node; 1509 } 1510 1511 public void dropSections(Lexer lexer, Node node) 1512 { 1513 while (node != null) 1514 { 1515 if (node.type == Node.SectionTag) 1516 { 1517 1518 if ((Lexer.getString(node.textarray, node.start, 2)).equals("if")) 1519 { 1520 node = pruneSection(lexer, node); 1521 continue; 1522 } 1523 1524 1525 node = Node.discardElement(node); 1526 continue; 1527 } 1528 1529 if (node.content != null) 1530 dropSections(lexer, node.content); 1531 1532 node = node.next; 1533 } 1534 } 1535 1536 public void purgeAttributes(Node node) 1537 { 1538 AttVal attr = node.attributes; 1539 AttVal next = null; 1540 AttVal prev = null; 1541 1542 while (attr != null) 1543 { 1544 next = attr.next; 1545 1546 1547 if (attr.attribute != null && 1548 attr.value != null && 1549 attr.attribute.equals("class") && 1550 attr.value.equals("Code")) 1551 { 1552 prev = attr; 1553 } 1554 else if (attr.attribute != null && 1555 (attr.attribute.equals("class") || 1556 attr.attribute.equals("style") || 1557 attr.attribute.equals("lang") || 1558 attr.attribute.startsWith("x:") || 1559 ((attr.attribute.equals("height") || attr.attribute.equals("width")) && 1560 (node.tag == tt.tagTd || node.tag == tt.tagTr || node.tag == tt.tagTh)))) 1561 { 1562 if (prev != null) 1563 prev.next = next; 1564 else 1565 node.attributes = next; 1566 1567 } 1568 else 1569 prev = attr; 1570 1571 attr = next; 1572 } 1573 } 1574 1575 1576 public Node stripSpan(Lexer lexer, Node span) 1577 { 1578 Node node; 1579 Node prev = null; 1580 Node content; 1581 1582 1587 1588 cleanWord2000(lexer, span.content); 1589 content = span.content; 1590 1591 if (span.prev != null) 1592 prev = span.prev; 1593 else if (content != null) 1594 { 1595 node = content; 1596 content = content.next; 1597 Node.removeNode(node); 1598 Node.insertNodeBeforeElement(span, node); 1599 prev = node; 1600 } 1601 1602 while (content != null) 1603 { 1604 node = content; 1605 content = content.next; 1606 Node.removeNode(node); 1607 Node.insertNodeAfterElement(prev, node); 1608 prev = node; 1609 } 1610 1611 if (span.next == null) 1612 span.parent.last = prev; 1613 1614 node = span.next; 1615 span.content = null; 1616 Node.discardElement(span); 1617 return node; 1618 } 1619 1620 1621 private void normalizeSpaces(Lexer lexer, Node node) 1622 { 1623 while (node != null) 1624 { 1625 if (node.content != null) 1626 normalizeSpaces(lexer, node.content); 1627 1628 if (node.type == Node.TextNode) 1629 { 1630 int i; 1631 MutableInteger c = new MutableInteger(); 1632 int p = node.start; 1633 1634 for (i = node.start; i < node.end; ++i) 1635 { 1636 c.value = (int)node.textarray[i]; 1637 1638 1639 if (c.value > 0x7F) 1640 i += PPrint.getUTF8(node.textarray, i, c); 1641 1642 if (c.value == 160) 1643 c.value = ' '; 1644 1645 p = PPrint.putUTF8(node.textarray, p, c.value); 1646 } 1647 } 1648 1649 node = node.next; 1650 } 1651 } 1652 1653 1660 public void cleanWord2000(Lexer lexer, Node node) 1661 { 1662 1663 Node list = null; 1664 1665 while (node != null) 1666 { 1667 1668 if (node.tag == tt.tagStyle || 1669 node.tag == tt.tagMeta || 1670 node.type == Node.CommentTag) 1671 { 1672 node = Node.discardElement(node); 1673 continue; 1674 } 1675 1676 1677 if (node.tag == tt.tagSpan) 1678 { 1679 node = stripSpan(lexer, node); 1680 continue; 1681 } 1682 1683 1684 if (node.tag == tt.tagHtml) 1685 { 1686 1687 if (node.getAttrByName("xmlns:o") == null) 1688 return; 1689 } 1690 1691 if (node.tag == tt.tagLink) 1692 { 1693 AttVal attr = node.getAttrByName("rel"); 1694 1695 if (attr != null && attr.value != null && 1696 attr.value.equals("File-List")) 1697 { 1698 node = Node.discardElement(node); 1699 continue; 1700 } 1701 } 1702 1703 1704 if (node.content == null && node.tag == tt.tagP) 1705 { 1706 node = Node.discardElement(node); 1707 continue; 1708 } 1709 1710 if (node.tag == tt.tagP) 1711 { 1712 AttVal attr = node.getAttrByName("class"); 1713 1714 1715 if (attr != null && attr.value != null && 1716 attr.value.equals("MsoListBullet")) 1717 { 1718 Node.coerceNode(lexer, node, tt.tagLi); 1719 1720 if (list == null || list.tag != tt.tagUl) 1721 { 1722 list = lexer.inferredTag("ul"); 1723 Node.insertNodeBeforeElement(node, list); 1724 } 1725 1726 purgeAttributes(node); 1727 1728 if (node.content != null) 1729 cleanWord2000(lexer, node.content); 1730 1731 1732 Node.removeNode(node); 1733 Node.insertNodeAtEnd(list, node); 1734 node = list.next; 1735 } 1736 1737 else if (attr != null && attr.value != null && 1738 attr.value.equals("Code")) 1739 { 1740 Node br = lexer.newLineNode(); 1741 normalizeSpaces(lexer, node); 1742 1743 if (list == null || list.tag != tt.tagPre) 1744 { 1745 list = lexer.inferredTag("pre"); 1746 Node.insertNodeBeforeElement(node, list); 1747 } 1748 1749 1750 Node.removeNode(node); 1751 Node.insertNodeAtEnd(list, node); 1752 stripSpan(lexer, node); 1753 Node.insertNodeAtEnd(list, br); 1754 node = list.next; 1755 } 1756 else 1757 list = null; 1758 } 1759 else 1760 list = null; 1761 1762 1763 if (node.type == Node.StartTag || node.type == Node.StartEndTag) 1764 purgeAttributes(node); 1765 1766 if (node.content != null) 1767 cleanWord2000(lexer, node.content); 1768 1769 node = node.next; 1770 } 1771 } 1772 1773 public boolean isWord2000(Node root, TagTable tt) 1774 { 1775 Node html = root.findHTML(tt); 1776 1777 return (html != null && html.getAttrByName("xmlns:o") != null); 1778 } 1779} 1780 | Popular Tags |