package org.htmlparser.nodes;

import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Vector;

import org.htmlparser.Attribute;
import org.htmlparser.Tag;
import org.htmlparser.lexer.Cursor;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.scanners.Scanner;
import org.htmlparser.scanners.TagScanner;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SpecialHashtable;
import org.htmlparser.visitors.NodeVisitor;

/**
 * A node that represents a single tag.
 *
 * <p>The tag is stored as a vector of {@link Attribute} objects. Element zero
 * of that vector is treated as the tag name (see {@link #getRawTagName()});
 * the remaining elements are the attributes and the whitespace between them,
 * in the order in which {@link #toHtml()} writes them out.
 */
public class TagNode extends AbstractNode implements Tag {

    /** Shared empty array returned by the id/ender accessors of this base class. */
    private final static String[] NONE = new String[0];

    /** The scanner associated with this tag. */
    private Scanner mScanner;

    /** The scanner every tag starts out with. */
    protected final static Scanner mDefaultScanner = new TagScanner();

    /**
     * The attributes of this tag, as a vector of {@link Attribute} objects.
     * Element zero is the tag name.
     */
    protected Vector mAttributes;

    /**
     * Upper case tag names for which {@link #breaksFlow()} answers
     * <code>true</code>. Only the keys matter; every value is
     * <code>Boolean.TRUE</code>.
     */
    protected static Hashtable breakTags;

    static {
        String[] names = {
            "BLOCKQUOTE", "BODY", "BR", "CENTER", "DD", "DIR", "DIV", "DL",
            "DT", "FORM", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HR",
            "HTML", "ISINDEX", "LI", "MENU", "NOFRAMES", "OL", "P", "PRE",
            "TD", "TH", "TITLE", "UL",
        };
        breakTags = new Hashtable(30);
        for (int i = 0; i < names.length; i++) {
            breakTags.put(names[i], Boolean.TRUE);
        }
    }

    /**
     * Create an empty tag with no page, positions of -1 and a fresh
     * attribute vector.
     */
    public TagNode() {
        this(null, -1, -1, new Vector());
    }

    /**
     * Create a tag with the given location and attributes.
     *
     * <p>When <code>attributes</code> is <code>null</code> or empty, the tag
     * name is initialised from the first entry of {@link #getIds()}, or to
     * the empty string when there are no ids.
     *
     * @param page the page this tag was read from
     * @param start the starting offset of this node within the page
     * @param end the ending offset of this node within the page
     * @param attributes the vector of {@link Attribute} objects, tag name first
     */
    public TagNode(Page page, int start, int end, Vector attributes) {
        super(page, start, end);

        mScanner = mDefaultScanner;
        mAttributes = attributes;
        if ((null == mAttributes) || (0 == mAttributes.size())) {
            String[] names = getIds();
            if ((null != names) && (0 != names.length)) {
                setTagName(names[0]);
            } else {
                setTagName("");
            }
        }
    }

    /**
     * Create a tag that shares the page, positions and attribute vector of
     * another tag, but uses the given scanner.
     *
     * @param tag the tag to take the page, positions and attributes from
     * @param scanner the scanner to associate with the new tag
     */
    public TagNode(TagNode tag, TagScanner scanner) {
        this(tag.getPage(), tag.getTagBegin(), tag.getTagEnd(), tag.getAttributesEx());
        setThisScanner(scanner);
    }

    /**
     * Return the value of the named attribute.
     *
     * <p>The special name {@link SpecialHashtable#TAGNAME} (compared without
     * regard to case) yields the name of element zero of the attribute
     * vector, i.e. the raw tag name.
     *
     * @param name the attribute name, matched case-insensitively
     * @return the attribute value, or <code>null</code> if there is no such
     *     attribute (or, for the tag name, no attributes at all)
     */
    public String getAttribute(String name) {
        String ret = null;

        if (name.equalsIgnoreCase(SpecialHashtable.TAGNAME)) {
            // Guard the lookup: the vector may have been replaced by an empty
            // or null one through setAttributesEx().
            Vector attributes = getAttributesEx();
            if ((null != attributes) && (0 < attributes.size())) {
                ret = ((Attribute) attributes.elementAt(0)).getName();
            }
        } else {
            Attribute attribute = getAttributeEx(name);
            if (null != attribute) {
                ret = attribute.getValue();
            }
        }

        return (ret);
    }

    /**
     * Set an attribute value, choosing a quote character automatically.
     *
     * <p>A quote is only chosen when the value contains whitespace: a double
     * quote if the value has none, else a single quote if the value has
     * none, else a double quote with every embedded double quote rewritten
     * as <code>&amp;quot;</code>. An existing attribute of the same name is
     * updated in place and keeps its current quote when none is needed;
     * otherwise a new attribute is added.
     *
     * @param key the attribute name
     * @param value the unquoted attribute value, may be <code>null</code>
     */
    public void setAttribute(String key, String value) {
        boolean needsQuote = false;
        boolean singleQuoteFree = true;
        boolean doubleQuoteFree = true;
        if (null != value) {
            for (int i = 0; i < value.length(); i++) {
                char ch = value.charAt(i);
                if (Character.isWhitespace(ch)) {
                    needsQuote = true;
                } else if ('\'' == ch) {
                    singleQuoteFree = false;
                } else if ('"' == ch) {
                    doubleQuoteFree = false;
                }
            }
        }

        char quote = 0;
        String stored = value;
        if (needsQuote) {
            if (doubleQuoteFree) {
                quote = '"';
            } else if (singleQuoteFree) {
                quote = '\'';
            } else {
                // Both kinds of quote occur in the value: wrap it in double
                // quotes and escape the embedded ones as an entity.
                quote = '"';
                String ref = "&quot;";
                StringBuffer buffer = new StringBuffer(value.length() * 5);
                for (int i = 0; i < value.length(); i++) {
                    char ch = value.charAt(i);
                    if (quote == ch) {
                        buffer.append(ref);
                    } else {
                        buffer.append(ch);
                    }
                }
                stored = buffer.toString();
            }
        }

        Attribute attribute = getAttributeEx(key);
        if (null != attribute) {
            attribute.setValue(stored);
            if (0 != quote) {
                attribute.setQuote(quote);
            }
        } else {
            setAttribute(key, stored, quote);
        }
    }

    /**
     * Remove the first attribute with the given name, if there is one.
     *
     * @param key the attribute name, matched case-insensitively
     */
    public void removeAttribute(String key) {
        Attribute attribute = getAttributeEx(key);
        if (null != attribute) {
            getAttributesEx().remove(attribute);
        }
    }

    /**
     * Set an attribute from its name, value and quote character.
     *
     * @param key the attribute name
     * @param value the attribute value
     * @param quote the quote character, or zero for none
     */
    public void setAttribute(String key, String value, char quote) {
        setAttribute(new Attribute(key, value, quote));
    }

    /**
     * Return the first attribute whose name matches the given name.
     *
     * @param name the attribute name, matched case-insensitively
     * @return the attribute, or <code>null</code> if none matches or the
     *     attribute vector is <code>null</code>
     */
    public Attribute getAttributeEx(String name) {
        Attribute ret = null;

        Vector attributes = getAttributesEx();
        if (null != attributes) {
            int size = attributes.size();
            for (int i = 0; i < size; i++) {
                Attribute candidate = (Attribute) attributes.elementAt(i);
                String candidateName = candidate.getName();
                if ((null != candidateName) && name.equalsIgnoreCase(candidateName)) {
                    ret = candidate;
                    break;
                }
            }
        }

        return (ret);
    }

    /**
     * Set an attribute; simply delegates to {@link #setAttribute(Attribute)}.
     *
     * @param attribute the attribute to set
     */
    public void setAttributeEx(Attribute attribute) {
        setAttribute(attribute);
    }

    /**
     * Set an attribute, replacing any attribute of the same name.
     *
     * <p>Every element after element zero (the tag name) whose name matches
     * case-insensitively is replaced. When nothing matched, the attribute is
     * appended, preceded by a single-space attribute unless the vector is
     * empty or already ends in whitespace.
     *
     * @param attribute the attribute to set
     */
    public void setAttribute(Attribute attribute) {
        boolean replaced = false;
        Vector attributes = getAttributesEx();
        int length = attributes.size();
        if (0 < length) {
            String name = attribute.getName();
            // Start at 1: element zero is the tag name, never an attribute.
            for (int i = 1; i < attributes.size(); i++) {
                Attribute test = (Attribute) attributes.elementAt(i);
                String testName = test.getName();
                if ((null != testName) && testName.equalsIgnoreCase(name)) {
                    attributes.setElementAt(attribute, i);
                    replaced = true;
                }
            }
        }
        if (!replaced) {
            if ((0 != length)
                    && !((Attribute) attributes.elementAt(length - 1)).isWhitespace()) {
                attributes.addElement(new Attribute(" "));
            }
            attributes.addElement(attribute);
        }
    }

    /**
     * Return the value of the named attribute; same as
     * {@link #getAttribute(String)}.
     *
     * @param name the attribute name
     * @return the attribute value, or <code>null</code>
     */
    public String getParameter(String name) {
        return (getAttribute(name));
    }

    /**
     * Return the live attribute vector of this tag.
     *
     * @return the vector of {@link Attribute} objects (not a copy)
     */
    public Vector getAttributesEx() {
        return (mAttributes);
    }

    /**
     * Build a name-to-value table from the attributes.
     *
     * <p>The table maps {@link SpecialHashtable#TAGNAME} to the upper case
     * tag name (the empty string when there are no attributes), and each
     * non-whitespace attribute's upper case name to its value. Empty
     * attributes map to {@link SpecialHashtable#NOTHING} and
     * <code>null</code> values to {@link SpecialHashtable#NULLVALUE}.
     *
     * @return a newly built {@link SpecialHashtable}
     */
    public Hashtable getAttributes() {
        Hashtable ret = new SpecialHashtable();
        Vector attributes = getAttributesEx();
        if (0 < attributes.size()) {
            Attribute attribute = (Attribute) attributes.elementAt(0);
            ret.put(SpecialHashtable.TAGNAME,
                    attribute.getName().toUpperCase(Locale.ENGLISH));
            for (int i = 1; i < attributes.size(); i++) {
                attribute = (Attribute) attributes.elementAt(i);
                if (!attribute.isWhitespace()) {
                    String value = attribute.getValue();
                    if (attribute.isEmpty()) {
                        value = SpecialHashtable.NOTHING;
                    }
                    if (null == value) {
                        value = SpecialHashtable.NULLVALUE;
                    }
                    ret.put(attribute.getName().toUpperCase(Locale.ENGLISH), value);
                }
            }
        } else {
            ret.put(SpecialHashtable.TAGNAME, "");
        }

        return (ret);
    }

    /**
     * Return the normalised tag name: the raw tag name in upper case
     * (English locale) with one leading and one trailing slash removed.
     *
     * @return the tag name, or <code>null</code> if there is no raw tag name
     */
    public String getTagName() {
        String ret = getRawTagName();
        if (null != ret) {
            ret = ret.toUpperCase(Locale.ENGLISH);
            if (ret.startsWith("/")) {
                ret = ret.substring(1);
            }
            if (ret.endsWith("/")) {
                ret = ret.substring(0, ret.length() - 1);
            }
        }

        return (ret);
    }

    /**
     * Return the tag name exactly as stored: the name of element zero of the
     * attribute vector.
     *
     * @return the raw tag name, or <code>null</code> if the vector is empty
     */
    public String getRawTagName() {
        String ret = null;

        Vector attributes = getAttributesEx();
        if (0 != attributes.size()) {
            ret = ((Attribute) attributes.elementAt(0)).getName();
        }

        return (ret);
    }

    /**
     * Set the tag name.
     *
     * <p>Creates the attribute vector if it is <code>null</code>. An existing
     * element zero is replaced only when it has neither a value nor a quote
     * (i.e. it looks like a name-only entry); otherwise the new name is
     * inserted in front of it.
     *
     * @param name the new tag name
     */
    public void setTagName(String name) {
        Attribute attribute = new Attribute(name, null, (char) 0);
        Vector attributes = getAttributesEx();
        if (null == attributes) {
            attributes = new Vector();
            setAttributesEx(attributes);
        }
        if (0 == attributes.size()) {
            attributes.addElement(attribute);
        } else {
            Attribute zeroth = (Attribute) attributes.elementAt(0);
            if ((null == zeroth.getValue()) && (0 == zeroth.getQuote())) {
                attributes.setElementAt(attribute, 0);
            } else {
                attributes.insertElementAt(attribute, 0);
            }
        }
    }

    /**
     * Return the contents of the tag: the output of {@link #toHtml()}
     * without its first and last character.
     *
     * @return the text between the angle brackets
     */
    public String getText() {
        String html = toHtml();
        return (html.substring(1, html.length() - 1));
    }

    /**
     * Replace the attributes with the contents of a name-to-value table.
     *
     * <p>A value wrapped in matching single or double quotes has them
     * stripped and recorded as the attribute's quote character. The entry
     * keyed by {@link SpecialHashtable#TAGNAME} becomes element zero; every
     * other entry is appended after a single-space attribute.
     *
     * @param attributes the table of <code>String</code> names to
     *     <code>String</code> values
     */
    public void setAttributes(Hashtable attributes) {
        Vector att = new Vector();
        for (Enumeration e = attributes.keys(); e.hasMoreElements();) {
            String key = (String) e.nextElement();
            String value = (String) attributes.get(key);
            char quote;
            if (value.startsWith("'") && value.endsWith("'") && (2 <= value.length())) {
                quote = '\'';
                value = value.substring(1, value.length() - 1);
            } else if (value.startsWith("\"") && value.endsWith("\"")
                    && (2 <= value.length())) {
                quote = '"';
                value = value.substring(1, value.length() - 1);
            } else {
                quote = (char) 0;
            }
            if (key.equals(SpecialHashtable.TAGNAME)) {
                att.insertElementAt(new Attribute(value, null, quote), 0);
            } else {
                att.addElement(new Attribute(" "));
                att.addElement(new Attribute(key, value, quote));
            }
        }
        this.mAttributes = att;
    }

    /**
     * Replace the attribute vector of this tag.
     *
     * @param attribs the new vector of {@link Attribute} objects
     */
    public void setAttributesEx(Vector attribs) {
        mAttributes = attribs;
    }

    /**
     * Set the starting position of this tag.
     *
     * @param tagBegin the new value for the node begin position
     */
    public void setTagBegin(int tagBegin) {
        nodeBegin = tagBegin;
    }

    /**
     * Return the starting position of this tag.
     *
     * @return the node begin position
     */
    public int getTagBegin() {
        return (nodeBegin);
    }

    /**
     * Set the ending position of this tag.
     *
     * @param tagEnd the new value for the node end position
     */
    public void setTagEnd(int tagEnd) {
        nodeEnd = tagEnd;
    }

    /**
     * Return the ending position of this tag.
     *
     * @return the node end position
     */
    public int getTagEnd() {
        return (nodeEnd);
    }

    /**
     * Re-initialise this tag from text by lexing it and adopting the page,
     * positions and attributes of the first node produced.
     *
     * @param text the complete tag text to lex
     * @throws IllegalArgumentException if the lexer fails on the text or the
     *     first node it produces is not a tag
     */
    public void setText(String text) {
        Lexer lexer = new Lexer(text);
        Object node;
        try {
            node = lexer.nextNode();
        } catch (ParserException pe) {
            // Keep the original failure attached so the stack trace is not lost.
            IllegalArgumentException failure = new IllegalArgumentException(pe.getMessage());
            failure.initCause(pe);
            throw failure;
        }
        if (!(node instanceof TagNode)) {
            throw new IllegalArgumentException("text does not start with a tag: " + text);
        }

        TagNode output = (TagNode) node;
        mPage = output.getPage();
        nodeBegin = output.getStartPosition();
        nodeEnd = output.getEndPosition();
        mAttributes = output.getAttributesEx();
    }

    /**
     * Return the plain text of this node, which for a tag is always empty.
     *
     * @return the empty string
     */
    public String toPlainTextString() {
        return ("");
    }

    /**
     * Render the tag as HTML: an opening angle bracket, every attribute in
     * vector order, and a closing angle bracket.
     *
     * @return the tag as a string
     */
    public String toHtml() {
        Vector attributes = getAttributesEx();
        int size = attributes.size();

        // Pre-size the buffer: two brackets plus the reported attribute lengths.
        int length = 2;
        for (int i = 0; i < size; i++) {
            length += ((Attribute) attributes.elementAt(i)).getLength();
        }

        StringBuffer ret = new StringBuffer(length);
        ret.append("<");
        for (int i = 0; i < size; i++) {
            ((Attribute) attributes.elementAt(i)).toString(ret);
        }
        ret.append(">");

        return (ret.toString());
    }

    /**
     * Return a short description of the tag for diagnostics, in the form
     * <code>Tag (start,end): text</code> (or <code>End (...)</code> for an
     * end tag). The text is cut and followed by <code>...</code> when the
     * whole line would exceed 80 characters.
     *
     * @return the description
     */
    public String toString() {
        String text = getText();
        StringBuffer ret = new StringBuffer(20 + text.length());
        String type = isEndTag() ? "End" : "Tag";
        Cursor start = new Cursor(getPage(), getStartPosition());
        Cursor end = new Cursor(getPage(), getEndPosition());
        ret.append(type);
        ret.append(" (");
        ret.append(start);
        ret.append(",");
        ret.append(end);
        ret.append("): ");
        if (80 < ret.length() + text.length()) {
            // Clamp at zero so an unusually long position prefix cannot
            // produce a negative substring bound.
            int keep = Math.max(0, 77 - ret.length());
            ret.append(text.substring(0, keep));
            ret.append("...");
        } else {
            ret.append(text);
        }

        return (ret.toString());
    }

    /**
     * Tell whether this tag's name is one of the {@link #breakTags}.
     *
     * @return <code>true</code> if {@link #getTagName()} is a key of
     *     {@link #breakTags}
     */
    public boolean breaksFlow() {
        return (breakTags.containsKey(getTagName()));
    }

    /**
     * Return the attribute table; same as {@link #getAttributes()}.
     *
     * @return the table built by {@link #getAttributes()}
     */
    public Hashtable getParsed() {
        return getAttributes();
    }

    /**
     * Dispatch to the visitor: <code>visitEndTag</code> for an end tag,
     * <code>visitTag</code> otherwise.
     *
     * @param visitor the visitor to call back
     */
    public void accept(NodeVisitor visitor) {
        if (isEndTag()) {
            visitor.visitEndTag(this);
        } else {
            visitor.visitTag(this);
        }
    }

    /**
     * Tell whether the name of the last attribute ends in a slash, as in
     * <code>&lt;br/&gt;</code>.
     *
     * @return <code>true</code> if the last attribute has a non-empty name
     *     ending in <code>'/'</code>
     */
    public boolean isEmptyXmlTag() {
        boolean ret = false;

        Vector attributes = getAttributesEx();
        int size = attributes.size();
        if (0 < size) {
            String name = ((Attribute) attributes.elementAt(size - 1)).getName();
            if (null != name) {
                int length = name.length();
                ret = (0 < length) && (name.charAt(length - 1) == '/');
            }
        }

        return (ret);
    }

    /**
     * Add or remove the trailing slash that marks an empty XML tag.
     *
     * <p>The slash is recognised only on a last attribute that has a name
     * ending in <code>'/'</code> and no value. Removing it drops a lone
     * <code>"/"</code> attribute or trims the slash off a longer name.
     * Adding it appends a <code>"/"</code> attribute, preceded by a
     * single-space attribute when the last attribute has a name (i.e. is not
     * whitespace).
     *
     * @param emptyXmlTag <code>true</code> to ensure the slash is present,
     *     <code>false</code> to ensure it is absent
     */
    public void setEmptyXmlTag(boolean emptyXmlTag) {
        Vector attributes = getAttributesEx();
        int size = attributes.size();

        String name = null;
        boolean endsWithSlash = false;
        if (0 < size) {
            Attribute last = (Attribute) attributes.elementAt(size - 1);
            name = last.getName();
            if ((null != name) && (null == last.getValue())) {
                int length = name.length();
                endsWithSlash = (0 < length) && (name.charAt(length - 1) == '/');
            }
        }

        if (endsWithSlash) {
            if (!emptyXmlTag) {
                attributes.removeElementAt(size - 1);
                if (1 != name.length()) {
                    // No whitespace separated the slash from the previous
                    // name: keep the name and drop only the slash.
                    attributes.addElement(
                            new Attribute(name.substring(0, name.length() - 1), null));
                }
            }
        } else if (emptyXmlTag) {
            if (null != name) {
                // The vector ends in a named attribute: separate the slash from it.
                attributes.addElement(new Attribute(" "));
            }
            attributes.addElement(new Attribute("/", null));
        }
    }

    /**
     * Tell whether this is an end tag, i.e. the raw tag name starts with a
     * slash.
     *
     * @return <code>true</code> if the raw tag name is non-empty and begins
     *     with <code>'/'</code>
     */
    public boolean isEndTag() {
        String raw = getRawTagName();
        return ((null != raw) && (0 != raw.length()) && ('/' == raw.charAt(0)));
    }

    /**
     * Return the page row of the start position of this tag.
     *
     * @return the row reported by the page for the start position
     */
    public int getStartingLineNumber() {
        return (getPage().row(getStartPosition()));
    }

    /**
     * Return the page row of the end position of this tag.
     *
     * @return the row reported by the page for the end position
     */
    public int getEndingLineNumber() {
        return (getPage().row(getEndPosition()));
    }

    /**
     * Return the ids of this tag. This base class has none; the constructor
     * uses the first id, when present, as the default tag name.
     *
     * @return an empty array
     */
    public String[] getIds() {
        return (NONE);
    }

    /**
     * Return the enders of this tag. This base class has none.
     *
     * @return an empty array
     */
    public String[] getEnders() {
        return (NONE);
    }

    /**
     * Return the end tag enders of this tag. This base class has none.
     *
     * @return an empty array
     */
    public String[] getEndTagEnders() {
        return (NONE);
    }

    /**
     * Return the scanner associated with this tag.
     *
     * @return the scanner
     */
    public Scanner getThisScanner() {
        return (mScanner);
    }

    /**
     * Set the scanner associated with this tag.
     *
     * @param scanner the new scanner
     */
    public void setThisScanner(Scanner scanner) {
        mScanner = scanner;
    }

    /**
     * Return the end tag for this tag. This base class keeps none.
     *
     * @return always <code>null</code>
     */
    public Tag getEndTag() {
        return (null);
    }

    /**
     * Set the end tag for this tag. This base class ignores the call.
     *
     * @param end the end tag (unused here)
     */
    public void setEndTag(Tag end) {
    }
}