1 21 package au.id.jericho.lib.html; 22 23 import java.util.*; 24 import java.io.*; 25 26 82 public final class StartTag extends Tag { 83 private final Attributes attributes; 84 final StartTagType startTagType; 85 86 96 StartTag(final Source source, final int begin, final int end, final StartTagType startTagType, final String name, final Attributes attributes) { 97 super(source,begin,end,name); 98 this.attributes=attributes; 99 this.startTagType=startTagType; 100 } 101 102 146 public Element getElement() { 147 if (element==Element.NOT_CACHED) { 148 final EndTag endTag=findEndTagInternal(); 149 element=new Element(source,this,endTag); 150 if (endTag!=null) { 151 if (endTag.element!=Element.NOT_CACHED) 152 if (source.isLoggingEnabled()) source.log(source.getRowColumnVector(endTag.begin).appendTo(new StringBuffer (200).append("End tag ").append(endTag).append(" at ")).append(" terminates more than one element").toString()); endTag.element=element; 154 } 155 } 156 return element; 157 } 158 159 177 public boolean isEmptyElementTag() { 178 return startTagType==StartTagType.NORMAL && source.charAt(end-2)=='/'; 179 } 180 181 188 public StartTagType getStartTagType() { 189 return startTagType; 190 } 191 192 public TagType getTagType() { 194 return startTagType; 195 } 196 197 209 public Attributes getAttributes() { 210 return attributes; 211 } 212 213 226 public String getAttributeValue(final String attributeName) { 227 return attributes==null ? null : attributes.getValue(attributeName); 228 } 229 230 244 public Attributes parseAttributes() { 245 return parseAttributes(Attributes.getDefaultMaxErrorCount()); 246 } 247 248 259 public Attributes parseAttributes(final int maxErrorCount) { 260 if (attributes!=null) return attributes; 261 final int maxEnd=end-startTagType.getClosingDelimiter().length(); 262 int attributesBegin=begin+1+name.length(); 263 while (!isXMLNameStartChar(source.charAt(attributesBegin))) { 265 attributesBegin++; 266 if (attributesBegin==maxEnd) return null; 267 } 268 return Attributes.construct(source,begin,attributesBegin,maxEnd,startTagType,name,maxErrorCount); 269 } 270 271 280 public Segment getTagContent() { 281 return new Segment(source,begin+1+name.length(),end-startTagType.getClosingDelimiter().length()); 282 } 283 284 291 public FormControl getFormControl() { 292 return getElement().getFormControl(); 293 } 294 295 313 public boolean isEndTagForbidden() { 314 if (getStartTagType()!=StartTagType.NORMAL) 315 return getStartTagType().getCorrespondingEndTagType()==null; 316 if (HTMLElements.getEndTagForbiddenElementNames().contains(name)) return true; 317 if (HTMLElements.getElementNames().contains(name)) return false; 318 return isEmptyElementTag(); 319 } 320 321 336 public boolean isEndTagRequired() { 337 if (getStartTagType()!=StartTagType.NORMAL) 338 return getStartTagType().getCorrespondingEndTagType()!=null; 339 if (HTMLElements.getEndTagRequiredElementNames().contains(name)) return true; 340 if (HTMLElements.getElementNames().contains(name)) return false; 341 return !isEmptyElementTag(); 342 } 343 344 public boolean isUnregistered() { 346 return startTagType==StartTagType.UNREGISTERED; 347 } 348 349 358 public String tidy() { 359 return tidy(false); 360 } 361 362 393 public String tidy(boolean toXHTML) { 394 if (attributes==null) return toString(); 395 final StringBuffer sb=new StringBuffer (); 396 sb.append('<'); 397 if (toXHTML && startTagType==StartTagType.NORMAL) { 398 sb.append(name); 399 } else { 400 int i=begin+startTagType.startDelimiterPrefix.length(); 401 final int nameSegmentEnd=i+name.length(); 402 while (i<nameSegmentEnd) { 403 sb.append(source.charAt(i)); 404 i++; 405 } 406 } 407 attributes.appendTidy(sb,findNextTag()); 408 if (startTagType==StartTagType.NORMAL && getElement().getEndTag()==null && !HTMLElements.getEndTagOptionalElementNames().contains(name)) sb.append(" /"); 409 sb.append(startTagType.getClosingDelimiter()); 410 return sb.toString(); 411 } 412 413 439 public static String generateHTML(final String tagName, final Map attributesMap, final boolean emptyElementTag) { 440 final StringWriter stringWriter=new StringWriter(); 441 final StringBuffer sb=stringWriter.getBuffer(); 442 sb.append('<').append(tagName); 443 try {Attributes.appendHTML(stringWriter,attributesMap);} catch (IOException ex) {} if (emptyElementTag) 445 sb.append(" />"); 446 else 447 sb.append('>'); 448 return sb.toString(); 449 } 450 451 public String getDebugInfo() { 452 final StringBuffer sb=new StringBuffer (); 453 appendStartTagDebugInfo(sb); 454 sb.append(super.getDebugInfo()); 455 return sb.toString(); 456 } 457 458 StringBuffer appendStartTagDebugInfo(final StringBuffer sb) { 459 sb.append('"').append(name).append("\" "); 460 if (startTagType!=StartTagType.NORMAL) sb.append('(').append(startTagType.getDescription()).append(") "); 461 return sb; 462 } 463 464 472 public String regenerateHTML() { 473 return tidy(); 474 } 475 476 488 public boolean isEndTagOptional() { 489 return getStartTagType()==StartTagType.NORMAL && HTMLElements.getEndTagOptionalElementNames().contains(name); 490 } 491 492 505 public EndTag findEndTag() { 506 return getElement().getEndTag(); 507 } 508 509 519 public FormControlType getFormControlType() { 520 final FormControl formControl=getFormControl(); 521 if (formControl==null) return null; 522 return formControl.getFormControlType(); 523 } 524 525 536 public Segment getFollowingTextSegment() { 537 int endData=source.getParseText().indexOf('<',end); 538 if (endData==-1) endData=source.end; 539 return new Segment(source,end,endData); 540 } 541 542 550 public boolean isServerTag() { 551 return getTagType().isServerTag(); 552 } 553 554 562 public boolean isComment() { 563 return startTagType==StartTagType.COMMENT; 564 } 565 566 574 public boolean isProcessingInstruction() { 575 return charAt(1)=='?'; 576 } 577 578 586 public boolean isXMLDeclaration() { 587 return startTagType==StartTagType.XML_DECLARATION; 588 } 589 590 598 public boolean isDocTypeDeclaration() { 599 return startTagType==StartTagType.DOCTYPE_DECLARATION; 600 } 601 602 610 public boolean isCommonServerTag() { 611 return startTagType==StartTagType.SERVER_COMMON; 612 } 613 614 622 public boolean isPHPTag() { 623 return startTagType==PHPTagTypes.PHP_STANDARD; 624 } 625 626 634 public boolean isMasonTag() { 635 return startTagType==StartTagType.SERVER_COMMON || startTagType==MasonTagTypes.MASON_NAMED_BLOCK || startTagType==MasonTagTypes.MASON_COMPONENT_CALL || startTagType==MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT; 636 } 637 638 646 public boolean isMasonNamedBlock() { 647 return startTagType==MasonTagTypes.MASON_NAMED_BLOCK; 648 } 649 650 658 public boolean isMasonComponentCall() { 659 return startTagType==MasonTagTypes.MASON_COMPONENT_CALL; 660 } 661 662 670 public boolean isMasonComponentCalledWithContent() { 671 return startTagType==MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT; 672 } 673 674 private EndTag findEndTagInternal() { 675 boolean checkForEmptyElementTag=true; 676 if (startTagType==StartTagType.NORMAL) { 678 final HTMLElementTerminatingTagNameSets terminatingTagNameSets=HTMLElements.getTerminatingTagNameSets(name); 679 if (terminatingTagNameSets!=null) return findOptionalEndTag(terminatingTagNameSets); 681 if (HTMLElements.getEndTagForbiddenElementNames().contains(name)) return null; 683 checkForEmptyElementTag=!HTMLElements.getEndTagRequiredElementNames().contains(name); if (checkForEmptyElementTag && isEmptyElementTag()) return null; 686 } else if (startTagType.getCorrespondingEndTagType()==null) { 687 return null; 688 } 689 final EndTag nextEndTag=source.findNextEndTag(end,name,startTagType.getCorrespondingEndTagType()); 693 if (nextEndTag!=null) { 694 if (HTMLElements.END_TAG_REQUIRED_NESTING_FORBIDDEN_SET.contains(name)) { 695 final StartTag nextStartTag=source.findNextStartTag(end,name); 696 if (nextStartTag==null || nextStartTag.begin>nextEndTag.begin) return nextEndTag; 697 if (source.isLoggingEnabled()) source.log(source.getRowColumnVector(begin).appendTo(new StringBuffer (200).append("StartTag at ")).append(" missing required end tag - invalid nested start tag encountered before end tag").toString()); 698 return new EndTag(source,nextStartTag.begin,nextStartTag.begin,EndTagType.NORMAL,name); 701 } 702 final Segment[] findResult=findEndTag(nextEndTag,checkForEmptyElementTag); 703 if (findResult!=null) return (EndTag)findResult[0]; 704 } 705 if (source.isLoggingEnabled()) source.log(source.getRowColumnVector(begin).appendTo(new StringBuffer (200).append("StartTag at ")).append(" missing required end tag").toString()); 706 return null; 707 } 708 709 private EndTag findOptionalEndTag(final HTMLElementTerminatingTagNameSets terminatingTagNameSets) { 710 int pos=end; 711 while (pos<source.end) { 712 final Tag tag=Tag.findPreviousOrNextTag(source,pos,false); 713 if (tag==null) break; 714 Set terminatingTagNameSet; 715 if (tag instanceof EndTag) { 716 if (tag.name==name) return (EndTag)tag; 717 terminatingTagNameSet=terminatingTagNameSets.TerminatingEndTagNameSet; 718 } else { 719 terminatingTagNameSet=terminatingTagNameSets.NonterminatingElementNameSet; 720 if (terminatingTagNameSet!=null && terminatingTagNameSet.contains(tag.name)) { 721 Element nonterminatingElement=((StartTag)tag).getElement(); 722 pos=nonterminatingElement.end; 723 continue; 724 } 725 terminatingTagNameSet=terminatingTagNameSets.TerminatingStartTagNameSet; 726 } 727 if (terminatingTagNameSet!=null && terminatingTagNameSet.contains(tag.name)) return new EndTag(source,tag.begin,tag.begin,EndTagType.NORMAL,name); 728 pos=tag.begin+1; 729 } 730 return new EndTag(source,source.end,source.end,EndTagType.NORMAL,name); 732 } 733 734 static StartTag findPreviousOrNext(final Source source, final int pos, final String searchName, final boolean isXMLTagName, final boolean previous) { 735 if (searchName==null) return findPreviousOrNext(source,pos,previous); 737 if (searchName.length()==0) throw new IllegalArgumentException ("searchName argument must not be zero length"); 738 final char[] startDelimiterCharArray=new char[searchName.length()+1]; 739 startDelimiterCharArray[0]='<'; 740 for (int i=1; i<startDelimiterCharArray.length; i++) startDelimiterCharArray[i]=searchName.charAt(i-1); 741 if (startDelimiterCharArray[1]=='/') throw new IllegalArgumentException ("searchName argument \""+searchName+"\" must not start with '/'"); 742 try { 743 final ParseText parseText=source.getParseText(); 744 int begin=pos; 745 do { 746 begin=previous?parseText.lastIndexOf(startDelimiterCharArray,begin):parseText.indexOf(startDelimiterCharArray,begin); 747 if (begin==-1) return null; 748 final StartTag startTag=(StartTag)Tag.getTagAt(source,begin); 749 if (startTag==null || (isXMLTagName && startTag.isUnregistered())) continue; 750 if (startTag.startTagType.isNameAfterPrefixRequired() && startTag.name.length()>searchName.length()) { 751 char lastSearchNameChar=searchName.charAt(searchName.length()-1); 761 if (lastSearchNameChar!=':' && isXMLNameChar(lastSearchNameChar)) continue; 762 } 763 return startTag; 764 } while (previous ? (begin-=2)>=0 : (begin+=1)<source.end); 765 } catch (IndexOutOfBoundsException ex) { 766 } 769 return null; 770 } 771 772 static StartTag findPreviousOrNext(final Source source, int pos, final boolean previous) { 773 while (true) { 774 final Tag tag=Tag.findPreviousOrNextTag(source,pos,previous); 775 if (tag==null) return null; 776 if (tag instanceof StartTag) return (StartTag)tag; 777 pos+=previous?-1:1; 778 } 779 } 780 781 static StartTag findNext(final Source source, int pos, final String attributeName, final String value, final boolean valueCaseSensitive) { 782 if (value==null) throw new IllegalArgumentException (); 783 final char[] valueCharArray=value.toLowerCase().toCharArray(); 784 final ParseText parseText=source.getParseText(); 785 while (pos<source.end) { 786 pos=parseText.indexOf(valueCharArray,pos); 787 if (pos==-1) return null; 788 final Tag tag=source.findEnclosingTag(pos); 789 if (tag==null || !(tag instanceof StartTag)) { 790 pos++; 791 continue; 792 } 793 final StartTag startTag=(StartTag)tag; 794 if (startTag.getAttributes()!=null) { 795 final String attributeValue=startTag.getAttributes().getValue(attributeName); 796 if (attributeValue!=null) { 797 if (value.equals(attributeValue)) return startTag; 798 if (value.equalsIgnoreCase(attributeValue)) { 799 if (!valueCaseSensitive) return startTag; 800 if (source.isLoggingEnabled()) source.log(source.getRowColumnVector(pos).appendTo(new StringBuffer (200)).append(": StartTag with attribute ").append(attributeName).append("=\"").append(attributeValue).append("\" ignored during search because its case does not match search value \"").append(value).append('"').toString()); 801 } 802 } 803 } 804 pos=startTag.end+5; } 806 return null; 807 } 808 809 private Segment[] findEndTag(final EndTag nextEndTag, final boolean checkForEmptyElementTag) { 810 StartTag nextStartTag=source.findNextStartTag(end,name); 811 if (checkForEmptyElementTag) { 812 while (nextStartTag!=null && nextStartTag.isEmptyElementTag()) 813 nextStartTag=source.findNextStartTag(nextStartTag.end,name); 814 } 815 return findEndTag(end,nextStartTag,nextEndTag,checkForEmptyElementTag); 816 } 817 818 private Segment[] findEndTag(final int afterPos, StartTag nextStartTag, EndTag nextEndTag, final boolean checkForEmptyElementTag) { 819 if (nextEndTag==null) return null; final Segment[] returnArray={nextEndTag, nextStartTag}; 824 if (nextStartTag==null || nextStartTag.begin>nextEndTag.begin) return returnArray; final Segment[] findResult=nextStartTag.findEndTag(nextEndTag,checkForEmptyElementTag); if (findResult==null) return null; final EndTag nextStartTagsEndTag=(EndTag)findResult[0]; 828 nextStartTag=(StartTag)findResult[1]; 829 nextEndTag=source.findNextEndTag(nextStartTagsEndTag.end, name); return findEndTag(nextStartTagsEndTag.end,nextStartTag,nextEndTag,checkForEmptyElementTag); } 832 } 833 | Popular Tags |