KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > au > id > jericho > lib > html > StartTag


1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2
// Version 2.2
3
// Copyright (C) 2006 Martin Jericho
4
// http://sourceforge.net/projects/jerichohtml/
5
//
6
// This library is free software; you can redistribute it and/or
7
// modify it under the terms of the GNU Lesser General Public
8
// License as published by the Free Software Foundation; either
9
// version 2.1 of the License, or (at your option) any later version.
10
// http://www.gnu.org/copyleft/lesser.html
11
//
12
// This library is distributed in the hope that it will be useful,
13
// but WITHOUT ANY WARRANTY; without even the implied warranty of
14
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
// Lesser General Public License for more details.
16
//
17
// You should have received a copy of the GNU Lesser General Public
18
// License along with this library; if not, write to the Free Software
19
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20

21 package au.id.jericho.lib.html;
22
23 import java.util.*;
24 import java.io.*;
25
26 /**
27  * Represents the <a target="_blank" HREF="http://www.w3.org/TR/html401/intro/sgmltut.html#didx-element-2">start tag</a> of an
28  * {@linkplain Element element} in a specific {@linkplain Source source} document.
29  * <p>
30  * A start tag always has a {@linkplain #getTagType() type} that is a subclass of {@link StartTagType}, meaning that any tag
31  * that does <b>not</b> start with the characters '<code>&lt;/</code>' is categorised as a start tag.
32  * <p>
33  * This includes many tags which stand alone, without a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag},
34  * and would not intuitively be categorised as a "start tag".
35  * For example, an HTML {@linkplain StartTagType#COMMENT comment} is represented as a single start tag that spans the whole comment,
36  * and does not have an end tag at all.
37  * <p>
38  * See the <a HREF="StartTagType.html#field_summary">static fields</a> defined in the {@link StartTagType} class for a list of the
39  * <a HREF="TagType.html#Standard">standard</a> start tag types.
40  * <p>
41  * <code>StartTag</code> instances are obtained using one of the following methods:
42  * <ul>
43  * <li>{@link Element#getStartTag()}
44  * <li>{@link Tag#findNextTag()}
45  * <li>{@link Tag#findPreviousTag()}
46  * <li>{@link Source#findPreviousStartTag(int pos)}
47  * <li>{@link Source#findPreviousStartTag(int pos, String name)}
48  * <li>{@link Source#findPreviousTag(int pos)}
49  * <li>{@link Source#findPreviousTag(int pos, TagType)}
50  * <li>{@link Source#findNextStartTag(int pos)}
51  * <li>{@link Source#findNextStartTag(int pos, String name)}
52  * <li>{@link Source#findNextStartTag(int pos, String attributeName, String value, boolean valueCaseSensitive)}
53  * <li>{@link Source#findNextTag(int pos)}
54  * <li>{@link Source#findNextTag(int pos, TagType)}
55  * <li>{@link Source#findEnclosingTag(int pos)}
56  * <li>{@link Source#findEnclosingTag(int pos, TagType)}
57  * <li>{@link Source#getTagAt(int pos)}
58  * <li>{@link Segment#findAllStartTags()}
59  * <li>{@link Segment#findAllStartTags(String name)}
60  * <li>{@link Segment#findAllStartTags(String attributeName, String value, boolean valueCaseSensitive)}
61  * <li>{@link Segment#findAllTags()}
62  * <li>{@link Segment#findAllTags(TagType)}
63  * </ul>
64  * <p>
65  * The methods above which accept a <code>name</code> parameter are categorised as <a HREF="Tag.html#NamedSearch">named search</a> methods.
66  * <p>
67  * In such methods dealing with start tags, specifying an argument to the <code>name</code> parameter that ends in a
68  * colon (<code>:</code>) searches for all start tags in the specified XML namespace.
69  * <p>
70  * The constants defined in the {@link HTMLElementName} interface can be used directly as arguments to these <code>name</code> parameters.
71  * For example, <code>source.findAllStartTags(</code>{@link HTMLElementName#A}<code>)</code> is equivalent to
72  * <code>source.findAllStartTags("a")</code>, and finds all hyperlink start tags.
73  * <p>
74  * The {@link Tag} superclass defines a method called {@link Tag#getName() getName()} to get the name of this start tag.
75  * <p>
76  * See also the XML 1.0 specification for <a target="_blank" HREF="http://www.w3.org/TR/REC-xml#dt-stag">start tags</a>.
77  *
78  * @see Tag
79  * @see Element
80  * @see EndTag
81  */

82 public final class StartTag extends Tag {
83     private final Attributes attributes;
84     final StartTagType startTagType;
85
86     /**
87      * Constructs a new <code>StartTag</code>.
88      *
89      * @param source the {@link Source} document.
90      * @param begin the character position in the source document where this tag {@linkplain Segment#getBegin() begins}.
91      * @param end the character position in the source document where this tag {@linkplain Segment#getEnd() ends}.
92      * @param startTagType the {@linkplain #getStartTagType() type} of the start tag.
93      * @param name the {@linkplain Tag#getName() name} of the tag.
94      * @param attributes the {@linkplain #getAttributes() attributes} of the tag.
95      */

StartTag(final Source source, final int begin, final int end, final StartTagType startTagType, final String name, final Attributes attributes) {
    // Source, span and name are handed up to the Tag superclass constructor.
    super(source,begin,end,name);
    this.startTagType=startTagType;
    this.attributes=attributes;
}
101
102     /**
103      * Returns the {@linkplain Element element} that is started by this start tag.
104      * Guaranteed not <code>null</code>.
105      * <h4>Example 1: Elements for which the {@linkplain HTMLElements#getEndTagRequiredElementNames() end tag is required}</h4>
106      * <pre>
107      * 1. &lt;div&gt;
108      * 2. &lt;div&gt;
109      * 3. &lt;div&gt;
110      * 4. &lt;div&gt;This is line 4&lt;/div&gt;
111      * 5. &lt;/div&gt;
112      * 6. &lt;div&gt;This is line 6&lt;/div&gt;
113      * 7. &lt;/div&gt;</pre>
114      * <ul>
115      * <li>The start tag on line 1 returns an empty element spanning only the start tag.
116      * This is because the end tag of a <code>&lt;div&gt;</code> element is required,
117      * making the sample code invalid as all the end tags are matched with other start tags.
118      * <li>The start tag on line 2 returns an element spanning to the end of line 7.
119      * <li>The start tag on line 3 returns an element spanning to the end of line 5.
120      * <li>The start tag on line 4 returns an element spanning to the end of line 4.
121      * <li>The start tag on line 6 returns an element spanning to the end of line 6.
122      * </ul>
123      * <h4>Example 2: Elements for which the {@linkplain HTMLElements#getEndTagOptionalElementNames() end tag is optional}</h4>
124      * <pre>
125      * 1. &lt;ul&gt;
126      * 2. &lt;li&gt;item 1
127      * 3. &lt;li&gt;item 2
128      * 4. &lt;ul&gt;
129      * 5. &lt;li&gt;subitem 1&lt;/li&gt;
130      * 6. &lt;li&gt;subitem 2
131      * 7. &lt;/ul&gt;
132      * 8. &lt;li&gt;item 3&lt;/li&gt;
133      * 9. &lt;/ul&gt;</pre>
134      * <ul>
135      * <li>The start tag on line 1 returns an element spanning to the end of line 9.
136      * <li>The start tag on line 2 returns an element spanning to the start of the <code>&lt;li&gt;</code> start tag on line 3.
137      * <li>The start tag on line 3 returns an element spanning to the start of the <code>&lt;li&gt;</code> start tag on line 8.
138      * <li>The start tag on line 4 returns an element spanning to the end of line 7.
139      * <li>The start tag on line 5 returns an element spanning to the end of line 5.
140      * <li>The start tag on line 6 returns an element spanning to the start of the <code>&lt;/ul&gt;</code> end tag on line 7.
141      * <li>The start tag on line 8 returns an element spanning to the end of line 8.
142      * </ul>
143      *
144      * @return the {@linkplain Element element} that is started by this start tag.
145      */

146     public Element getElement() {
147         if (element==Element.NOT_CACHED) {
148             final EndTag endTag=findEndTagInternal();
149             element=new Element(source,this,endTag);
150             if (endTag!=null) {
151                 if (endTag.element!=Element.NOT_CACHED)
152                     if (source.isLoggingEnabled()) source.log(source.getRowColumnVector(endTag.begin).appendTo(new StringBuffer JavaDoc(200).append("End tag ").append(endTag).append(" at ")).append(" terminates more than one element").toString()); // presumably impossible, but log it just in case
153
endTag.element=element;
154             }
155         }
156         return element;
157     }
158
159     /**
160      * Indicates whether this start tag is syntactically an <a target="_blank" HREF="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>.
161      * <p>
162      * This is signified by the characters "/&gt;" at the end of the start tag.
163      * <p>
164      * Only a {@linkplain StartTagType#NORMAL normal} start tag can be an empty-element tag.
165      * <p>
166      * This property simply reports whether the syntax of the start tag is consistent with that of an empty-element tag,
167      * it does not guarantee that this start tag's {@linkplain #getElement() element} is actually {@linkplain Element#isEmpty() empty}.
168      * <p>
169      * This possible discrepancy reflects the way major browsers interpret illegal empty element tags used in
170      * <a HREF="HTMLElements.html#HTMLElement">HTML elements</a>, and is explained further in the documentation of the
171      * {@link Element#isEmptyElementTag()} property.
172      * <p>
173      * Compare this property with the {@link Element#isEmptyElementTag()} property, which does check that the element is actually empty.
174      *
175      * @return <code>true</code> if this start tag is syntactically an <a target="_blank" HREF="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>, otherwise <code>false</code>.
176      */

177     public boolean isEmptyElementTag() {
178         return startTagType==StartTagType.NORMAL && source.charAt(end-2)=='/';
179     }
180
181     /**
182      * Returns the {@linkplain StartTagType type} of this start tag.
183      * <p>
184      * This is equivalent to <code>(StartTagType)</code>{@link #getTagType()}.
185      *
186      * @return the {@linkplain StartTagType type} of this start tag.
187      */

188     public StartTagType getStartTagType() {
189         return startTagType;
190     }
191
192     // Documentation inherited from Tag
193
public TagType getTagType() {
194         return startTagType;
195     }
196
197     /**
198      * Returns the attributes specified in this start tag.
199      * <p>
200      * Return value is not <code>null</code> if and only if
201      * {@link #getStartTagType()}<code>.</code>{@link StartTagType#hasAttributes() hasAttributes()}<code>==true</code>.
202      * <p>
203      * To force the parsing of attributes in other start tag types, use the {@link #parseAttributes()} method instead.
204      *
205      * @return the attributes specified in this start tag, or <code>null</code> if the {@linkplain #getStartTagType() type} of this start tag does not {@linkplain StartTagType#hasAttributes() have attributes}.
206      * @see #parseAttributes()
207      * @see Source#parseAttributes(int pos, int maxEnd)
208      */

209     public Attributes getAttributes() {
210         return attributes;
211     }
212
213     /**
214      * Returns the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name (case insensitive).
215      * <p>
216      * Returns <code>null</code> if this start tag does not {@linkplain StartTagType#hasAttributes() have attributes},
217      * no attribute with the specified name exists or the attribute {@linkplain Attribute#hasValue() has no value}.
218      * <p>
219      * This is equivalent to {@link #getAttributes()}<code>.</code>{@link Attributes#getValue(String) getValue(attributeName)},
220      * except that it returns <code>null</code> if this start tag does not have attributes instead of throwing a
221      * <code>NullPointerException</code>.
222      *
223      * @param attributeName the name of the attribute to get.
224      * @return the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name, or <code>null</code> if the attribute does not exist or {@linkplain Attribute#hasValue() has no value}.
225      */

226     public String JavaDoc getAttributeValue(final String JavaDoc attributeName) {
227         return attributes==null ? null : attributes.getValue(attributeName);
228     }
229
230     /**
231      * Parses the attributes specified in this start tag, regardless of the type of start tag.
232      * This method is only required in the unusual situation where attributes exist in a start tag whose
233      * {@linkplain #getStartTagType() type} doesn't {@linkplain StartTagType#hasAttributes() have attributes}.
234      * <p>
235      * This method returns the cached attributes from the {@link StartTag#getAttributes()} method
236      * if its value is not <code>null</code>, otherwise the source is physically parsed with each call to this method.
237      * <p>
238      * This is equivalent to {@link #parseAttributes(int) parseAttributes}<code>(</code>{@link Attributes#getDefaultMaxErrorCount()}<code>)</code>.
239      *
240      * @return the attributes specified in this start tag, or <code>null</code> if too many errors occur while parsing.
241      * @see #getAttributes()
242      * @see Source#parseAttributes(int pos, int maxEnd)
243      */

244     public Attributes parseAttributes() {
245         return parseAttributes(Attributes.getDefaultMaxErrorCount());
246     }
247
248     /**
249      * Parses the attributes specified in this start tag, regardless of the type of start tag.
250      * This method is only required in the unusual situation where attributes exist in a start tag whose
251      * {@linkplain #getStartTagType() type} doesn't {@linkplain StartTagType#hasAttributes() have attributes}.
252      * <p>
253      * See the documentation of the {@link #parseAttributes()} method for more information.
254      *
255      * @param maxErrorCount the maximum number of minor errors allowed while parsing
256      * @return the attributes specified in this start tag, or <code>null</code> if too many errors occur while parsing.
257      * @see #getAttributes()
258      */

259     public Attributes parseAttributes(final int maxErrorCount) {
260         if (attributes!=null) return attributes;
261         final int maxEnd=end-startTagType.getClosingDelimiter().length();
262         int attributesBegin=begin+1+name.length();
263         // skip any non-name characters directly after the name (which are quite common)
264
while (!isXMLNameStartChar(source.charAt(attributesBegin))) {
265             attributesBegin++;
266             if (attributesBegin==maxEnd) return null;
267         }
268         return Attributes.construct(source,begin,attributesBegin,maxEnd,startTagType,name,maxErrorCount);
269     }
270
271     /**
272      * Returns the segment between the end of the tag's {@linkplain #getName() name} and the start of its <a HREF="#EndDelimiter">end delimiter</a>.
273      * <p>
274      * This method is normally only of use for start tags whose content is something other than {@linkplain #getAttributes() attributes}.
275      * <p>
276      * A new {@link Segment} object is created with each call to this method.
277      *
278      * @return the segment between the end of the tag's {@linkplain #getName() name} and the start of the <a HREF="#EndDelimiter">end delimiter</a>.
279      */

280     public Segment getTagContent() {
281         return new Segment(source,begin+1+name.length(),end-startTagType.getClosingDelimiter().length());
282     }
283
284     /**
285      * Returns the {@link FormControl} defined by this start tag.
286      * <p>
287      * This is equivalent to {@link #getElement()}<code>.</code>{@link Element#getFormControl() getFormControl()}.
288      *
289      * @return the {@link FormControl} defined by this start tag, or <code>null</code> if it is not a <a target="_blank" HREF="http://www.w3.org/TR/html401/interact/forms.html#form-controls">control</a>.
290      */

291     public FormControl getFormControl() {
292         return getElement().getFormControl();
293     }
294
295     /**
296      * Indicates whether a matching end tag is forbidden.
297      * <p>
298      * This property returns <code>true</code> if one of the following conditions is met:
299      * <ul>
300      * <li>The {@linkplain #getStartTagType() type} of this start tag does not specify a
301      * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}.
302      * <li>The {@linkplain #getName() name} of this start tag indicates it is the start of an
303      * <a HREF="Element.html#HTML">HTML element</a> whose {@linkplain HTMLElements#getEndTagForbiddenElementNames() end tag is forbidden}.
304      * <li>This start tag is syntactically an {@linkplain #isEmptyElementTag() empty-element tag} and its
305      * {@linkplain #getName() name} indicates it is the start of a <a HREF="HTMLElements.html#NonHTMLElement">non-HTML element</a>.
306      * </ul>
307      * <p>
308      * If this property returns <code>true</code> then this start tag's {@linkplain #getElement() element} will always be a
309      * <a HREF="Element.html#SingleTag">single tag element</a>.
310      *
311      * @return <code>true</code> if a matching end tag is forbidden, otherwise <code>false</code>.
312      */

313     public boolean isEndTagForbidden() {
314         if (getStartTagType()!=StartTagType.NORMAL)
315             return getStartTagType().getCorrespondingEndTagType()==null;
316         if (HTMLElements.getEndTagForbiddenElementNames().contains(name)) return true;
317         if (HTMLElements.getElementNames().contains(name)) return false;
318         return isEmptyElementTag();
319     }
320
321     /**
322      * Indicates whether a matching end tag is required.
323      * <p>
324      * This property returns <code>true</code> if one of the following conditions is met:
325      * <ul>
326      * <li>The {@linkplain #getStartTagType() type} of this start tag is NOT {@link StartTagType#NORMAL}, but specifies a
327      * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}.
328      * <li>The {@linkplain #getName() name} of this start tag indicates it is the start of an
329      * <a HREF="Element.html#HTML">HTML element</a> whose {@linkplain HTMLElements#getEndTagRequiredElementNames() end tag is required}.
330      * <li>This start tag is syntactically NOT an {@linkplain #isEmptyElementTag() empty-element tag} and its
331      * {@linkplain #getName() name} indicates it is the start of a <a HREF="HTMLElements.html#NonHTMLElement">non-HTML element</a>.
332      * </ul>
333      *
334      * @return <code>true</code> if a matching end tag is required, otherwise <code>false</code>.
335      */

336     public boolean isEndTagRequired() {
337         if (getStartTagType()!=StartTagType.NORMAL)
338             return getStartTagType().getCorrespondingEndTagType()!=null;
339         if (HTMLElements.getEndTagRequiredElementNames().contains(name)) return true;
340         if (HTMLElements.getElementNames().contains(name)) return false;
341         return !isEmptyElementTag();
342     }
343
344     // Documentation inherited from Tag
345
public boolean isUnregistered() {
346         return startTagType==StartTagType.UNREGISTERED;
347     }
348
349     /**
350      * Returns an XML representation of this start tag.
351      * <p>
352      * This is equivalent to {@link #tidy(boolean) tidy(false)}, thereby keeping the {@linkplain #getName() name} of the tag in its original case.
353      * <p>
354      * See the documentation of the {@link #tidy(boolean toXHTML)} method for more details.
355      *
356      * @return an XML representation of this start tag, or the {@linkplain Segment#toString() source text} if it is of a {@linkplain #getStartTagType() type} that does not {@linkplain StartTagType#hasAttributes() have attributes}.
357      */

358     public String JavaDoc tidy() {
359         return tidy(false);
360     }
361
362     /**
363      * Returns an XML or XHTML representation of this start tag.
364      * <p>
365      * The tidying of the tag is carried out as follows:
366      * <ul>
367      * <li>if this start tag is of a {@linkplain #getStartTagType() type} that does not {@linkplain StartTagType#hasAttributes() have attributes},
368      * then the original {@linkplain Segment#toString() source text} is returned.
369      * <li>name converted to lower case if the <code>toXHTML</code> argument is <code>true</code> and this is a {@linkplain StartTagType#NORMAL normal} start tag
370      * <li>attributes separated by a single space
371      * <li>attribute names in original case
372      * <li>attribute values are enclosed in double quotes and {@linkplain CharacterReference#reencode(CharSequence) re-encoded}
373      * <li>if this start tag forms an <a HREF="Element.html#HTML">HTML element</a> that has no {@linkplain Element#getEndTag() end tag},
374      * a slash is inserted before the closing angle bracket, separated from the {@linkplain #getName() name} or last attribute by a single space.
375      * <li>if an attribute value contains a {@linkplain TagType#isServerTag() server tag} it is inserted verbatim instead of being
376      * {@linkplain CharacterReference#encode(CharSequence) encoded}.
377      * </ul>
378      * <p>
379      * The <code>toXHTML</code> parameter determines only whether the name is converted to lower case for {@linkplain StartTagType#NORMAL normal} tags.
380      * In all other respects the generated tag is already valid XHTML.
381      * <h4>Example:</h4>
382      * The following source text:
383      * <p>
384      * <code>&lt;INPUT name=Company value='G&amp;uuml;nter O&amp#39;Reilly &amp;amp Associ&eacute;s'&gt;</code>
385      * <p>
386      * produces the following regenerated HTML:
387      * <p>
388      * <code>&lt;input name="Company" value="G&amp;uuml;nter O'Reilly &amp;amp; Associ&amp;eacute;s" /&gt;</code>
389      *
390      * @param toXHTML specifies whether the output is XHTML.
391      * @return an XML or XHTML representation of this start tag, or the {@linkplain Segment#toString() source text} if it is of a {@linkplain #getStartTagType() type} that does not {@linkplain StartTagType#hasAttributes() have attributes}.
392      */

393     public String JavaDoc tidy(boolean toXHTML) {
394         if (attributes==null) return toString();
395         final StringBuffer JavaDoc sb=new StringBuffer JavaDoc();
396         sb.append('<');
397         if (toXHTML && startTagType==StartTagType.NORMAL) {
398             sb.append(name);
399         } else {
400             int i=begin+startTagType.startDelimiterPrefix.length();
401             final int nameSegmentEnd=i+name.length();
402             while (i<nameSegmentEnd) {
403                 sb.append(source.charAt(i));
404                 i++;
405             }
406         }
407         attributes.appendTidy(sb,findNextTag());
408         if (startTagType==StartTagType.NORMAL && getElement().getEndTag()==null && !HTMLElements.getEndTagOptionalElementNames().contains(name)) sb.append(" /");
409         sb.append(startTagType.getClosingDelimiter());
410         return sb.toString();
411     }
412
413     /**
414      * Generates the HTML text of a {@linkplain StartTagType#NORMAL normal} start tag with the specified tag name and {@linkplain Attributes#populateMap(Map,boolean) attributes map}.
415      * <p>
416      * The output of the attributes is as described in the {@link Attributes#generateHTML(Map attributesMap)} method.
417      * <p>
418      * The <code>emptyElementTag</code> parameter specifies whether the start tag should be an
419      * <a target="_blank" HREF="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>,
420      * in which case a slash is inserted before the closing angle bracket, separated from the name
421      * or last attribute by a single space.
422      * <h4>Example:</h4>
423      * The following code:
424      * <pre>
425      * LinkedHashMap attributesMap=new LinkedHashMap();
426      * attributesMap.put("name","Company");
427      * attributesMap.put("value","G\n00fcnter O'Reilly & Associ&eacute;s");
428      * System.out.println(StartTag.generateHTML("INPUT",attributesMap,true));</pre>
429      * generates the following output:
430      * <p>
431      * <code>&lt;INPUT name="Company" value="G&amp;uuml;nter O'Reilly &amp;amp; Associ&amp;eacute;s" /&gt;</code>
432      *
433      * @param tagName the name of the start tag.
434      * @param attributesMap a map containing attribute name/value pairs.
435      * @param emptyElementTag specifies whether the start tag should be an <a target="_blank" HREF="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>.
436      * @return the HTML text of a {@linkplain StartTagType#NORMAL normal} start tag with the specified tag name and {@linkplain Attributes#populateMap(Map,boolean) attributes map}.
437      * @see EndTag#generateHTML(String tagName)
438      */

439     public static String JavaDoc generateHTML(final String JavaDoc tagName, final Map attributesMap, final boolean emptyElementTag) {
440         final StringWriter stringWriter=new StringWriter();
441         final StringBuffer JavaDoc sb=stringWriter.getBuffer();
442         sb.append('<').append(tagName);
443         try {Attributes.appendHTML(stringWriter,attributesMap);} catch (IOException ex) {} // IOException never occurs in StringWriter
444
if (emptyElementTag)
445             sb.append(" />");
446         else
447             sb.append('>');
448         return sb.toString();
449     }
450
451     public String JavaDoc getDebugInfo() {
452         final StringBuffer JavaDoc sb=new StringBuffer JavaDoc();
453         appendStartTagDebugInfo(sb);
454         sb.append(super.getDebugInfo());
455         return sb.toString();
456     }
457
458     StringBuffer JavaDoc appendStartTagDebugInfo(final StringBuffer JavaDoc sb) {
459         sb.append('"').append(name).append("\" ");
460         if (startTagType!=StartTagType.NORMAL) sb.append('(').append(startTagType.getDescription()).append(") ");
461         return sb;
462     }
463
464     /**
465      * Returns an XML representation of this start tag.
466      * <p>
467      * This method has been deprecated as of version 2.2 and replaced with the exactly equivalent {@link #tidy()} method.
468      *
469      * @return an XML representation of this start tag, or the {@linkplain Segment#toString() source text} if it is of a {@linkplain #getStartTagType() type} that does not {@linkplain StartTagType#hasAttributes() have attributes}
470      * @deprecated Use {@link #tidy()} instead.
471      */

472     public String JavaDoc regenerateHTML() {
473         return tidy();
474     }
475
476     /**
477      * Indicates whether a matching end tag is <i>optional</i> according to the HTML 4.01 specification.
478      * <p>
479      * This method has been deprecated as of version 2.0 and replaced with the {@link HTMLElements#getEndTagOptionalElementNames()}
480      * static method.
481      * <p>
482      * This property is only relevant to start tags forming part of an <a HREF="Element.html#HTML">HTML element</a>
483      * and returns <code>false</code> in all other cases.
484      *
485      * @return <code>true</code> if a matching end tag is <i>optional</i> according to the HTML 4.01 specification, otherwise <code>false</code>.
486      * @deprecated Use {@link #getStartTagType()}<code>==</code>{@link StartTagType#NORMAL}<code> && </code>{@link HTMLElements#getEndTagOptionalElementNames()}<code>.contains(</code>{@link #getName() getName()}<code>)</code> instead.
487      */

488     public boolean isEndTagOptional() {
489         return getStartTagType()==StartTagType.NORMAL && HTMLElements.getEndTagOptionalElementNames().contains(name);
490     }
491
492     /**
493      * Returns the end tag that corresponds to this start tag.
494      * <p>
495      * This method has been deprecated as of version 2.0 as it has existed mainly for backward compatibility with version 1.0.
496      * <p>
497      * The {@link #getElement()} method is much more useful as it determines the span of the
498      * element even if the end tag is {@linkplain #isEndTagOptional() optional} and is not present in the source document.
499      * <p>
500      * This method on the other hand just returns <code>null</code> in the above case, revealing nothing about where the element ends.
501      *
502      * @return the end tag that corresponds to this start tag, or <code>null</code> if it does not exist in the source document.
503      * @deprecated Use {@link #getElement()}<code>.</code>{@link Element#getEndTag() getEndTag()} instead.
504      */

505     public EndTag findEndTag() {
506         return getElement().getEndTag();
507     }
508
509     /**
510      * Returns the {@link FormControlType} of this start tag.
511      * <p>
512      * This method has been deprecated as of version 2.0 as it is no longer used internally and
513      * has no practical use as a public method.
514      *
515      * @return the form control type of this start tag, or <code>null</code> if it is not a <a target="_blank" HREF="http://www.w3.org/TR/html401/interact/forms.html#form-controls">control</a>.
516      * @see Element#getFormControl()
517      * @deprecated Use {@link #getFormControl()}<code>.</code>{@link FormControl#getFormControlType() getFormControlType()} instead.
518      */

519     public FormControlType getFormControlType() {
520         final FormControl formControl=getFormControl();
521         if (formControl==null) return null;
522         return formControl.getFormControlType();
523     }
524
525     /**
526      * Returns the segment containing the text that immediately follows this start tag up until the start of the following tag.
527      * <p>
528      * Guaranteed not <code>null</code>.
529      * <p>
530      * This method has been deprecated as of version 2.0 as it is no longer used internally and
531      * has no practical use as a public method.
532      *
533      * @return the segment containing the text that immediately follows this start tag up until the start of the following tag.
534      * @deprecated Use {@link Segment#Segment(Source,int,int) new Segment}<code>(source,</code>{@link #getEnd() getEnd()}<code>,</code>{@link #findNextTag() findNextTag()}<code>.</code>{@link #getBegin() getBegin()}<code>)</code> instead.
535      */

536     public Segment getFollowingTextSegment() {
537         int endData=source.getParseText().indexOf('<',end);
538         if (endData==-1) endData=source.end;
539         return new Segment(source,end,endData);
540     }
541
542     /**
543      * Indicates whether the start tag is a server tag.
544      * <p>
545      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
546      *
547      * @return <code>true</code> if the start tag is a server tag, otherwise <code>false</code>.
548      * @deprecated Use {@link #getTagType() getTagType()}.{@link TagType#isServerTag() isServerTag()} instead.
549      */

550     public boolean isServerTag() {
551         return getTagType().isServerTag();
552     }
553
554     /**
555      * Indicates whether this start tag is of type {@link StartTagType#COMMENT}.
556      * <p>
557      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
558      *
559      * @return <code>true</code> if this start tag is of type {@link StartTagType#COMMENT}, otherwise <code>false</code>.
560      * @deprecated Use {@link #getTagType() getTagType()}<code>==</code>{@link StartTagType#COMMENT} instead.
561      */

562     public boolean isComment() {
563         return startTagType==StartTagType.COMMENT;
564     }
565
566     /**
567      * Indicates whether this start tag has a {@linkplain #getTagType() type} of {@link StartTagType#XML_PROCESSING_INSTRUCTION} or is any other tag starting with "&lt;?".
568      * <p>
569      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
570      *
571      * @return <code>true</code> if this start tag has a {@linkplain #getTagType() type} of {@link StartTagType#XML_PROCESSING_INSTRUCTION} or is any other tag starting with "&lt;?", otherwise <code>false</code>.
572      * @deprecated Use <code>charAt(1)=='?'</code> instead for backward compatibility.
573      */

574     public boolean isProcessingInstruction() {
575         return charAt(1)=='?';
576     }
577
578     /**
579      * Indicates whether this start tag has a {@linkplain #getTagType() type} of {@link StartTagType#XML_DECLARATION}.
580      * <p>
581      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
582      *
583      * @return <code>true</code> if this start tag has a {@linkplain #getTagType() type} of {@link StartTagType#XML_DECLARATION}, otherwise <code>false</code>.
584      * @deprecated Use {@link #getTagType() getTagType()}<code>==</code>{@link StartTagType#XML_DECLARATION} instead.
585      */

586     public boolean isXMLDeclaration() {
587         return startTagType==StartTagType.XML_DECLARATION;
588     }
589
590     /**
591      * Indicates whether this start tag has a {@linkplain #getTagType() type} of {@link StartTagType#DOCTYPE_DECLARATION}.
592      * <p>
593      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
594      *
595      * @return <code>true</code> if this start tag has a {@linkplain #getTagType() type} of {@link StartTagType#DOCTYPE_DECLARATION}, otherwise <code>false</code>.
596      * @deprecated Use {@link #getTagType() getTagType()}<code>==</code>{@link StartTagType#DOCTYPE_DECLARATION} instead.
597      */

598     public boolean isDocTypeDeclaration() {
599         return startTagType==StartTagType.DOCTYPE_DECLARATION;
600     }
601
602     /**
603      * Indicates whether this start tag has a {@linkplain #getTagType() type} of {@linkplain StartTagType#SERVER_COMMON}.
604      * <p>
605      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
606      *
607      * @return <code>true</code> if this start tag has a {@linkplain #getTagType() type} of {@linkplain StartTagType#SERVER_COMMON}, otherwise <code>false</code>.
608      * @deprecated Use {@link #getTagType() getTagType()}<code>==</code>{@link StartTagType#SERVER_COMMON} instead.
609      */

610     public boolean isCommonServerTag() {
611         return startTagType==StartTagType.SERVER_COMMON;
612     }
613
614     /**
615      * Indicates whether this start tag has a {@linkplain #getTagType() type} of {@link PHPTagTypes#PHP_STANDARD}.
616      * <p>
617      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
618      *
619      * @return <code>true</code> if this start tag has a {@linkplain #getTagType() type} of {@link PHPTagTypes#PHP_STANDARD}, otherwise <code>false</code>.
620      * @deprecated Use {@link #getTagType() getTagType()}<code>==</code>{@link PHPTagTypes#PHP_STANDARD} instead.
621      */

622     public boolean isPHPTag() {
623         return startTagType==PHPTagTypes.PHP_STANDARD;
624     }
625
626     /**
627      * Indicates whether this start tag would be {@linkplain MasonTagTypes#isParsedByMason(TagType) parsed by a Mason server}.
628      * <p>
629      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
630      *
631      * @return <code>true</code> if this start tag would be {@linkplain MasonTagTypes#isParsedByMason(TagType) parsed by a Mason server}, otherwise <code>false</code>.
632      * @deprecated Use {@link MasonTagTypes}<code>.</code>{@link MasonTagTypes#isParsedByMason(TagType) isParsedByMason}<code>(</code>{@link #getTagType() getTagType()}<code>)</code> instead.
633      */

634     public boolean isMasonTag() {
635         return startTagType==StartTagType.SERVER_COMMON || startTagType==MasonTagTypes.MASON_NAMED_BLOCK || startTagType==MasonTagTypes.MASON_COMPONENT_CALL || startTagType==MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT;
636     }
637
638     /**
639      * Indicates whether this start tag has a {@linkplain #getTagType() type} of {@linkplain MasonTagTypes#MASON_NAMED_BLOCK}.
640      * <p>
641      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
642      *
643      * @return <code>true</code> if this start tag has a {@linkplain #getTagType() type} of {@linkplain MasonTagTypes#MASON_NAMED_BLOCK}, otherwise <code>false</code>.
644      * @deprecated Use {@link #getTagType() getTagType()}<code>==</code>{@link MasonTagTypes#MASON_NAMED_BLOCK} instead.
645      */

646     public boolean isMasonNamedBlock() {
647         return startTagType==MasonTagTypes.MASON_NAMED_BLOCK;
648     }
649
650     /**
651      * Indicates whether this start tag has a {@linkplain #getTagType() type} of {@linkplain MasonTagTypes#MASON_COMPONENT_CALL}.
652      * <p>
653      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
654      *
655      * @return <code>true</code> if this start tag has a {@linkplain #getTagType() type} of {@linkplain MasonTagTypes#MASON_COMPONENT_CALL}, otherwise <code>false</code>.
656      * @deprecated Use {@link #getTagType() getTagType()}<code>==</code>{@link MasonTagTypes#MASON_COMPONENT_CALL} instead.
657      */

658     public boolean isMasonComponentCall() {
659         return startTagType==MasonTagTypes.MASON_COMPONENT_CALL;
660     }
661
662     /**
663      * Indicates whether this start tag has a {@linkplain #getTagType() type} of {@linkplain MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}.
664      * <p>
665      * This method has been deprecated as of version 2.0 as its functionality is now easily performed without a dedicated method.
666      *
667      * @return <code>true</code> if this start tag has a {@linkplain #getTagType() type} of {@linkplain MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}, otherwise <code>false</code>.
668      * @deprecated Use {@link #getTagType() getTagType()}<code>==</code>{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT} instead.
669      */

670     public boolean isMasonComponentCalledWithContent() {
671         return startTagType==MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT;
672     }
673
674     private EndTag findEndTagInternal() {
675         boolean checkForEmptyElementTag=true;
676         // A missing optional end tag returns a zero length EndTag instead of null
677
if (startTagType==StartTagType.NORMAL) {
678             final HTMLElementTerminatingTagNameSets terminatingTagNameSets=HTMLElements.getTerminatingTagNameSets(name);
679             if (terminatingTagNameSets!=null) // end tag is optional
680
return findOptionalEndTag(terminatingTagNameSets);
681             if (HTMLElements.getEndTagForbiddenElementNames().contains(name)) // end tag is forbidden
682
return null;
683             checkForEmptyElementTag=!HTMLElements.getEndTagRequiredElementNames().contains(name); // check for empty-element tags if tag is not an HTML element
684
if (checkForEmptyElementTag && isEmptyElementTag()) // non-html empty-element tag
685
return null;
686         } else if (startTagType.getCorrespondingEndTagType()==null) {
687             return null;
688         }
689         // This is either a start tag type other than NORMAL that requires an end tag, or an HTML element tag that requires an end tag,
690
// or a non-HTML element tag that is not an empty-element tag.
691
// In all of these cases the end tag is required.
692
final EndTag nextEndTag=source.findNextEndTag(end,name,startTagType.getCorrespondingEndTagType());
693         if (nextEndTag!=null) {
694             if (HTMLElements.END_TAG_REQUIRED_NESTING_FORBIDDEN_SET.contains(name)) {
695                 final StartTag nextStartTag=source.findNextStartTag(end,name);
696                 if (nextStartTag==null || nextStartTag.begin>nextEndTag.begin) return nextEndTag;
697                 if (source.isLoggingEnabled()) source.log(source.getRowColumnVector(begin).appendTo(new StringBuffer JavaDoc(200).append("StartTag at ")).append(" missing required end tag - invalid nested start tag encountered before end tag").toString());
698                 // Terminate the element at the start of the invalidly nested start tag.
699
// This is how IE and Mozilla treat illegally nested A elements, but other elements may vary.
700
return new EndTag(source,nextStartTag.begin,nextStartTag.begin,EndTagType.NORMAL,name);
701             }
702             final Segment[] findResult=findEndTag(nextEndTag,checkForEmptyElementTag);
703             if (findResult!=null) return (EndTag)findResult[0];
704         }
705         if (source.isLoggingEnabled()) source.log(source.getRowColumnVector(begin).appendTo(new StringBuffer JavaDoc(200).append("StartTag at ")).append(" missing required end tag").toString());
706         return null;
707     }
708
709     private EndTag findOptionalEndTag(final HTMLElementTerminatingTagNameSets terminatingTagNameSets) {
710         int pos=end;
711         while (pos<source.end) {
712             final Tag tag=Tag.findPreviousOrNextTag(source,pos,false);
713             if (tag==null) break;
714             Set terminatingTagNameSet;
715             if (tag instanceof EndTag) {
716                 if (tag.name==name) return (EndTag)tag;
717                 terminatingTagNameSet=terminatingTagNameSets.TerminatingEndTagNameSet;
718             } else {
719                 terminatingTagNameSet=terminatingTagNameSets.NonterminatingElementNameSet;
720                 if (terminatingTagNameSet!=null && terminatingTagNameSet.contains(tag.name)) {
721                     Element nonterminatingElement=((StartTag)tag).getElement();
722                     pos=nonterminatingElement.end;
723                     continue;
724                 }
725                 terminatingTagNameSet=terminatingTagNameSets.TerminatingStartTagNameSet;
726             }
727             if (terminatingTagNameSet!=null && terminatingTagNameSet.contains(tag.name)) return new EndTag(source,tag.begin,tag.begin,EndTagType.NORMAL,name);
728             pos=tag.begin+1;
729         }
730         // Ran out of tags. The only legitimate case of this happening is if the HTML end tag is missing, in which case the end of the element is the end of the source document
731
return new EndTag(source,source.end,source.end,EndTagType.NORMAL,name);
732     }
733
734     static StartTag findPreviousOrNext(final Source source, final int pos, final String JavaDoc searchName, final boolean isXMLTagName, final boolean previous) {
735         // searchName is already in lower case
736
if (searchName==null) return findPreviousOrNext(source,pos,previous);
737         if (searchName.length()==0) throw new IllegalArgumentException JavaDoc("searchName argument must not be zero length");
738         final char[] startDelimiterCharArray=new char[searchName.length()+1];
739         startDelimiterCharArray[0]='<';
740         for (int i=1; i<startDelimiterCharArray.length; i++) startDelimiterCharArray[i]=searchName.charAt(i-1);
741         if (startDelimiterCharArray[1]=='/') throw new IllegalArgumentException JavaDoc("searchName argument \""+searchName+"\" must not start with '/'");
742         try {
743             final ParseText parseText=source.getParseText();
744             int begin=pos;
745             do {
746                 begin=previous?parseText.lastIndexOf(startDelimiterCharArray,begin):parseText.indexOf(startDelimiterCharArray,begin);
747                 if (begin==-1) return null;
748                 final StartTag startTag=(StartTag)Tag.getTagAt(source,begin);
749                 if (startTag==null || (isXMLTagName && startTag.isUnregistered())) continue;
750                 if (startTag.startTagType.isNameAfterPrefixRequired() && startTag.name.length()>searchName.length()) {
751                     // The name of the start tag is longer than the search name, and the type of tag indicates
752
// that we are probably looking for an exact match.
753
// (eg searchName="a", startTag.name="applet" -> reject)
754
// We only require an exact match if the last character of the search name is part of the name, as the
755
// search name might be just the prefix of a server tag.
756
// (eg searchName="?", startTag.name="?abc" -> accept, but searchName="?a", startTag.name="?abc" -> reject)
757
// The only exception to this is if the last character of the search name is a colon (which also forms part of
758
// the name), but signifies that we want to search on the entire namespace.
759
// (eg searchName="o:", startTag.name="o:p" -> accept)
760
char lastSearchNameChar=searchName.charAt(searchName.length()-1);
761                     if (lastSearchNameChar!=':' && isXMLNameChar(lastSearchNameChar)) continue;
762                 }
763                 return startTag;
764             } while (previous ? (begin-=2)>=0 : (begin+=1)<source.end);
765         } catch (IndexOutOfBoundsException JavaDoc ex) {
766             // this should only happen when the end of file is reached in the middle of a tag.
767
// we don't have to do anything to handle it as there are no more tags anyway.
768
}
769         return null;
770     }
771
772     static StartTag findPreviousOrNext(final Source source, int pos, final boolean previous) {
773         while (true) {
774             final Tag tag=Tag.findPreviousOrNextTag(source,pos,previous);
775             if (tag==null) return null;
776             if (tag instanceof StartTag) return (StartTag)tag;
777             pos+=previous?-1:1;
778         }
779     }
780     
781     static StartTag findNext(final Source source, int pos, final String JavaDoc attributeName, final String JavaDoc value, final boolean valueCaseSensitive) {
782         if (value==null) throw new IllegalArgumentException JavaDoc();
783         final char[] valueCharArray=value.toLowerCase().toCharArray();
784         final ParseText parseText=source.getParseText();
785         while (pos<source.end) {
786             pos=parseText.indexOf(valueCharArray,pos);
787             if (pos==-1) return null;
788             final Tag tag=source.findEnclosingTag(pos);
789             if (tag==null || !(tag instanceof StartTag)) {
790                 pos++;
791                 continue;
792             }
793             final StartTag startTag=(StartTag)tag;
794             if (startTag.getAttributes()!=null) {
795                 final String JavaDoc attributeValue=startTag.getAttributes().getValue(attributeName);
796                 if (attributeValue!=null) {
797                     if (value.equals(attributeValue)) return startTag;
798                     if (value.equalsIgnoreCase(attributeValue)) {
799                         if (!valueCaseSensitive) return startTag;
800                         if (source.isLoggingEnabled()) source.log(source.getRowColumnVector(pos).appendTo(new StringBuffer JavaDoc(200)).append(": StartTag with attribute ").append(attributeName).append("=\"").append(attributeValue).append("\" ignored during search because its case does not match search value \"").append(value).append('"').toString());
801                     }
802                 }
803             }
804             pos=startTag.end+5; // next attribute value can't be less than 5 chars after last start tag
805
}
806         return null;
807     }
808
809     private Segment[] findEndTag(final EndTag nextEndTag, final boolean checkForEmptyElementTag) {
810         StartTag nextStartTag=source.findNextStartTag(end,name);
811         if (checkForEmptyElementTag) {
812             while (nextStartTag!=null && nextStartTag.isEmptyElementTag())
813                 nextStartTag=source.findNextStartTag(nextStartTag.end,name);
814         }
815         return findEndTag(end,nextStartTag,nextEndTag,checkForEmptyElementTag);
816     }
817
818     private Segment[] findEndTag(final int afterPos, StartTag nextStartTag, EndTag nextEndTag, final boolean checkForEmptyElementTag) {
819         // returns null if no end tag exists in the rest of the file, otherwise the following two segments:
820
// first is the matching end tag to this start tag. Must be present if array is returned.
821
// second is the next occurrence after the returned end tag of a start tag of the same name. (null if none exists)
822
if (nextEndTag==null) return null; // no end tag in the rest of the file
823
final Segment[] returnArray={nextEndTag, nextStartTag};
824         if (nextStartTag==null || nextStartTag.begin>nextEndTag.begin) return returnArray; // no more start tags of the same name in rest of file, or they occur after the end tag that we found. This means we have found the matching end tag.
825
final Segment[] findResult=nextStartTag.findEndTag(nextEndTag,checkForEmptyElementTag); // find the matching end tag to the interloping start tag
826
if (findResult==null) return null; // no end tag in the rest of the file
827
final EndTag nextStartTagsEndTag=(EndTag)findResult[0];
828         nextStartTag=(StartTag)findResult[1];
829         nextEndTag=source.findNextEndTag(nextStartTagsEndTag.end, name); // find end tag after the interloping start tag's end tag
830
return findEndTag(nextStartTagsEndTag.end,nextStartTag,nextEndTag,checkForEmptyElementTag); // recurse to see if this is the matching end tag
831
}
832 }
833
Popular Tags