KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > au > id > jericho > lib > html > Attribute


// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 2.2
// Copyright (C) 2006 Martin Jericho
// http://sourceforge.net/projects/jerichohtml/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

21 package au.id.jericho.lib.html;
22
23 import java.io.*;
24
25 /**
26  * Represents a single <a target="_blank" HREF="http://www.w3.org/TR/html401/intro/sgmltut.html#h-3.2.2">attribute</a>
27  * name/value segment within a {@link StartTag}.
28  * <p>
29  * An instance of this class is a representation of a single attribute in the source document and is not modifiable.
30  * The {@link AttributesOutputSegment} class provides the means to add, delete or modify attributes and
31  * their values for inclusion in an {@link OutputDocument}.
32  * <p>
33  * Obtained using the {@link Attributes#get(String key)} method.
34  * <p>
35  * See also the XML 1.0 specification for <a target="_blank" HREF="http://www.w3.org/TR/REC-xml#dt-attr">attributes</a>.
36  *
37  * @see Attributes
38  */

39 public final class Attribute extends Segment {
40     private final String JavaDoc key;
41     private final Segment nameSegment;
42     private final Segment valueSegment;
43     private final Segment valueSegmentIncludingQuotes;
44
45     static final String JavaDoc CHECKED="checked";
46     static final String JavaDoc CLASS="class";
47     static final String JavaDoc DISABLED="disabled";
48     static final String JavaDoc ID="id";
49     static final String JavaDoc MULTIPLE="multiple";
50     static final String JavaDoc NAME="name";
51     static final String JavaDoc SELECTED="selected";
52     static final String JavaDoc STYLE="style";
53     static final String JavaDoc TYPE="type";
54     static final String JavaDoc VALUE="value";
55
56     /**
57      * Constructs a new Attribute with no value part, called from Attributes class.
58      * <p>
59      * Note that the resulting Attribute segment has the same span as the supplied nameSegment.
60      *
61      * @param source the {@link Source} document.
62      * @param key the name of this attribute in lower case.
63      * @param nameSegment the segment representing the name.
64      */

65     Attribute(final Source source, final String JavaDoc key, final Segment nameSegment) {
66         this(source,key,nameSegment,null,null);
67     }
68
69     /**
70      * Constructs a new Attribute, called from Attributes class.
71      * <p>
72      * The resulting Attribute segment begins at the start of the nameSegment
73      * and finishes at the end of the valueSegmentIncludingQuotes. If this attribute
74      * has no value, it finishes at the end of the nameSegment.
75      * <p>
76      * If this attribute has no value, the <code>valueSegment</code> and <code>valueSegmentIncludingQuotes</code> must be null.
77      * The <valueSegmentIncludingQuotes</code> parameter must not be null if the <code>valueSegment</code> is not null, and vice versa
78      *
79      * @param source the {@link Source} document.
80      * @param key the name of this attribute in lower case.
81      * @param nameSegment the segment spanning the name.
82      * @param valueSegment the segment spanning the value.
83      * @param valueSegmentIncludingQuotes the segment spanning the value, including quotation marks if any.
84      */

85     Attribute(final Source source, final String JavaDoc key, final Segment nameSegment, final Segment valueSegment, final Segment valueSegmentIncludingQuotes) {
86         super(source,nameSegment.getBegin(),(valueSegmentIncludingQuotes==null ? nameSegment.getEnd() : valueSegmentIncludingQuotes.getEnd()));
87         this.key=key;
88         this.nameSegment=nameSegment;
89         this.valueSegment=valueSegment;
90         this.valueSegmentIncludingQuotes=valueSegmentIncludingQuotes;
91     }
92
93     /**
94      * Returns the name of this attribute in lower case.
95      * <p>
96      * This package treats all attribute names as case insensitive, consistent with
97      * <a target="_blank" HREF="http://www.w3.org/TR/html401/">HTML</a> but not consistent with
98      * <a target="_blank" HREF="http://www.w3.org/TR/xhtml1/">XHTML</a>.
99      *
100      * @return the name of this attribute in lower case.
101      * @see #getName()
102      */

103     public String JavaDoc getKey() {
104         return key;
105     }
106
107     /**
108      * Returns the name of this attribute in original case.
109      * <p>
110      * This is exactly equivalent to {@link #getNameSegment()}<code>.toString()</code>.
111      *
112      * @return the name of this attribute in original case.
113      * @see #getKey()
114      */

115     public String JavaDoc getName() {
116         return nameSegment.toString();
117     }
118
119     /**
120      * Returns the segment spanning the {@linkplain #getName() name} of this attribute.
121      * @return the segment spanning the {@linkplain #getName() name} of this attribute.
122      * @see #getName()
123      */

124     public Segment getNameSegment() {
125         return nameSegment;
126     }
127
128     /**
129      * Indicates whether this attribute has a value.
130      * <p>
131      * This method also returns <code>true</code> if this attribute has been assigned a zero-length value.
132      * <p>
133      * It only returns <code>false</code> if this attribute appears in
134      * <a target="_blank" HREF="http://www.w3.org/TR/html401/intro/sgmltut.html#didx-boolean_attribute-1">minimized form</a>.
135      *
136      * @return <code>true</code> if this attribute has a value, otherwise <code>false</code>.
137      */

138     public boolean hasValue() {
139         return valueSegment!=null;
140     }
141
142     /**
143      * Returns the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute,
144      * or <code>null</code> if it {@linkplain #hasValue() has no value}.
145      * <p>
146      * This is equivalent to {@link CharacterReference}<code>.</code>{@link CharacterReference#decode(CharSequence,boolean) decode}<code>(</code>{@link #getValueSegment()}<code>,true)</code>.
147      * <p>
148      * Note that before version 1.4.1 this method returned the raw value of the attribute as it appears in the source document,
149      * without {@linkplain CharacterReference#decode(CharSequence,boolean) decoding}.
150      * <p>
151      * To obtain the raw value without decoding, use {@link #getValueSegment()}<code>.toString()</code>.
152      * <p>
153      * Special attention should be given to attributes that contain URLs, such as the
154      * <code><a target="_blank" HREF="http://www.w3.org/TR/html401/struct/links.html#adef-href">href</a></code> attribute.
155      * When such an attribute contains a URL with parameters (as described in the
156      * <a target="_blank" HREF="http://www.w3.org/MarkUp/html-spec/html-spec_8.html#SEC8.2.1">form-urlencoded media type</a>),
157      * the ampersand (<code>&amp;</code>) characters used to separate the parameters should be
158      * {@linkplain CharacterReference#encode(CharSequence) encoded} to prevent the parameter names from being
159      * unintentionally interpreted as {@linkplain CharacterEntityReference character entity references}.
160      * This requirement is explicitly stated in the
161      * <a target="_blank" HREF="http://www.w3.org/TR/html401/charset.html#h-5.3.2">HTML 4.01 specification section 5.3.2</a>.
162      * <p>
163      * For example, take the following element in the source document:
164      * <div style="margin: 0.5em"><code>&lt;a HREF="Report.jsp?chapt=2&amp;sect=3"&gt;next&lt;/a&gt;</code></div>
165      * By default, calling
166      * {@link Element#getAttributes() getAttributes()}<code>.</code>{@link Attributes#getValue(String) getValue}<code>("href")</code>
167      * on this element returns the string
168      * "<code>Report.jsp?chapt=2&sect;=3</code>", since the text "<code>&amp;sect</code>" is interpreted as the rarely used
169      * character entity reference {@link CharacterEntityReference#_sect &amp;sect;} (U+00A7), despite the fact that it is
170      * missing the {@linkplain CharacterReference#isTerminated() terminating semicolon} (<code>;</code>).
171      * <p>
172      * Most browsers recognise <a HREF="CharacterReference.html#Unterminated">unterminated</a> character entity references
173      * in attribute values representing a codepoint of U+00FF or below, but ignore those representing codepoints above this value.
174      * One relatively popular browser only recognises those representing a codepoint of U+003E or below, meaning it would
175      * have interpreted the URL in the above example differently to most other browsers.
176      * Most browsers also use different rules depending on whether the unterminated character reference is inside or outside
177      * of an attribute value, with both of these possibilities further split into different rules for
178      * {@linkplain CharacterEntityReference character entity references},
179      * <a HREF="NumericCharacterReference.html#DecimalCharacterReference">decimal character references</a>, and
180      * <a HREF="NumericCharacterReference.html#HexadecimalCharacterReference">hexadecimal character references</a>.
181      * <p>
182      * The behaviour of this library is determined by the current {@linkplain Config.CompatibilityMode compatibility mode} setting,
183      * which is determined by the {@link Config#CurrentCompatibilityMode} property.
184      *
185      * @return the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute, or <code>null</code> if it {@linkplain #hasValue() has no value}.
186      */

187     public String JavaDoc getValue() {
188         return CharacterReference.decode(valueSegment,true);
189     }
190
191     /**
192      * Returns the segment spanning the {@linkplain #getValue() value} of this attribute, or <code>null</code> if it {@linkplain #hasValue() has no value}.
193      * @return the segment spanning the {@linkplain #getValue() value} of this attribute, or <code>null</code> if it {@linkplain #hasValue() has no value}.
194      * @see #getValue()
195      */

196     public Segment getValueSegment() {
197         return valueSegment;
198     }
199
200     /**
201      * Returns the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any,
202      * or <code>null</code> if it {@linkplain #hasValue() has no value}.
203      * <p>
204      * If the value is not enclosed by quotation marks, this is the same as the {@linkplain #getValueSegment() value segment}
205      *
206      * @return the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any, or <code>null</code> if it {@linkplain #hasValue() has no value}.
207      */

208     public Segment getValueSegmentIncludingQuotes() {
209         return valueSegmentIncludingQuotes;
210     }
211
212     /**
213      * Returns the character used to quote the value.
214      * <p>
215      * The return value is either a double-quote (<code>"</code>), a single-quote (<code>'</code>), or a space.
216      *
217      * @return the character used to quote the value, or a space if the value is not quoted or this attribute has no value.
218      */

219     public char getQuoteChar() {
220         if (valueSegment==valueSegmentIncludingQuotes) return ' '; // no quotes
221
return source.charAt(valueSegmentIncludingQuotes.getBegin());
222     }
223
224     /**
225      * Returns a string representation of this object useful for debugging purposes.
226      * @return a string representation of this object useful for debugging purposes.
227      */

228     public String JavaDoc getDebugInfo() {
229         final StringBuffer JavaDoc sb=new StringBuffer JavaDoc().append(key).append(super.getDebugInfo()).append(",name=").append(nameSegment.getDebugInfo());
230         if (hasValue())
231             sb.append(",value=").append(valueSegment.getDebugInfo()).append('"').append(valueSegment).append("\"\n");
232         else
233             sb.append(",NO VALUE\n");
234         return sb.toString();
235     }
236
237     Tag appendTidy(final StringBuffer JavaDoc sb, Tag nextTag) {
238         sb.append(' ');
239         Util.appendTo(sb,nameSegment);
240         if (valueSegment!=null) {
241             sb.append("=\"");
242             while (nextTag!=null && nextTag.begin<valueSegment.begin) nextTag=nextTag.findNextTag();
243             if (nextTag==null || nextTag.begin>=valueSegment.end) {
244                 appendTidyValue(sb,valueSegment);
245             } else {
246                 int i=valueSegment.begin;
247                 while (nextTag!=null && nextTag.begin<valueSegment.end) {
248                     appendTidyValue(sb,new Segment(source,i,nextTag.begin));
249                     if (nextTag.end>valueSegment.end) {
250                         sb.append(new Segment(source,nextTag.begin,i=valueSegment.end));
251                         break;
252                     }
253                     sb.append(nextTag);
254                     i=nextTag.end;
255                     nextTag=nextTag.findNextTag();
256                 }
257                 if (i<valueSegment.end) appendTidyValue(sb,new Segment(source,i,valueSegment.end));
258             }
259             sb.append('"');
260         }
261         return nextTag;
262     }
263
264     private static void appendTidyValue(final StringBuffer JavaDoc sb, final CharSequence JavaDoc unencodedValue) {
265         CharacterReference.appendEncode(sb,CharacterReference.decode(unencodedValue,true),false);
266     }
267
268     static StringBuffer JavaDoc appendHTML(final StringBuffer JavaDoc sb, final CharSequence JavaDoc name, final CharSequence JavaDoc value) {
269         sb.append(' ');
270         Util.appendTo(sb,name);
271         if (value!=null) {
272             sb.append("=\"");
273             CharacterReference.appendEncode(sb,value,false);
274             sb.append('"');
275         }
276         return sb;
277     }
278
279     static Writer appendHTML(final Writer writer, final CharSequence JavaDoc name, final CharSequence JavaDoc value) throws IOException {
280         writer.write(' ');
281         Util.appendTo(writer,name);
282         if (value!=null) {
283             writer.write("=\"");
284             writer.write(CharacterReference.encode(value));
285             writer.write('"');
286         }
287         return writer;
288     }
289 }
290
Popular Tags