KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > Attribute


1 // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2
// http://sourceforge.org/projects/htmlparser
3
// Copyright (C) 2004 Derrick Oswald
4
//
5
// Revision Control Information
6
//
7
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Attribute.java,v $
8
// $Author: derrickoswald $
9
// $Date: 2005/02/13 22:45:46 $
10
// $Revision: 1.4 $
11
//
12
// This library is free software; you can redistribute it and/or
13
// modify it under the terms of the GNU Lesser General Public
14
// License as published by the Free Software Foundation; either
15
// version 2.1 of the License, or (at your option) any later version.
16
//
17
// This library is distributed in the hope that it will be useful,
18
// but WITHOUT ANY WARRANTY; without even the implied warranty of
19
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
// Lesser General Public License for more details.
21
//
22
// You should have received a copy of the GNU Lesser General Public
23
// License along with this library; if not, write to the Free Software
24
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
//
26

27 package org.htmlparser;
28
29 import java.io.Serializable JavaDoc;
30
31 /**
32  * An attribute within a tag.
33  * Holds the name, assignment string, value and quote character.
34  * <p>
35  * This class was made deliberately simple. Except for {@link #setRawValue RawValue},
36  * the properties are completely orthogonal, that is: each property is independent
37  * of the others. This means you have enough rope here to hang yourself, and
38  * it's very easy to create malformed HTML. Where it's obvious, warnings and
39  * notes have been provided in the setters javadocs, but it is up to you -- the
40  * programmer -- to ensure that the contents of the four fields will yield
41  * valid HTML (if that's what you want).
42  * <p>
43  * Be especially mindful of quotes and assignment strings. These are handled
44  * by the constructors where it's obvious, but in general, you need to set
45  * them explicitly when building an attribute. For example to construct
46  * the attribute <b><code>label="A multi word value."</code></b> you could use:
47  * <pre>
48  * attribute = new Attribute ();
49  * attribute.setName ("label");
50  * attribute.setAssignment ("=");
51  * attribute.setValue ("A multi word value.");
52  * attribute.setQuote ('"');
53  * </pre>
54  * or
55  * <pre>
56  * attribute = new Attribute ();
57  * attribute.setName ("label");
58  * attribute.setAssignment ("=");
59  * attribute.setRawValue ("A multi word value.");
60  * </pre>
61  * or
62  * <pre>
63  * attribute = new Attribute ("label", "A multi word value.");
64  * </pre>
65  * Note that the assignment value and quoting need to be set separately when
66  * building the attribute from scratch using the properties.
67  * <p>
68  * <table width="100.0%" align="Center" border="1">
69  * <caption>Valid States for Attributes.</caption>
70  * <tr>
71  * <th align="Center">Description</th>
72  * <th align="Center">toString()</th>
73  * <th align="Center">Name</th>
74  * <th align="Center">Assignment</th>
75  * <th align="Center">Value</th>
76  * <th align="Center">Quote</th>
77  * </tr>
78  * <tr>
79  * <td align="Center">whitespace attribute</td>
80  * <td align="Center">value</td>
81  * <td align="Center"><code>null</code></td>
82  * <td align="Center"><code>null</code></td>
83  * <td align="Center">"value"</td>
84  * <td align="Center"><code>0</code></td>
85  * </tr>
86  * <tr>
87  * <td align="Center">standalone attribute</td>
88  * <td align="Center">name</td>
89  * <td align="Center">"name"</td>
90  * <td align="Center"><code>null</code></td>
91  * <td align="Center"><code>null</code></td>
92  * <td align="Center"><code>0</code></td>
93  * </tr>
94  * <tr>
95  * <td align="Center">empty attribute</td>
96  * <td align="Center">name=</td>
97  * <td align="Center">"name"</td>
98  * <td align="Center">"="</td>
99  * <td align="Center"><code>null</code></td>
100  * <td align="Center"><code>0</code></td>
101  * </tr>
102  * <tr>
103  * <td align="Center">empty single quoted attribute</td>
104  * <td align="Center">name=''</td>
105  * <td align="Center">"name"</td>
106  * <td align="Center">"="</td>
107  * <td align="Center"><code>null</code></td>
108  * <td align="Center"><code>'</code></td>
109  * </tr>
110  * <tr>
111  * <td align="Center">empty double quoted attribute</td>
112  * <td align="Center">name=""</td>
113  * <td align="Center">"name"</td>
114  * <td align="Center">"="</td>
115  * <td align="Center"><code>null</code></td>
116  * <td align="Center"><code>"</code></td>
117  * </tr>
118  * <tr>
119  * <td align="Center">naked attribute</td>
120  * <td align="Center">name=value</td>
121  * <td align="Center">"name"</td>
122  * <td align="Center">"="</td>
123  * <td align="Center">"value"</td>
124  * <td align="Center"><code>0</code></td>
125  * </tr>
126  * <tr>
127  * <td align="Center">single quoted attribute</td>
128  * <td align="Center">name='value'</td>
129  * <td align="Center">"name"</td>
130  * <td align="Center">"="</td>
131  * <td align="Center">"value"</td>
132  * <td align="Center"><code>'</code></td>
133  * </tr>
134  * <tr>
135  * <td align="Center">double quoted attribute</td>
136  * <td align="Center">name="value"</td>
137  * <td align="Center">"name"</td>
138  * <td align="Center">"="</td>
139  * <td align="Center">"value"</td>
140  * <td align="Center"><code>"</code></td>
141  * </tr>
142  * </table>
143  * <br>In words:
144  * <br>If Name is not null... 
145  * <br>If Name is not null, and both Assignment and Value are null it's a standalone attribute -- name
146  * <br>If Name is not null, and Assignment is an equals sign, and Quote is zero it's an empty attribute -- name=
147  * <br>If Name is not null, and Assignment is an equals sign, and Value is "" or null, and Quote is ' it's an empty single quoted attribute -- name=''
148  * <br>If Name is not null, and Assignment is an equals sign, and Value is "" or null, and Quote is " it's an empty double quoted attribute -- name=""
149  * <br>If Name is not null, and Assignment is an equals sign, and Value is something, and Quote is zero it's a naked attribute -- name=value
150  * <br>If Name is not null, and Assignment is an equals sign, and Value is something, and Quote is ' it's a single quoted attribute -- name='value'
151  * <br>If Name is not null, and Assignment is an equals sign, and Value is something, and Quote is " it's a double quoted attribute -- name="value"
152  * <br>All other states are invalid HTML.
153  * <p>
154  * From the <a HREF="http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2">
155  * HTML 4.01 Specification, W3C Recommendation 24 December 1999</a>
156  * http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2:<p>
157  * <cite>
158  * 3.2.2 Attributes<p>
159  * Elements may have associated properties, called attributes, which may
160  * have values (by default, or set by authors or scripts). Attribute/value
161  * pairs appear before the final ">" of an element's start tag. Any number
162  * of (legal) attribute value pairs, separated by spaces, may appear in an
163  * element's start tag. They may appear in any order.<p>
164  * In this example, the id attribute is set for an H1 element:
165  * <code>
166  * &lt;H1 id="section1"&gt;
167  * </code>
168  * This is an identified heading thanks to the id attribute
169  * <code>
170  * &lt;/H1&gt;
171  * </code>
172  * By default, SGML requires that all attribute values be delimited using
173  * either double quotation marks (ASCII decimal 34) or single quotation
174  * marks (ASCII decimal 39). Single quote marks can be included within the
175  * attribute value when the value is delimited by double quote marks, and
176  * vice versa. Authors may also use numeric character references to
177  * represent double quotes (&amp;#34;) and single quotes (&amp;#39;).
178  * For doublequotes authors can also use the character entity reference &amp;quot;.<p>
179  * In certain cases, authors may specify the value of an attribute without
180  * any quotation marks. The attribute value may only contain letters
181  * (a-z and A-Z), digits (0-9), hyphens (ASCII decimal 45),
182  * periods (ASCII decimal 46), underscores (ASCII decimal 95),
183  * and colons (ASCII decimal 58). We recommend using quotation marks even
184  * when it is possible to eliminate them.<p>
185  * Attribute names are always case-insensitive.<p>
186  * Attribute values are generally case-insensitive. The definition of each
187  * attribute in the reference manual indicates whether its value is case-insensitive.<p>
188  * All the attributes defined by this specification are listed in the attribute index.<p>
189  * </cite>
190  * <p>
191  */

192 public class Attribute
193     implements
194         Serializable JavaDoc
195 {
196     /**
197      * The name of this attribute.
198      * The part before the equals sign, or the stand-alone attribute.
199      * This will be <code>null</code> if the attribute is whitespace.
200      */

201     protected String JavaDoc mName;
202
203     /**
204      * The assignment string of the attribute.
205      * The equals sign.
206      * This will be <code>null</code> if the attribute is a
207      * stand-alone attribute.
208      */

209     protected String JavaDoc mAssignment;
210
211     /**
212      * The value of the attribute.
213      * The part after the equals sign.
214      * This will be <code>null</code> if the attribute is an empty or
215      * stand-alone attribute.
216      */

217     protected String JavaDoc mValue;
218
219     /**
220      * The quote, if any, surrounding the value of the attribute, if any.
221      * This will be zero if there are no quotes around the value.
222      */

223     protected char mQuote;
224
225     /**
226      * Create an attribute with the name, assignment string, value and quote given.
227      * If the quote value is zero, assigns the value using {@link #setRawValue}
228      * which sets the quote character to a proper value if necessary.
229      * @param name The name of this attribute.
230      * @param assignment The assignment string of this attribute.
231      * @param value The value of this attribute.
232      * @param quote The quote around the value of this attribute.
233      */

234     public Attribute (String JavaDoc name, String JavaDoc assignment, String JavaDoc value, char quote)
235     {
236         setName (name);
237         setAssignment (assignment);
238         if (0 == quote)
239             setRawValue (value);
240         else
241         {
242             setValue (value);
243             setQuote (quote);
244         }
245     }
246
247     /**
248      * Create an attribute with the name, value and quote given.
249      * Uses an equals sign as the assignment string if the value is not
250      * <code>null</code>, and calls {@link #setRawValue} to get the
251      * correct quoting if <code>quote</code> is zero.
252      * @param name The name of this attribute.
253      * @param value The value of this attribute.
254      * @param quote The quote around the value of this attribute.
255      */

256     public Attribute (String JavaDoc name, String JavaDoc value, char quote)
257     {
258         this (name, (null == value ? "" : "="), value, quote);
259     }
260
261     /**
262      * Create a whitespace attribute with the value given.
263      * @param value The value of this attribute.
264      * @exception IllegalArgumentException if the value contains other than
265      * whitespace. To set a real value use {@link #Attribute(String,String)}.
266      */

267     public Attribute (String JavaDoc value)
268     {
269         if (0 != value.trim ().length ())
270             throw new IllegalArgumentException JavaDoc ("non whitespace value");
271         else
272         {
273             setName (null);
274             setAssignment (null);
275             setValue (value);
276             setQuote ((char)0);
277         }
278     }
279
280     /**
281      * Create an attribute with the name and value given.
282      * Uses an equals sign as the assignment string if the value is not
283      * <code>null</code>, and calls {@link #setRawValue} to get the
284      * correct quoting.
285      * @param name The name of this attribute.
286      * @param value The value of this attribute.
287      */

288     public Attribute (String JavaDoc name, String JavaDoc value)
289     {
290         this (name, (null == value ? "" : "="), value, (char)0);
291     }
292
293     /**
294      * Create an attribute with the name, assignment string and value given.
295      * Calls {@link #setRawValue} to get the correct quoting.
296      * @param name The name of this attribute.
297      * @param assignment The assignment string of this attribute.
298      * @param value The value of this attribute.
299      */

300     public Attribute (String JavaDoc name, String JavaDoc assignment, String JavaDoc value)
301     {
302         this (name, assignment, value, (char)0);
303     }
304
305     /**
306      * Create an empty attribute.
307      * This will provide "" from the {@link #toString} and
308      * {@link #toString(StringBuffer)} methods.
309      */

310     public Attribute ()
311     {
312         this (null, null, null, (char)0);
313     }
314
315     /**
316      * Get the name of this attribute.
317      * The part before the equals sign, or the contents of the
318      * stand-alone attribute.
319      * @return The name, or <code>null</code> if it's just a whitepace
320      * 'attribute'.
321      */

322     public String JavaDoc getName ()
323     {
324         return (mName);
325     }
326
327     /**
328      * Get the name of this attribute.
329      * @param buffer The buffer to place the name in.
330      * @see #getName()
331      */

332     public void getName (StringBuffer JavaDoc buffer)
333     {
334         if (null != mName)
335             buffer.append (mName);
336     }
337
338     /**
339      * Set the name of this attribute.
340      * Set the part before the equals sign, or the contents of the
341      * stand-alone attribute.
342      * <em>WARNING:</em> Setting this to <code>null</code> can result in
343      * malformed HTML if the assignment string is not <code>null</code>.
344      * @param name The new name.
345      */

346     public void setName (String JavaDoc name)
347     {
348         mName = name;
349     }
350
351     /**
352      * Get the assignment string of this attribute.
353      * This is usually just an equals sign, but in poorly formed attributes it
354      * can include whitespace on either or both sides of an equals sign.
355      * @return The assignment string.
356      */

357     public String JavaDoc getAssignment ()
358     {
359         return (mAssignment);
360     }
361
362     /**
363      * Get the assignment string of this attribute.
364      * @param buffer The buffer to place the assignment string in.
365      * @see #getAssignment()
366      */

367     public void getAssignment (StringBuffer JavaDoc buffer)
368     {
369         if (null != mAssignment)
370             buffer.append (mAssignment);
371     }
372
373     /**
374      * Set the assignment string of this attribute.
375      * <em>WARNING:</em> Setting this property to other than an equals sign
376      * or <code>null</code> will result in malformed HTML. In the case of a
377      * <code>null</code>, the {@link #setValue value} should also be set to
378      * <code>null</code>.
379      * @param assignment The new assignment string.
380      */

381     public void setAssignment (String JavaDoc assignment)
382     {
383         mAssignment = assignment;
384     }
385
386     /**
387      * Get the value of the attribute.
388      * The part after the equals sign, or the text if it's just a whitepace
389      * 'attribute'.
390      * <em>NOTE:</em> This does not include any quotes that may have enclosed
391      * the value when it was read. To get the un-stripped value use
392      * {@link #getRawValue}.
393      * @return The value, or <code>null</code> if it's a stand-alone or
394      * empty attribute, or the text if it's just a whitepace 'attribute'.
395      */

396     public String JavaDoc getValue ()
397     {
398         return (mValue);
399     }
400
401     /**
402      * Get the value of the attribute.
403      * @param buffer The buffer to place the value in.
404      * @see #getValue()
405      */

406     public void getValue (StringBuffer JavaDoc buffer)
407     {
408         if (null != mValue)
409             buffer.append (mValue);
410     }
411
412     /**
413      * Set the value of the attribute.
414      * The part after the equals sign, or the text if it's a whitepace
415      * 'attribute'.
416      * <em>WARNING:</em> Setting this property to a value that needs to be
417      * quoted without also setting the quote character will result in malformed
418      * HTML.
419      * @param value The new value.
420      */

421     public void setValue (String JavaDoc value)
422     {
423         mValue = value;
424     }
425
426     /**
427      * Get the quote, if any, surrounding the value of the attribute, if any.
428      * @return Either ' or " if the attribute value was quoted, or zero
429      * if there are no quotes around it.
430      */

431     public char getQuote ()
432     {
433         return (mQuote);
434     }
435
436     /**
437      * Get the quote, if any, surrounding the value of the attribute, if any.
438      * @param buffer The buffer to place the quote in.
439      * @see #getQuote()
440      */

441     public void getQuote (StringBuffer JavaDoc buffer)
442     {
443         if (0 != mQuote)
444             buffer.append (mQuote);
445     }
446
447     /**
448      * Set the quote surrounding the value of the attribute.
449      * <em>WARNING:</em> Setting this property to zero will result in malformed
450      * HTML if the {@link #getValue value} needs to be quoted (i.e. contains
451      * whitespace).
452      * @param quote The new quote value.
453      */

454     public void setQuote (char quote)
455     {
456         mQuote = quote;
457     }
458
459     /**
460      * Get the raw value of the attribute.
461      * The part after the equals sign, or the text if it's just a whitepace
462      * 'attribute'. This includes the quotes around the value if any.
463      * @return The value, or <code>null</code> if it's a stand-alone attribute,
464      * or the text if it's just a whitepace 'attribute'.
465      */

466     public String JavaDoc getRawValue ()
467     {
468         char quote;
469         StringBuffer JavaDoc buffer;
470         String JavaDoc ret;
471
472         if (isValued ())
473         {
474             quote = getQuote ();
475             if (0 != quote)
476             {
477                 buffer = new StringBuffer JavaDoc (); // todo: can we get the value length?
478
buffer.append (quote);
479                 getValue (buffer);
480                 buffer.append (quote);
481                 ret = buffer.toString ();
482             }
483             else
484                 ret = getValue ();
485         }
486         else
487             ret = null;
488
489         return (ret);
490     }
491
492     /**
493      * Get the raw value of the attribute.
494      * The part after the equals sign, or the text if it's just a whitepace
495      * 'attribute'. This includes the quotes around the value if any.
496      * @param buffer The string buffer to append the attribute value to.
497      * @see #getRawValue()
498      */

499     public void getRawValue (StringBuffer JavaDoc buffer)
500     {
501         getQuote (buffer);
502         getValue (buffer);
503         getQuote (buffer);
504     }
505
506     /**
507      * Set the value of the attribute and the quote character.
508      * If the value is pure whitespace, assign it 'as is' and reset the
509      * quote character. If not, check for leading and trailing double or
510      * single quotes, and if found use this as the quote character and
511      * the inner contents of <code>value</code> as the real value.
512      * Otherwise, examine the string to determine if quotes are needed
513      * and an appropriate quote character if so. This may involve changing
514      * double quotes within the string to character references.
515      * @param value The new value.
516      */

517     public void setRawValue (String JavaDoc value)
518     {
519         char ch;
520         boolean needed;
521         boolean singleq;
522         boolean doubleq;
523         String JavaDoc ref;
524         StringBuffer JavaDoc buffer;
525         char quote;
526
527         quote = 0;
528         if ((null != value) && (0 != value.trim ().length ()))
529         {
530             if (value.startsWith ("'") && value.endsWith ("'") && (2 <= value.length ()))
531             {
532                 quote = '\'';
533                 value = value.substring (1, value.length () - 1);
534             }
535             else if (value.startsWith ("\"") && value.endsWith ("\"") && (2 <= value.length ()))
536             {
537                 quote = '"';
538                 value = value.substring (1, value.length () - 1);
539             }
540             else
541             {
542                 // first determine if there's whitespace in the value
543
// and while we're at it find a suitable quote character
544
needed = false;
545                 singleq = true;
546                 doubleq = true;
547                 for (int i = 0; i < value.length (); i++)
548                 {
549                     ch = value.charAt (i);
550                     if ('\'' == ch)
551                     {
552                         singleq = false;
553                         needed = true;
554                     }
555                     else if ('"' == ch)
556                     {
557                         doubleq = false;
558                         needed = true;
559                     }
560                     else if (!('-' == ch) && !('.' == ch) && !('_' == ch)
561                        && !(':' == ch) && !Character.isLetterOrDigit (ch))
562                     {
563                         needed = true;
564                     }
565                 }
566
567                 // now apply quoting
568
if (needed)
569                 {
570                     if (doubleq)
571                         quote = '"';
572                     else if (singleq)
573                         quote = '\'';
574                     else
575                     {
576                         // uh-oh, we need to convert some quotes into character
577
// references, so convert all double quotes into &#34;
578
quote = '"';
579                         ref = "&quot;"; // Translate.encode (quote);
580
// JDK 1.4: value = value.replaceAll ("\"", ref);
581
buffer = new StringBuffer JavaDoc (value.length() * 5);
582                         for (int i = 0; i < value.length (); i++)
583                         {
584                             ch = value.charAt (i);
585                             if (quote == ch)
586                                 buffer.append (ref);
587                             else
588                                 buffer.append (ch);
589                         }
590                         value = buffer.toString ();
591                     }
592                 }
593             }
594         }
595         setValue (value);
596         setQuote (quote);
597     }
598         
599     /**
600      * Predicate to determine if this attribute is whitespace.
601      * @return <code>true</code> if this attribute is whitespace,
602      * <code>false</code> if it is a real attribute.
603      */

604     public boolean isWhitespace ()
605     {
606         return (null == getName ());
607     }
608
609     /**
610      * Predicate to determine if this attribute has no equals sign (or value).
611      * @return <code>true</code> if this attribute is a standalone attribute.
612      * <code>false</code> if has an equals sign.
613      */

614     public boolean isStandAlone ()
615     {
616         return ((null != getName ()) && (null == getAssignment ()));
617     }
618
619     /**
620      * Predicate to determine if this attribute has an equals sign but no value.
621      * @return <code>true</code> if this attribute is an empty attribute.
622      * <code>false</code> if has an equals sign and a value.
623      */

624     public boolean isEmpty ()
625     {
626         return ((null != getAssignment ()) && (null == getValue ()));
627     }
628
629     /**
630      * Predicate to determine if this attribute has a value.
631      * @return <code>true</code> if this attribute has a value.
632      * <code>false</code> if it is empty or standalone.
633      */

634     public boolean isValued ()
635     {
636         return (null != getValue ());
637     }
638
639     /**
640      * Get the length of the string value of this attribute.
641      * @return The number of characters required to express this attribute.
642      */

643     public int getLength ()
644     {
645         String JavaDoc name;
646         String JavaDoc assignment;
647         String JavaDoc value;
648         char quote;
649         int ret;
650
651         ret = 0;
652         name = getName ();
653         if (null != name)
654             ret += name.length ();
655         assignment = getAssignment ();
656         if (null != assignment)
657             ret += assignment.length ();
658         value = getValue ();
659         if (null != value)
660             ret += value.length ();
661         quote = getQuote ();
662         if (0 != quote)
663             ret += 2;
664         
665         return (ret);
666     }
667
668     /**
669      * Get a text representation of this attribute.
670      * Suitable for insertion into a tag, the output is one of
671      * the forms:
672      * <code>
673      * <pre>
674      * value
675      * name
676      * name=
677      * name=value
678      * name='value'
679      * name="value"
680      * </pre>
681      * </code>
682      * @return A string that can be used within a tag.
683      */

684     public String JavaDoc toString ()
685     {
686         int length;
687         StringBuffer JavaDoc ret;
688
689         // get the size to avoid extra StringBuffer allocations
690
length = getLength ();
691         ret = new StringBuffer JavaDoc (length);
692         toString (ret);
693
694         return (ret.toString ());
695     }
696     
697     /**
698      * Get a text representation of this attribute.
699      * @param buffer The accumulator for placing the text into.
700      * @see #toString()
701      */

702     public void toString (StringBuffer JavaDoc buffer)
703     {
704         getName (buffer);
705         getAssignment (buffer);
706         getRawValue (buffer);
707     }
708
709 }
710
Popular Tags