KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > lexer > PageAttribute


1 // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2
// http://sourceforge.org/projects/htmlparser
3
// Copyright (C) 2004 Derrick Oswald
4
//
5
// Revision Control Information
6
//
7
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageAttribute.java,v $
8
// $Author: derrickoswald $
9
// $Date: 2005/02/13 22:45:47 $
10
// $Revision: 1.2 $
11
//
12
// This library is free software; you can redistribute it and/or
13
// modify it under the terms of the GNU Lesser General Public
14
// License as published by the Free Software Foundation; either
15
// version 2.1 of the License, or (at your option) any later version.
16
//
17
// This library is distributed in the hope that it will be useful,
18
// but WITHOUT ANY WARRANTY; without even the implied warranty of
19
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
// Lesser General Public License for more details.
21
//
22
// You should have received a copy of the GNU Lesser General Public
23
// License along with this library; if not, write to the Free Software
24
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
//
26

27 package org.htmlparser.lexer;
28
29 import org.htmlparser.Attribute;
30 import org.htmlparser.lexer.Page;
31
32 /**
33  * An attribute within a tag on a page.
34  * This attribute is similar to Attribute but 'lazy loaded' from the
35  * <code>Page</code> by providing the page and cursor offsets
36  * into the page for the name and value. This is done for speed, since
37  * if the name and value are not needed we can avoid the cost and memory
38  * overhead of creating the strings.
39  * <p>
40  * Thus the property getters, defer to the base class unless the property
41  * is null, in which case an attempt is made to read it from the underlying
42  * page. Optimizations in the predicates and length calculation defer the
43  * actual instantiation of strings until absolutely needed.
44  */

45 public class PageAttribute
46     extends
47         Attribute
48 {
49     /**
50      * The page this attribute is extracted from.
51      */

52     protected Page mPage;
53
54     /**
55      * The starting offset of the name within the page.
56      * If negative, the name is considered <code>null</code>.
57      */

58     protected int mNameStart;
59
60     /**
61      * The ending offset of the name within the page.
62      */

63     protected int mNameEnd;
64
65     /**
66      * The starting offset of the value within the page.
67      * If negative, the value is considered <code>null</code>.
68      */

69     protected int mValueStart;
70
71     /**
72      * The ending offset of the name within the page.
73      */

74     protected int mValueEnd;
75
76     /**
77      * Create an attribute.
78      * @param page The page containing the attribute.
79      * @param name_start The starting offset of the name within the page.
80      * If this is negative, the name is considered null.
81      * @param name_end The ending offset of the name within the page.
82      * @param value_start he starting offset of the value within the page.
83      * If this is negative, the value is considered null.
84      * @param value_end The ending offset of the value within the page.
85      * @param quote The quote, if any, surrounding the value of the attribute,
86      * (i.e. ' or "), or zero if none.
87      */

88     public PageAttribute (Page page, int name_start, int name_end, int value_start, int value_end, char quote)
89     {
90         mPage = page;
91         mNameStart = name_start;
92         mNameEnd = name_end;
93         mValueStart = value_start;
94         mValueEnd = value_end;
95         setName (null);
96         setAssignment (null);
97         setValue (null);
98         setQuote (quote);
99     }
100
101     //
102
// provide same constructors as super class
103
//
104

105     private void init ()
106     {
107         mPage = null;
108         mNameStart = -1;
109         mNameEnd = -1;
110         mValueStart = -1;
111         mValueEnd = -1;
112     }
113
114     /**
115      * Create an attribute with the name, assignment string, value and quote given.
116      * If the quote value is zero, assigns the value using {@link #setRawValue}
117      * which sets the quote character to a proper value if necessary.
118      * @param name The name of this attribute.
119      * @param assignment The assignment string of this attribute.
120      * @param value The value of this attribute.
121      * @param quote The quote around the value of this attribute.
122      */

123     public PageAttribute (String JavaDoc name, String JavaDoc assignment, String JavaDoc value, char quote)
124     {
125         super (name, assignment, value, quote);
126         init ();
127     }
128
129     /**
130      * Create an attribute with the name, value and quote given.
131      * Uses an equals sign as the assignment string if the value is not
132      * <code>null</code>, and calls {@link #setRawValue} to get the
133      * correct quoting if <code>quote</code> is zero.
134      * @param name The name of this attribute.
135      * @param value The value of this attribute.
136      * @param quote The quote around the value of this attribute.
137      */

138     public PageAttribute (String JavaDoc name, String JavaDoc value, char quote)
139     {
140         super (name, value, quote);
141         init ();
142     }
143
144     /**
145      * Create a whitespace attribute with the value given.
146      * @param value The value of this attribute.
147      * @exception IllegalArgumentException if the value contains other than
148      * whitespace. To set a real value use {@link Attribute#Attribute(String)}.
149      */

150     public PageAttribute (String JavaDoc value)
151     {
152         super (value);
153         init ();
154     }
155
156     /**
157      * Create an attribute with the name and value given.
158      * Uses an equals sign as the assignment string if the value is not
159      * <code>null</code>, and calls {@link #setRawValue} to get the
160      * correct quoting.
161      * @param name The name of this attribute.
162      * @param value The value of this attribute.
163      */

164     public PageAttribute (String JavaDoc name, String JavaDoc value)
165     {
166         super (name, value);
167         init ();
168     }
169
170     /**
171      * Create an attribute with the name, assignment string and value given.
172      * Calls {@link #setRawValue} to get the correct quoting.
173      * @param name The name of this attribute.
174      * @param assignment The assignment string of this attribute.
175      * @param value The value of this attribute.
176      */

177     public PageAttribute (String JavaDoc name, String JavaDoc assignment, String JavaDoc value)
178     {
179         super (name, assignment, value);
180         init ();
181     }
182
183     /**
184      * Create an empty attribute.
185      * This will provide "" from the {@link #toString} and
186      * {@link #toString(StringBuffer)} methods.
187      */

188     public PageAttribute ()
189     {
190         super ();
191         init ();
192     }
193
194     /**
195      * Get the name of this attribute.
196      * The part before the equals sign, or the contents of the
197      * stand-alone attribute.
198      * @return The name, or <code>null</code> if it's just a whitepace
199      * 'attribute'.
200      */

201     public String JavaDoc getName ()
202     {
203         String JavaDoc ret;
204
205         ret = super.getName ();
206         if (null == ret)
207         {
208             if ((null != mPage) && (0 <= mNameStart))
209             {
210                 ret = mPage.getText (mNameStart, mNameEnd);
211                 setName (ret); // cache the value
212
}
213         }
214
215         return (ret);
216     }
217
218     /**
219      * Get the name of this attribute.
220      * @param buffer The buffer to place the name in.
221      * @see #getName()
222      */

223     public void getName (StringBuffer JavaDoc buffer)
224     {
225         String JavaDoc name;
226
227         name = super.getName ();
228         if (null == name)
229         {
230             if ((null != mPage) && (0 <= mNameStart))
231                 mPage.getText (buffer, mNameStart, mNameEnd);
232         }
233         else
234             buffer.append (name);
235     }
236
237     /**
238      * Get the assignment string of this attribute.
239      * This is usually just an equals sign, but in poorly formed attributes it
240      * can include whitespace on either or both sides of an equals sign.
241      * @return The assignment string.
242      */

243     public String JavaDoc getAssignment ()
244     {
245         String JavaDoc ret;
246
247         ret = super.getAssignment ();
248         if (null == ret)
249         {
250             if ((null != mPage) && (0 <= mNameEnd) && (0 <= mValueStart))
251             {
252                 ret = mPage.getText (mNameEnd, mValueStart);
253                 // remove a possible quote included in the assignment
254
// since mValueStart points at the real start of the value
255
if (ret.endsWith ("\"") || ret.endsWith ("'"))
256                     ret = ret.substring (0, ret.length () - 1);
257                 setAssignment (ret); // cache the value
258
}
259         }
260
261         return (ret);
262     }
263
264     /**
265      * Get the assignment string of this attribute.
266      * @param buffer The buffer to place the assignment string in.
267      * @see #getAssignment()
268      */

269     public void getAssignment (StringBuffer JavaDoc buffer)
270     {
271         int length;
272         char ch;
273         String JavaDoc assignment;
274
275         assignment = super.getAssignment ();
276         if (null == assignment)
277         {
278             if ((null != mPage) && (0 <= mNameEnd) && (0 <= mValueStart))
279             {
280                 mPage.getText (buffer, mNameEnd, mValueStart);
281                 // remove a possible quote included in the assignment
282
// since mValueStart points at the real start of the value
283
length = buffer.length () - 1;
284                 ch = buffer.charAt (length);
285                 if (('\'' == ch) || ('"' == ch))
286                     buffer.setLength (length);
287             }
288         }
289         else
290             buffer.append (assignment);
291     }
292
293     /**
294      * Get the value of the attribute.
295      * The part after the equals sign, or the text if it's just a whitepace
296      * 'attribute'.
297      * <em>NOTE:</em> This does not include any quotes that may have enclosed
298      * the value when it was read. To get the un-stripped value use
299      * {@link #getRawValue}.
300      * @return The value, or <code>null</code> if it's a stand-alone or
301      * empty attribute, or the text if it's just a whitepace 'attribute'.
302      */

303     public String JavaDoc getValue ()
304     {
305         String JavaDoc ret;
306
307         ret = super.getValue ();
308         if (null == ret)
309         {
310             if ((null != mPage) && (0 <= mValueEnd))
311             {
312                 ret = mPage.getText (mValueStart, mValueEnd);
313                 setValue (ret); // cache the value
314
}
315         }
316
317         return (ret);
318     }
319
320     /**
321      * Get the value of the attribute.
322      * @param buffer The buffer to place the value in.
323      * @see #getValue()
324      */

325     public void getValue (StringBuffer JavaDoc buffer)
326     {
327         String JavaDoc value;
328
329         value = super.getValue ();
330         if (null == value)
331         {
332             if ((null != mPage) && (0 <= mValueEnd))
333                 mPage.getText (buffer, mNameStart, mNameEnd);
334         }
335         else
336             buffer.append (value);
337     }
338
339     /**
340      * Get the raw value of the attribute.
341      * The part after the equals sign, or the text if it's just a whitepace
342      * 'attribute'. This includes the quotes around the value if any.
343      * @return The value, or <code>null</code> if it's a stand-alone attribute,
344      * or the text if it's just a whitepace 'attribute'.
345      */

346     public String JavaDoc getRawValue ()
347     {
348         char quote;
349         StringBuffer JavaDoc buffer;
350         String JavaDoc ret;
351
352         ret = getValue ();
353         if (null != ret && (0 != (quote = getQuote ())))
354         {
355             buffer = new StringBuffer JavaDoc (ret.length() + 2);
356             buffer.append (quote);
357             buffer.append (ret);
358             buffer.append (quote);
359             ret = buffer.toString ();
360         }
361
362         return (ret);
363     }
364
365     /**
366      * Get the raw value of the attribute.
367      * The part after the equals sign, or the text if it's just a whitepace
368      * 'attribute'. This includes the quotes around the value if any.
369      * @param buffer The string buffer to append the attribute value to.
370      * @see #getRawValue()
371      */

372     public void getRawValue (StringBuffer JavaDoc buffer)
373     {
374         char quote;
375
376         if (null == mValue)
377         {
378             if (0 <= mValueEnd)
379             {
380                 if (0 != (quote = getQuote ()))
381                     buffer.append (quote);
382                 if (mValueStart != mValueEnd)
383                     mPage.getText (buffer, mValueStart, mValueEnd);
384                 if (0 != quote)
385                     buffer.append (quote);
386             }
387         }
388         else
389         {
390             if (0 != (quote = getQuote ()))
391                 buffer.append (quote);
392             buffer.append (mValue);
393             if (0 != quote)
394                 buffer.append (quote);
395         }
396     }
397
398     /**
399      * Get the page this attribute is anchored to, if any.
400      * @return The page used to construct this attribute, or null if this
401      * is just a regular attribute.
402      */

403     public Page getPage ()
404     {
405         return (mPage);
406     }
407
408     /**
409      * Set the page this attribute is anchored to.
410      * @param page The page to be used to construct this attribute.
411      * Note: If you set this you probably also want to uncache the property
412      * values by setting them to null.
413      */

414     public void setPage (Page page)
415     {
416         mPage = page;
417     }
418
419     /**
420      * Get the starting position of the attribute name.
421      * @return The offset into the page at which the name begins.
422      */

423     public int getNameStartPosition ()
424     {
425         return (mNameStart);
426     }
427
428     /**
429      * Set the starting position of the attribute name.
430      * @param start The new offset into the page at which the name begins.
431      */

432     public void setNameStartPosition (int start)
433     {
434         mNameStart = start;
435         setName (null); // uncache value
436
}
437
438     /**
439      * Get the ending position of the attribute name.
440      * @return The offset into the page at which the name ends.
441      */

442     public int getNameEndPosition ()
443     {
444         return (mNameEnd);
445     }
446
447     /**
448      * Set the ending position of the attribute name.
449      * @param end The new offset into the page at which the name ends.
450      */

451     public void setNameEndPosition (int end)
452     {
453         mNameEnd = end;
454         setName (null); // uncache value
455
setAssignment (null); // uncache value
456
}
457
458     /**
459      * Get the starting position of the attribute value.
460      * @return The offset into the page at which the value begins.
461      */

462     public int getValueStartPosition ()
463     {
464         return (mValueStart);
465     }
466
467     /**
468      * Set the starting position of the attribute value.
469      * @param start The new offset into the page at which the value begins.
470      */

471     public void setValueStartPosition (int start)
472     {
473         mValueStart = start;
474         setAssignment (null); // uncache value
475
setValue (null); // uncache value
476
}
477
478     /**
479      * Get the ending position of the attribute value.
480      * @return The offset into the page at which the value ends.
481      */

482     public int getValueEndPosition ()
483     {
484         return (mValueEnd);
485     }
486
487     /**
488      * Set the ending position of the attribute value.
489      * @param end The new offset into the page at which the value ends.
490      */

491     public void setValueEndPosition (int end)
492     {
493         mValueEnd = end;
494         setValue (null); // uncache value
495
}
496
497     /**
498      * Predicate to determine if this attribute is whitespace.
499      * @return <code>true</code> if this attribute is whitespace,
500      * <code>false</code> if it is a real attribute.
501      */

502     public boolean isWhitespace ()
503     {
504         return (((null == super.getName ()) && (null == mPage))
505             || ((null != mPage) && (0 > mNameStart)));
506     }
507
508     /**
509      * Predicate to determine if this attribute has no equals sign (or value).
510      * @return <code>true</code> if this attribute is a standalone attribute.
511      * <code>false</code> if has an equals sign.
512      */

513     public boolean isStandAlone ()
514     {
515         return (!isWhitespace () // not whitespace
516
&& (null == super.getAssignment ()) // and no explicit assignment provided
517
&& !isValued () // and has no value
518
&& ((null == mPage) // and either its not coming from a page
519
// or it is coming from a page and it doesn't have an assignment part
520
|| ((null != mPage) && (0 <= mNameEnd) && (0 > mValueStart))));
521     }
522
523     /**
524      * Predicate to determine if this attribute has an equals sign but no value.
525      * @return <code>true</code> if this attribute is an empty attribute.
526      * <code>false</code> if has an equals sign and a value.
527      */

528     public boolean isEmpty ()
529     {
530         return (!isWhitespace () // not whitespace
531
&& !isStandAlone () // and not standalone
532
&& (null == super.getValue ()) // and no explicit value provided
533
&& ((null == mPage) // and either its not coming from a page
534
// or it is coming from a page and has no value
535
|| ((null != mPage) && (0 > mValueEnd))));
536     }
537
538     /**
539      * Predicate to determine if this attribute has a value.
540      * @return <code>true</code> if this attribute has a value.
541      * <code>false</code> if it is empty or standalone.
542      */

543     public boolean isValued ()
544     {
545         return ((null != super.getValue ()) // an explicit value provided
546
// or it is coming from a page and has a non-empty value
547
|| ((null != mPage) && ((0 <= mValueStart) && (0 <= mValueEnd)) && (mValueStart != mValueEnd)));
548     }
549
550     /**
551      * Get the length of the string value of this attribute.
552      * @return The number of characters required to express this attribute.
553      */

554     public int getLength ()
555     {
556         String JavaDoc name;
557         String JavaDoc assignment;
558         String JavaDoc value;
559         char quote;
560         int ret;
561
562         ret = 0;
563         name = super.getName ();
564         if (null != name)
565             ret += name.length ();
566         else if ((null != mPage) && (0 <= mNameStart) && (0 <= mNameEnd))
567             ret += mNameEnd - mNameStart;
568         assignment = super.getAssignment ();
569         if (null != assignment)
570             ret += assignment.length ();
571         else if ((null != mPage) && (0 <= mNameEnd) && (0 <= mValueStart))
572             ret += mValueStart - mNameEnd;
573         value = super.getValue ();
574         if (null != value)
575             ret += value.length ();
576         else if ((null != mPage) && (0 <= mValueStart) && (0 <= mValueEnd))
577             ret += mValueEnd - mValueStart;
578         quote = getQuote ();
579         if (0 != quote)
580             ret += 2;
581         
582         return (ret);
583     }
584 }
585
Popular Tags