KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tags > Tag


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tags/Tag.java,v 1.2.2.1 2004/10/24 01:22:59 sebb Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
32

33 package org.htmlparser.tags;
34
35 import java.util.Enumeration JavaDoc;
36 import java.util.HashSet JavaDoc;
37 import java.util.Hashtable JavaDoc;
38 import java.util.Map JavaDoc;
39
40 import org.htmlparser.Node;
41 import org.htmlparser.NodeReader;
42 import org.htmlparser.parserHelper.AttributeParser;
43 import org.htmlparser.parserHelper.TagParser;
44 import org.htmlparser.scanners.TagScanner;
45 import org.htmlparser.tags.data.TagData;
46 import org.htmlparser.util.NodeList;
47 import org.htmlparser.util.ParserException;
48 import org.htmlparser.visitors.NodeVisitor;
49
50 /**
51  * Tag represents a generic tag. This class allows users to register specific
52  * tag scanners, which can identify links, or image references. This tag asks the
53  * scanners to run over the text, and identify. It can be used to dynamically
54  * configure a parser.
55  * @author Kaarle Kaila 23.10.2001
56  */

57 public class Tag extends Node
58 {
59     public static final String JavaDoc TYPE = "TAG";
60     /**
61      * Constant used as value for the value of the tag name
62      * in parseParameters (Kaarle Kaila 3.8.2001)
63      */

64     public final static String JavaDoc TAGNAME = "$<TAGNAME>$";
65     public final static String JavaDoc EMPTYTAG = "$<EMPTYTAG>$";
66     private final static int TAG_BEFORE_PARSING_STATE = 1;
67     private final static int TAG_BEGIN_PARSING_STATE = 2;
68     private final static int TAG_FINISHED_PARSING_STATE = 3;
69     private final static int TAG_ILLEGAL_STATE = 4;
70     private final static int TAG_IGNORE_DATA_STATE = 5;
71     private final static int TAG_IGNORE_BEGIN_TAG_STATE = 6;
72     private final static String JavaDoc EMPTY_STRING = "";
73
74     private static AttributeParser paramParser = new AttributeParser();
75     private static TagParser tagParser;
76     /**
77      * Tag contents will have the contents of the comment tag.
78      */

79     protected StringBuffer JavaDoc tagContents;
80     private boolean emptyXmlTag = false;
81     /**
82      * tag parameters parsed into this hashtable
83      * not implemented yet
84      * added by Kaarle Kaila 23.10.2001
85      */

86     protected Hashtable JavaDoc attributes = null;
87
88     /**
89      * Scanner associated with this tag (useful for extraction of filtering data from a
90      * HTML node)
91      */

92     protected TagScanner thisScanner = null;
93     private java.lang.String JavaDoc tagLine;
94
95     /**
96      * The combined text of all the lines spanned by this tag
97      */

98     private String JavaDoc[] tagLines;
99
100     /**
101      * The line number on which this tag starts
102      */

103     private int startLine;
104
105     /**
106      * Set of tags that breaks the flow.
107      */

108     protected static HashSet JavaDoc breakTags;
109     static {
110         breakTags = new HashSet JavaDoc(30);
111         breakTags.add("BLOCKQUOTE");
112         breakTags.add("BODY");
113         breakTags.add("BR");
114         breakTags.add("CENTER");
115         breakTags.add("DD");
116         breakTags.add("DIR");
117         breakTags.add("DIV");
118         breakTags.add("DL");
119         breakTags.add("DT");
120         breakTags.add("FORM");
121         breakTags.add("H1");
122         breakTags.add("H2");
123         breakTags.add("H3");
124         breakTags.add("H4");
125         breakTags.add("H5");
126         breakTags.add("H6");
127         breakTags.add("HEAD");
128         breakTags.add("HR");
129         breakTags.add("HTML");
130         breakTags.add("ISINDEX");
131         breakTags.add("LI");
132         breakTags.add("MENU");
133         breakTags.add("NOFRAMES");
134         breakTags.add("OL");
135         breakTags.add("P");
136         breakTags.add("PRE");
137         breakTags.add("TD");
138         breakTags.add("TH");
139         breakTags.add("TITLE");
140         breakTags.add("UL");
141     }
142
143     /**
144      * Set the Tag with the beginning posn, ending posn and tag contents (in
145      * a tagData object.
146      * @param tagData The data for this tag
147      */

148     public Tag(TagData tagData)
149     {
150         super(tagData.getTagBegin(), tagData.getTagEnd());
151         this.startLine = tagData.getStartLine();
152         this.tagContents = new StringBuffer JavaDoc();
153         this.tagContents.append(tagData.getTagContents());
154         this.tagLine = tagData.getTagLine();
155         this.tagLines = new String JavaDoc[] { tagData.getTagLine()};
156         this.emptyXmlTag = tagData.isEmptyXmlTag();
157     }
158
159     public void append(char ch)
160     {
161         tagContents.append(ch);
162     }
163
164     public void append(String JavaDoc ch)
165     {
166         tagContents.append(ch);
167     }
168
169     /**
170      * Locate the tag withing the input string, by parsing from the given position
171      * @param reader HTML reader to be provided so as to allow reading of next line
172      * @param input Input String
173      * @param position Position to start parsing from
174      */

175     public static Tag find(NodeReader reader, String JavaDoc input, int position)
176     {
177         return tagParser.find(reader, input, position);
178     }
179
180     /**
181      * This method is not to be called by any scanner or tag. It is
182      * an expensive method, hence it has been made private. However,
183      * there might be some circumstances when a scanner wishes to force
184      * parsing of attributes over and above what has already been parsed.
185      * To make the choice clear - we have a method - redoParseAttributes(),
186      * which can be used.
187      * @return Hashtable
188      */

189     private Hashtable JavaDoc parseAttributes()
190     {
191         return paramParser.parseAttributes(this);
192     }
193
194     /**
195      * In case the tag is parsed at the scan method this will return value of a
196      * parameter not implemented yet
197      * @param name of parameter
198      */

199     public String JavaDoc getAttribute(String JavaDoc name)
200     {
201         return (String JavaDoc) getAttributes().get(name.toUpperCase());
202     }
203
204     /**
205      * Set attribute with given key, value pair.
206      * @param key
207      * @param value
208      */

209     public void setAttribute(String JavaDoc key, String JavaDoc value)
210     {
211         attributes.put(key, value);
212     }
213
214     /**
215      * In case the tag is parsed at the scan method this will return value of a
216      * parameter not implemented yet
217      * @param name of parameter
218      * @deprecated use getAttribute instead
219      */

220     public String JavaDoc getParameter(String JavaDoc name)
221     {
222         return (String JavaDoc) getAttributes().get(name.toUpperCase());
223     }
224
225     /**
226      * Gets the attributes in the tag.
227      * @return Returns a Hashtable of attributes
228      */

229     public Hashtable JavaDoc getAttributes()
230     {
231         if (attributes == null)
232         {
233             attributes = parseAttributes();
234         }
235         return attributes;
236     }
237
238     public String JavaDoc getTagName()
239     {
240         return (String JavaDoc) getAttributes().get(TAGNAME);
241     }
242
243     /**
244      * Returns the line where the tag was found
245      * @return java.lang.String
246      */

247     public String JavaDoc getTagLine()
248     {
249         return tagLine;
250     }
251
252     /**
253      * Returns the combined text of all the lines spanned by this tag
254      * @return java.lang.String
255      */

256     public String JavaDoc[] getTagLines()
257     {
258         return tagLines;
259     }
260
261     /**
262      * Return the text contained in this tag
263      */

264     public String JavaDoc getText()
265     {
266         return tagContents.toString();
267     }
268
269     /**
270      * Return the scanner associated with this tag.
271      */

272     public TagScanner getThisScanner()
273     {
274         return thisScanner;
275     }
276
277     /**
278      * Extract the first word from the given string.
279      * Words are delimited by whitespace or equals signs.
280      * @param s The string to get the word from.
281      * @return The first word.
282      */

283     public static String JavaDoc extractWord(String JavaDoc s)
284     {
285         int length;
286         boolean parse;
287         char ch;
288         StringBuffer JavaDoc ret;
289
290         length = s.length();
291         ret = new StringBuffer JavaDoc(length);
292         parse = true;
293         for (int i = 0; i < length && parse; i++)
294         {
295             ch = s.charAt(i);
296             if (Character.isWhitespace(ch) || ch == '=')
297                 parse = false;
298             else
299                 ret.append(Character.toUpperCase(ch));
300         }
301
302         return (ret.toString());
303     }
304
305     /**
306      * Scan the tag to see using the registered scanners, and attempt identification.
307      * @param url URL at which HTML page is located
308      * @param reader The NodeReader that is to be used for reading the url
309      */

310     public Node scan(Map JavaDoc scanners, String JavaDoc url, NodeReader reader)
311         throws ParserException
312     {
313         if (tagContents.length() == 0)
314             return this;
315         try
316         {
317             boolean found = false;
318             Node retVal = null;
319             // Find the first word in the scanners
320
String JavaDoc firstWord = extractWord(tagContents.toString());
321             // Now, get the scanner associated with this.
322
TagScanner scanner = (TagScanner) scanners.get(firstWord);
323
324             // Now do a deep check
325
if (scanner != null
326                 && scanner.evaluate(
327                     tagContents.toString(),
328                     reader.getPreviousOpenScanner()))
329             {
330                 found = true;
331                 TagScanner save;
332                 save = reader.getPreviousOpenScanner();
333                 reader.setPreviousOpenScanner(scanner);
334                 retVal = scanner.createScannedNode(this, url, reader, tagLine);
335                 reader.setPreviousOpenScanner(save);
336             }
337
338             if (!found)
339                 return this;
340             else
341             {
342                 return retVal;
343             }
344         }
345         catch (Exception JavaDoc e)
346         {
347             String JavaDoc errorMsg;
348             if (tagContents != null)
349                 errorMsg = tagContents.toString();
350             else
351                 errorMsg = "null";
352             throw new ParserException(
353                 "Tag.scan() : Error while scanning tag, tag contents = "
354                     + errorMsg
355                     + ", tagLine = "
356                     + tagLine,
357                 e);
358         }
359     }
360
361     /**
362      * Sets the parsed.
363      * @param parsed The parsed to set
364      */

365     public void setAttributes(Hashtable JavaDoc attributes)
366     {
367         this.attributes = attributes;
368     }
369
370     /**
371      * Sets the nodeBegin.
372      * @param nodeBegin The nodeBegin to set
373      */

374     public void setTagBegin(int tagBegin)
375     {
376         this.nodeBegin = tagBegin;
377     }
378
379     /**
380      * Gets the nodeBegin.
381      * @return The nodeBegin value.
382      */

383     public int getTagBegin()
384     {
385         return (nodeBegin);
386     }
387
388     /**
389      * Sets the nodeEnd.
390      * @param nodeEnd The nodeEnd to set
391      */

392     public void setTagEnd(int tagEnd)
393     {
394         this.nodeEnd = tagEnd;
395     }
396
397     /**
398      * Gets the nodeEnd.
399      * @return The nodeEnd value.
400      */

401     public int getTagEnd()
402     {
403         return (nodeEnd);
404     }
405
406     /**
407      * Gets the line number on which this tag starts.
408      * @return the start line number
409      */

410     public int getTagStartLine()
411     {
412         return startLine;
413     }
414
415     /**
416      * Gets the line number on which this tag ends.
417      * @return the end line number
418      */

419     public int getTagEndLine()
420     {
421         return startLine + tagLines.length - 1;
422     }
423
424     public void setTagLine(java.lang.String JavaDoc newTagLine)
425     {
426         tagLine = newTagLine;
427
428         // Note: Incur the overhead of resizing each time (versus
429
// preallocating a larger array), since the average tag
430
// generally doesn't span multiple lines
431
String JavaDoc[] newTagLines = new String JavaDoc[tagLines.length + 1];
432         for (int i = 0; i < tagLines.length; i++)
433             newTagLines[i] = tagLines[i];
434         newTagLines[tagLines.length] = newTagLine;
435         tagLines = newTagLines;
436     }
437
438     public void setText(String JavaDoc text)
439     {
440         tagContents = new StringBuffer JavaDoc(text);
441     }
442
443     public void setThisScanner(TagScanner scanner)
444     {
445         thisScanner = scanner;
446     }
447
448     public String JavaDoc toPlainTextString()
449     {
450         return EMPTY_STRING;
451     }
452
453     /**
454      * A call to a tag's toHTML() method will render it in HTML
455      * Most tags that do not have children and inherit from Tag,
456      * do not need to override toHTML().
457      * @see org.htmlparser.Node#toHTML()
458      */

459     public String JavaDoc toHtml()
460     {
461         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
462         sb.append("<");
463         sb.append(getTagName());
464         if (containsMoreThanOneKey())
465             sb.append(" ");
466         String JavaDoc key, value;
467         String JavaDoc empty = null;
468         int i = 0;
469         for (Enumeration JavaDoc e = attributes.keys(); e.hasMoreElements();)
470         {
471             key = (String JavaDoc) e.nextElement();
472             i++;
473             if (!key.equals(TAGNAME))
474             {
475                 if (key.equals(EMPTYTAG))
476                 {
477                     empty = "/";
478                 }
479                 else
480                 {
481                     value = getAttribute(key);
482                     sb.append(key + "=\"" + value + "\"");
483                     if (i < attributes.size())
484                         sb.append(" ");
485                 }
486             }
487         }
488         if (empty != null)
489             sb.append(empty);
490         if (isEmptyXmlTag())
491             sb.append("/");
492         sb.append(">");
493         return sb.toString();
494     }
495
496     private boolean containsMoreThanOneKey()
497     {
498         return attributes.keySet().size() > 1;
499     }
500
501     /**
502      * Print the contents of the tag
503      */

504     public String JavaDoc toString()
505     {
506         return "Begin Tag : "
507             + tagContents
508             + "; begins at : "
509             + elementBegin()
510             + "; ends at : "
511             + elementEnd();
512     }
513
514     /**
515      * Sets the tagParser.
516      * @param tagParser The tagParser to set
517      */

518     public static void setTagParser(TagParser tagParser)
519     {
520         Tag.tagParser = tagParser;
521     }
522
523     /**
524      * Determines if the given tag breaks the flow of text.
525      * @return <code>true</code> if following text would start on a new line,
526      * <code>false</code> otherwise.
527      */

528     public boolean breaksFlow()
529     {
530         return (breakTags.contains(getText().toUpperCase()));
531     }
532
533     /**
534      * This method verifies that the current tag matches the provided
535      * filter. The match is based on the string object and not its contents,
536      * so ensure that you are using static final filter strings provided
537      * in the tag classes.
538      * @see org.htmlparser.Node#collectInto(NodeList, String)
539      */

540     public void collectInto(NodeList collectionList, String JavaDoc filter)
541     {
542         if (thisScanner != null && thisScanner.getFilter().equals(filter))
543             collectionList.add(this);
544     }
545
546     /**
547      * Returns table of attributes in the tag
548      * @return Hashtable
549      * @deprecated This method is deprecated. Use getAttributes() instead.
550      */

551     public Hashtable JavaDoc getParsed()
552     {
553         return attributes;
554     }
555
556     /**
557      * Sometimes, a scanner may need to request a re-evaluation of the
558      * attributes in a tag. This may happen when there is some correction
559      * activity. An example of its usage can be found in ImageTag.
560      * <br>
561      * <B>Note:<B> This is an intensive task, hence call only when
562      * really necessary
563      * @return Hashtable
564      */

565     public Hashtable JavaDoc redoParseAttributes()
566     {
567         return parseAttributes();
568     }
569
570     public void accept(NodeVisitor visitor)
571     {
572         visitor.visitTag(this);
573     }
574
575     public String JavaDoc getType()
576     {
577         return TYPE;
578     }
579
580     /**
581      * Is this an empty xml tag of the form<br>
582      * &lt;tag/&gt;
583      * @return boolean
584      */

585     public boolean isEmptyXmlTag()
586     {
587         return emptyXmlTag;
588     }
589
590     public void setEmptyXmlTag(boolean emptyXmlTag)
591     {
592         this.emptyXmlTag = emptyXmlTag;
593     }
594
595 }
596
Popular Tags