KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tags > ImageTag


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v $
// $Author: derrickoswald $
// $Date: 2004/07/17 13:45:04 $
// $Revision: 1.48 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.tags;

import java.util.Locale;
import java.util.Vector;

import org.htmlparser.Attribute;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.util.ParserUtils;
35
36 /**
37  * Identifies an image tag.
38  */

39 public class ImageTag
40     extends
41         TagNode
42 {
43     /**
44      * The set of names handled by this tag.
45      */

46     private static final String JavaDoc[] mIds = new String JavaDoc[] {"IMG"};
47
48     /**
49      * Holds the set value of the SRC attribute, since this can differ
50      * from the attribute value due to relative references resolved by
51      * the scanner.
52      */

53     protected String JavaDoc imageURL;
54
55     /**
56      * Create a new image tag.
57      */

58     public ImageTag ()
59     {
60         imageURL = null;
61     }
62
63     /**
64      * Return the set of names handled by this tag.
65      * @return The names to be matched that create tags of this type.
66      */

67     public String JavaDoc[] getIds ()
68     {
69         return (mIds);
70     }
71
72     /**
73     * Extract the location of the image
74     * Given the tag (with attributes), and the url of the html page in which
75     * this tag exists, perform best effort to extract the 'intended' URL.
76     * Attempts to handle such attributes as:
77     * <pre>
78     * &lt;IMG SRC=http://www.redgreen.com&gt; - normal
79     * &lt;IMG SRC =http://www.redgreen.com&gt; - space between attribute name and equals sign
80     * &lt;IMG SRC= http://www.redgreen.com&gt; - space between equals sign and attribute value
81     * &lt;IMG SRC = http://www.redgreen.com&gt; - space both sides of equals sign
82     * </pre>
83     */

84     public String JavaDoc extractImageLocn ()
85     {
86         Vector JavaDoc attributes;
87         int size;
88         Attribute attribute;
89         String JavaDoc string;
90         String JavaDoc data;
91         int state;
92         String JavaDoc name;
93         String JavaDoc ret;
94     
95         // TODO: move this logic into the lexer?
96

97         ret = "";
98         state = 0;
99         attributes = getAttributesEx ();
100         size = attributes.size ();
101         for (int i = 0; (i < size) && (state < 3); i++)
102         {
103             attribute = (Attribute)attributes.elementAt (i);
104             string = attribute.getName ();
105             data = attribute.getValue ();
106             switch (state)
107             {
108                 case 0: // looking for 'src'
109
if (null != string)
110                     {
111                         name = string.toUpperCase (Locale.ENGLISH);
112                         if (name.equals ("SRC"))
113                         {
114                             state = 1;
115                             if (null != data)
116                             {
117                                 if ("".equals (data))
118                                     state = 2; // empty attribute, SRC=
119
else
120                                 {
121                                     ret = data;
122                                     i = size; // exit fast
123
}
124                             }
125
126                         }
127                         else if (name.startsWith ("SRC"))
128                         {
129                             // missing equals sign
130
string = string.substring (3);
131                             // remove any double quotes from around string
132
if (string.startsWith ("\"") && string.endsWith ("\"") && (1 < string.length ()))
133                                 string = string.substring (1, string.length () - 1);
134                             // remove any single quote from around string
135
if (string.startsWith ("'") && string.endsWith ("'") && (1 < string.length ()))
136                                 string = string.substring (1, string.length () - 1);
137                             ret = string;
138                             state = 0; // go back to searching for SRC
139
// because, maybe we found SRCXXX
140
// where XXX isn't a URL
141
}
142                     }
143                     break;
144                 case 1: // looking for equals sign
145
if (null != string)
146                     {
147                         if (string.startsWith ("="))
148                         {
149                             state = 2;
150                             if (1 < string.length ())
151                             {
152                                 ret = string.substring (1);
153                                 state = 0; // keep looking ?
154
}
155                             else if (null != data)
156                             {
157                                 ret = string.substring (1);
158                                 state = 0; // keep looking ?
159
}
160                         }
161                     }
162                     break;
163                 case 2: // looking for a valueless attribute that could be a relative or absolute URL
164
if (null != string)
165                     {
166                         if (null == data)
167                             ret = string;
168                         state = 0; // only check first non-whitespace item
169
// not every valid attribute after an equals
170
}
171                     break;
172                 default:
173                     throw new IllegalStateException JavaDoc ("we're not supposed to in state " + state);
174             }
175         }
176         ret = ParserUtils.removeChars (ret, '\n');
177         ret = ParserUtils.removeChars (ret, '\r');
178         
179         return (ret);
180     }
181
182     /**
183      * Returns the location of the image
184      */

185     public String JavaDoc getImageURL()
186     {
187         if (null == imageURL)
188             if (null != getPage ())
189                 imageURL = getPage ().getAbsoluteURL (extractImageLocn ());
190
191         return (imageURL);
192     }
193
194     public void setImageURL (String JavaDoc url)
195     {
196         imageURL = url;
197         setAttribute ("SRC", imageURL);
198     }
199 }
200
Popular Tags