KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tags > LinkTag


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v $
// $Author: derrickoswald $
// $Date: 2005/02/13 22:45:48 $
// $Revision: 1.53 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.tags;

import org.htmlparser.Node;
import org.htmlparser.util.ParserUtils;
import org.htmlparser.util.SimpleNodeIterator;

33 /**
34  * Identifies a link tag.
35  */

36 public class LinkTag extends CompositeTag
37 {
38     /**
39      * The set of names handled by this tag.
40      */

41     private static final String JavaDoc[] mIds = new String JavaDoc[] {"A"};
42
43     /**
44      * The set of tag names that indicate the end of this tag.
45      */

46     private static final String JavaDoc[] mEnders = new String JavaDoc[] {"A", "P", "DIV", "TD", "TR", "FORM", "LI"};
47
48     /**
49      * The set of end tag names that indicate the end of this tag.
50      */

51     private static final String JavaDoc[] mEndTagEnders = new String JavaDoc[] {"P", "DIV", "TD", "TR", "FORM", "LI", "BODY", "HTML"};
52
53     /**
54      * The URL where the link points to
55      */

56     protected String JavaDoc mLink;
57
58     /**
59      * Set to true when the link was a mailto: URL.
60      */

61     private boolean mailLink;
62
63     /**
64      * Set to true when the link was a javascript: URL.
65      */

66     private boolean javascriptLink;
67
68     /**
69      * Constructor creates an LinkTag object, which basically stores the location
70      * where the link points to, and the text it contains.
71      * <p>
72      * In order to get the contents of the link tag, use the method linkData(),
73      * which returns an enumeration of nodes encapsulated within the link.
74      * <p>
75      * The following code will get all the images inside a link tag.
76      * <pre>
77      * Node node ;
78      * ImageTag imageTag;
79      * for (Enumeration e=linkTag.linkData();e.hasMoreElements();) {
80      * node = (Node)e.nextElement();
81      * if (node instanceof ImageTag) {
82      * imageTag = (ImageTag)node;
83      * // Process imageTag
84      * }
85      * }
86      * </pre>
87      */

88     public LinkTag ()
89     {
90     }
91
92     /**
93      * Return the set of names handled by this tag.
94      * @return The names to be matched that create tags of this type.
95      */

96     public String JavaDoc[] getIds ()
97     {
98         return (mIds);
99     }
100
101     /**
102      * Return the set of tag names that cause this tag to finish.
103      * @return The names of following tags that stop further scanning.
104      */

105     public String JavaDoc[] getEnders ()
106     {
107         return (mEnders);
108     }
109
110     /**
111      * Return the set of end tag names that cause this tag to finish.
112      * @return The names of following end tags that stop further scanning.
113      */

114     public String JavaDoc[] getEndTagEnders ()
115     {
116         return (mEndTagEnders);
117     }
118
119     /**
120      * Returns the accesskey attribute value, if any.
121      */

122     public String JavaDoc getAccessKey()
123     {
124         return (getAttribute("ACCESSKEY"));
125     }
126
127     /**
128      * Returns the url as a string, to which this link points.
129      * This string has had the "mailto:" and "javascript:" protocol stripped
130      * off the front (if those predicates return <code>true</code>) but not
131      * for other protocols. Don't ask me why, it's a legacy thing.
132      */

133     public String JavaDoc getLink()
134     {
135         if (null == mLink)
136         {
137             mailLink=false;
138             javascriptLink = false;
139             mLink = extractLink ();
140
141             int mailto = mLink.indexOf("mailto");
142             if (mailto==0)
143             {
144                 // yes it is
145
mailto = mLink.indexOf(":");
146                 mLink = mLink.substring(mailto+1);
147                 mailLink = true;
148             }
149             int javascript = mLink.indexOf("javascript:");
150             if (javascript == 0)
151             {
152                 mLink = mLink.substring(11); // this magic number is "javascript:".length()
153
javascriptLink = true;
154             }
155         }
156         return (mLink);
157     }
158
159     /**
160      * Returns the text contained inside this link tag
161      */

162     public String JavaDoc getLinkText()
163     {
164         String JavaDoc ret;
165
166         if (null != getChildren ())
167             ret = getChildren ().asString ();
168         else
169             ret = "";
170
171         return (ret);
172     }
173
174     /**
175      * Is this a mail address
176      * @return boolean true/false
177      */

178     public boolean isMailLink()
179     {
180         getLink (); // force an evaluation of the booleans
181
return (mailLink);
182     }
183
184     /**
185      * Tests if the link is javascript
186      * @return flag indicating if the link is a javascript code
187      */

188     public boolean isJavascriptLink()
189     {
190         getLink (); // force an evaluation of the booleans
191
return (javascriptLink);
192     }
193
194     /**
195      * Tests if the link is an FTP link.
196      *
197      * @return flag indicating if this link is an FTP link
198      */

199     public boolean isFTPLink() {
200         return getLink ().indexOf("ftp://")==0;
201     }
202
203     /**
204      * Tests if the link is an IRC link.
205      * @return flag indicating if this link is an IRC link
206      */

207     public boolean isIRCLink() {
208         return getLink ().indexOf("irc://")==0;
209     }
210
211     /**
212      * Tests if the link is an HTTP link.
213      *
214      * @return flag indicating if this link is an HTTP link
215      */

216     public boolean isHTTPLink()
217     {
218         return (!isFTPLink() && !isHTTPSLink() && !isJavascriptLink() && !isMailLink() && !isIRCLink());
219     }
220
221     /**
222      * Tests if the link is an HTTPS link.
223      *
224      * @return flag indicating if this link is an HTTPS link
225      */

226     public boolean isHTTPSLink() {
227             return getLink ().indexOf("https://")==0;
228     }
229
230         /**
231      * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.).
232      *
233      * @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.)
234      */

235     public boolean isHTTPLikeLink() {
236             return isHTTPLink() || isHTTPSLink();
237     }
238
239
240     /**
241      * Insert the method's description here.
242      * Creation date: (8/3/2001 1:49:31 AM)
243      * @param newMailLink boolean
244      */

245     public void setMailLink(boolean newMailLink)
246     {
247         mailLink = newMailLink;
248     }
249
250     /**
251      * Set the link as a javascript link.
252      *
253      * @param newJavascriptLink flag indicating if the link is a javascript code
254      */

255     public void setJavascriptLink(boolean newJavascriptLink)
256     {
257         javascriptLink = newJavascriptLink;
258     }
259
260     /**
261      * Print the contents of this Link Node
262      */

263     public String JavaDoc toString()
264     {
265         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
266         sb.append("Link to : "+ getLink() + "; titled : "+getLinkText ()+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition ()+ ", AccessKey=");
267         if (getAccessKey ()==null)
268             sb.append("null\n");
269         else
270             sb.append(getAccessKey ()+"\n");
271         if (null != getChildren ())
272         {
273             sb.append(" "+"LinkData\n");
274             sb.append(" "+"--------\n");
275
276             Node node;
277             int i = 0;
278             for (SimpleNodeIterator e=children();e.hasMoreNodes();)
279             {
280                 node = e.nextNode();
281                 sb.append(" "+(i++)+ " ");
282                 sb.append(node.toString()+"\n");
283             }
284         }
285         sb.append(" "+"*** END of LinkData ***\n");
286         return sb.toString();
287     }
288
289     public void setLink(String JavaDoc link)
290     {
291         mLink = link;
292         setAttribute ("HREF", link);
293     }
294
295     /**
296      * This method returns an enumeration of data that it contains
297      * @return Enumeration
298      * @deprecated Use children() instead.
299      */

300     public SimpleNodeIterator linkData() {
301         return children();
302     }
303
304     /**
305      * Extract the link from the HREF attribute.
306      * @return The URL from the HREF attibute. This is absolute if the tag has
307      * a valid page.
308      */

309     public String JavaDoc extractLink ()
310     {
311         String JavaDoc ret;
312
313         ret = getAttribute ("HREF");
314         if (null != ret)
315         {
316             ret = ParserUtils.removeChars (ret,'\n');
317             ret = ParserUtils.removeChars (ret,'\r');
318         }
319         if (null != getPage ())
320             ret = getPage ().getAbsoluteURL (ret);
321
322         return (ret);
323     }
324 }
325
Popular Tags