KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > beans > LinkBean


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/LinkBean.java,v $
// $Author: derrickoswald $
// $Date: 2005/03/12 21:39:45 $
// $Revision: 1.30 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.beans;

import java.beans.PropertyChangeListener;
import java.beans.PropertyChangeSupport;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Vector;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

46 /**
47  * Extract links from a URL.
48  */

49 public class LinkBean extends Object JavaDoc implements Serializable JavaDoc
50 {
51     /**
52      * Property name in event where the URL contents changes.
53      */

54     public static final String JavaDoc PROP_LINKS_PROPERTY = "links";
55
56     /**
57      * Property name in event where the URL changes.
58      */

59     public static final String JavaDoc PROP_URL_PROPERTY = "URL";
60
61     /**
62      * Bound property support.
63      */

64     protected PropertyChangeSupport JavaDoc mPropertySupport;
65
66     /**
67      * The strings extracted from the URL.
68      */

69     protected URL JavaDoc[] mLinks;
70
71     /**
72      * The parser used to extract strings.
73      */

74     protected Parser mParser;
75
76     /** Creates new LinkBean */
77     public LinkBean ()
78     {
79         mPropertySupport = new PropertyChangeSupport JavaDoc (this);
80         mLinks = null;
81         mParser = new Parser ();
82     }
83
84     //
85
// internals
86
//
87

88     protected URL JavaDoc[] extractLinks () throws ParserException
89     {
90         NodeFilter filter;
91         NodeList list;
92         Vector JavaDoc vector;
93         LinkTag link;
94         URL JavaDoc[] ret;
95
96         mParser.reset ();
97         filter = new NodeClassFilter (LinkTag.class);
98         try
99         {
100             list = mParser.extractAllNodesThatMatch (filter);
101         }
102         catch (EncodingChangeException ece)
103         {
104             mParser.reset ();
105             list = mParser.extractAllNodesThatMatch (filter);
106         }
107         vector = new Vector JavaDoc();
108         for (int i = 0; i < list.size (); i++)
109             try
110             {
111                 link = (LinkTag)list.elementAt (i);
112                 vector.add(new URL JavaDoc (link.getLink ()));
113             }
114             catch (MalformedURLException JavaDoc murle)
115             {
116                 //vector.remove (i);
117
//i--;
118
}
119         ret = new URL JavaDoc[vector.size ()];
120         vector.copyInto (ret);
121
122         return (ret);
123     }
124
125     /**
126      * Determine if two arrays of URL's are the same.
127      * @param array1 One array of URL's
128      * @param array2 Another array of URL's
129      * @return <code>true</code> if the URL's match in number and value,
130      * <code>false</code> otherwise.
131      */

132     protected boolean equivalent (URL JavaDoc[] array1, URL JavaDoc[] array2)
133     {
134         boolean ret;
135
136         ret = false;
137         if ((null == array1) && (null == array2))
138             ret = true;
139         else if ((null != array1) && (null != array2))
140             if (array1.length == array2.length)
141             {
142                 ret = true;
143                 for (int i = 0; i < array1.length && ret; i++)
144                     if (!(array1[i] == array2[i]))
145                         ret = false;
146             }
147
148         return (ret);
149     }
150
151     //
152
// Property change support.
153
//
154

155     /**
156      * Add a PropertyChangeListener to the listener list.
157      * The listener is registered for all properties.
158      * @param listener The PropertyChangeListener to be added.
159      */

160     public void addPropertyChangeListener (PropertyChangeListener JavaDoc listener)
161     {
162         mPropertySupport.addPropertyChangeListener (listener);
163     }
164
165     /**
166      * Remove a PropertyChangeListener from the listener list.
167      * This removes a PropertyChangeListener that was registered for all properties.
168      * @param listener The PropertyChangeListener to be removed.
169      */

170     public void removePropertyChangeListener (PropertyChangeListener JavaDoc listener)
171     {
172         mPropertySupport.removePropertyChangeListener (listener);
173     }
174
175     //
176
// Properties
177
//
178

179     /**
180      * Refetch the URL contents.
181      */

182     private void setLinks ()
183     {
184         String JavaDoc url;
185         URL JavaDoc[] urls;
186         URL JavaDoc[] oldValue;
187
188         url = getURL ();
189         if (null != url)
190             try
191             {
192                 urls = extractLinks ();
193                 if (!equivalent (mLinks, urls))
194                 {
195                     oldValue = mLinks;
196                     mLinks = urls;
197                     mPropertySupport.firePropertyChange (PROP_LINKS_PROPERTY, oldValue, mLinks);
198                 }
199             }
200             catch (ParserException hpe)
201             {
202                 mLinks = null;
203             }
204     }
205
206     /**
207      * Getter for property links.
208      * @return Value of property links.
209      */

210     public URL JavaDoc[] getLinks ()
211     {
212         if (null == mLinks)
213             try
214             {
215                 mLinks = extractLinks ();
216                 mPropertySupport.firePropertyChange (PROP_LINKS_PROPERTY, null, mLinks);
217             }
218             catch (ParserException hpe)
219             {
220                 mLinks = null;
221             }
222
223         return (mLinks);
224     }
225
226
227     /**
228      * Getter for property URL.
229      * @return Value of property URL.
230      */

231     public String JavaDoc getURL ()
232     {
233         return (mParser.getURL ());
234     }
235
236     /**
237      * Setter for property URL.
238      * @param url New value of property URL.
239      */

240     public void setURL (String JavaDoc url)
241     {
242         String JavaDoc old;
243
244         old = getURL ();
245         if (((null == old) && (null != url)) || ((null != old) && !old.equals (url)))
246         {
247             try
248             {
249                 mParser.setURL (url);
250                 mPropertySupport.firePropertyChange (PROP_URL_PROPERTY, old, getURL ());
251                 setLinks ();
252             }
253             catch (ParserException hpe)
254             {
255                 // failed... now what
256
}
257         }
258     }
259
260     /**
261      * Getter for property Connection.
262      * @return Value of property Connection.
263      */

264     public URLConnection JavaDoc getConnection ()
265     {
266         return (mParser.getConnection ());
267     }
268
269     /**
270      * Setter for property Connection.
271      * @param connection New value of property Connection.
272      */

273     public void setConnection (URLConnection JavaDoc connection)
274     {
275         try
276         {
277             mParser.setConnection (connection);
278             setLinks ();
279         }
280         catch (ParserException hpe)
281         {
282             // failed... now what
283
}
284     }
285
286     /**
287      * Unit test.
288      * @param args Pass arg[0] as the URL to process.
289      */

290     public static void main (String JavaDoc[] args)
291     {
292         if (0 >= args.length)
293             System.out.println ("Usage: java -classpath htmlparser.jar org.htmlparser.beans.LinkBean <http://whatever_url>");
294         else
295         {
296             LinkBean lb = new LinkBean ();
297             lb.setURL (args[0]);
298             URL JavaDoc[] urls = lb.getLinks ();
299             for (int i = 0; i < urls.length; i++)
300                 System.out.println (urls[i]);
301         }
302     }
303 }
304
305
306
Popular Tags