KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > util > LinkProcessor


1 // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2
// http://sourceforge.org/projects/htmlparser
3
// Copyright (C) 2004 Somik Raha
4
//
5
// Revision Control Information
6
//
7
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/LinkProcessor.java,v $
8
// $Author: derrickoswald $
9
// $Date: 2004/07/31 16:42:34 $
10
// $Revision: 1.35 $
11
//
12
// This library is free software; you can redistribute it and/or
13
// modify it under the terms of the GNU Lesser General Public
14
// License as published by the Free Software Foundation; either
15
// version 2.1 of the License, or (at your option) any later version.
16
//
17
// This library is distributed in the hope that it will be useful,
18
// but WITHOUT ANY WARRANTY; without even the implied warranty of
19
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
// Lesser General Public License for more details.
21
//
22
// You should have received a copy of the GNU Lesser General Public
23
// License along with this library; if not, write to the Free Software
24
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
//
26

27 package org.htmlparser.util;
28
29 import java.io.Serializable JavaDoc;
30 import java.net.MalformedURLException JavaDoc;
31 import java.net.URL JavaDoc;
32
33 /**
34  * Processor class for links, is present basically as a utility class.
35  * @deprecated Use a Page object instead.
36  */

37 public class LinkProcessor
38     implements
39         Serializable JavaDoc
40 {
41     /**
42      * Overriding base URL.
43      * If set, this is used instead of a provided base URL in extract().
44      */

45     private String JavaDoc baseUrl;
46
47     /**
48      * Create an HTMLLinkProcessor.
49      */

50     public LinkProcessor ()
51     {
52         baseUrl = null;
53     }
54
55     /**
56      * Create an absolute URL from a possibly relative link and a base URL.
57      * @param link The reslative portion of a URL.
58      * @param base The base URL unless overridden by the current baseURL property.
59      * @return The fully qualified URL or the original link if a failure occured.
60      * @deprecated Use Page.getAbsoluteURL() instead.
61      */

62     public String JavaDoc extract (String JavaDoc link, String JavaDoc base)
63     {
64         String JavaDoc ret;
65
66         try
67         {
68             if (null == link)
69                 link = "";
70             else
71                 link = stripQuotes (link);
72             if (null != getBaseUrl ())
73                 base = getBaseUrl ();
74             if ((null == base) || ("".equals (link)))
75                 ret = link;
76             else
77             {
78                 URL JavaDoc url = constructUrl(link, base);
79                 ret = url.toExternalForm ();
80             }
81         }
82         catch (MalformedURLException JavaDoc murle)
83         {
84             ret = link;
85         }
86
87         return (Translate.decode (ret));
88     }
89
90     /**
91      * Remove double or single quotes from the string.
92      */

93     public String JavaDoc stripQuotes (String JavaDoc string)
94     {
95         // remove any double quotes from around string
96
if (string.startsWith ("\"") && string.endsWith ("\"") && (1 < string.length ()))
97             string = string.substring (1, string.length () - 1);
98
99         // remove any single quote from around string
100
if (string.startsWith ("'") && string.endsWith ("'") && (1 < string.length ()))
101             string = string.substring (1, string.length () - 1);
102
103         return (string);
104     }
105
106     /**
107      * @deprecated Use Page.constructUrl() instead.
108      */

109     public URL JavaDoc constructUrl(String JavaDoc link, String JavaDoc base)
110         throws MalformedURLException JavaDoc {
111         String JavaDoc path;
112         boolean modified;
113         boolean absolute;
114         int index;
115         URL JavaDoc url; // constructed URL combining relative link and base
116
url = new URL JavaDoc (new URL JavaDoc (base), link);
117         path = url.getFile ();
118         modified = false;
119         absolute = link.startsWith ("/");
120         if (!absolute) { // we prefer to fix incorrect relative links
121
// this doesn't fix them all, just the ones at the start
122
while (path.startsWith ("/.")) {
123                 if (path.startsWith ("/../")) {
124                     path = path.substring (3);
125                     modified = true;
126                 }
127                 else if (path.startsWith ("/./") || path.startsWith("/.")) {
128                     path = path.substring (2);
129                     modified = true;
130                 } else break;
131             }
132         }
133         // fix backslashes
134
while (-1 != (index = path.indexOf ("/\\"))) {
135             path = path.substring (0, index + 1) + path.substring (index + 2);
136             modified = true;
137         }
138         if (modified)
139             url = new URL JavaDoc (url, path);
140         return url;
141     }
142
143     /**
144      * Turn spaces into %20.
145      * @param url The url containing spaces.
146      * @return The URL with spaces as %20 sequences.
147      * @deprecated Use Parser.fixSpaces() instead.
148      */

149     public static String JavaDoc fixSpaces (String JavaDoc url)
150     {
151         int index;
152         int length;
153         char ch;
154         StringBuffer JavaDoc returnURL;
155
156         index = url.indexOf (' ');
157         if (-1 != index)
158         {
159             length = url.length ();
160             returnURL = new StringBuffer JavaDoc (length * 3);
161             returnURL.append (url.substring (0, index));
162             for (int i = index; i < length; i++)
163             {
164                 ch = url.charAt (i);
165                 if (ch==' ')
166                     returnURL.append ("%20");
167                 else
168                     returnURL.append (ch);
169             }
170             url = returnURL.toString ();
171         }
172
173         return (url);
174     }
175
176     /**
177      * Check if a resource is a valid URL.
178      * @param resourceLocn The resource to test.
179      * @return <code>true</code> if the resource is a valid URL.
180      */

181     public static boolean isURL (String JavaDoc resourceLocn) {
182         boolean ret;
183
184         try
185         {
186             new URL JavaDoc (resourceLocn);
187             ret = true;
188         }
189         catch (MalformedURLException JavaDoc murle)
190         {
191             ret = false;
192         }
193
194         return (ret);
195     }
196
197     /**
198      * Returns the baseUrl.
199      * @return String
200      */

201     public String JavaDoc getBaseUrl ()
202     {
203         return baseUrl;
204     }
205
206     /**
207      * Sets the baseUrl.
208      * @param baseUrl The baseUrl to set
209      */

210     public void setBaseUrl (String JavaDoc baseUrl)
211     {
212         this.baseUrl = baseUrl;
213     }
214
215     /**
216      * @deprecated Removing the last slash from a URL is a bad idea.
217      */

218     public static String JavaDoc removeLastSlash(String JavaDoc baseUrl) {
219       if(baseUrl.charAt(baseUrl.length()-1)=='/')
220       {
221          return baseUrl.substring(0,baseUrl.length()-1);
222       }
223       else
224       {
225          return baseUrl;
226       }
227     }
228
229 }
230
Popular Tags