KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > weblech > spider > URLObject


1 /*
2  * This is the MIT license, see also http://www.opensource.org/licenses/mit-license.html
3  *
4  * Copyright (c) 2001 Brian Pitcher
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */

24
25 // $Header: /cvsroot/weblech/weblech/src/weblech/spider/URLObject.java,v 1.3 2002/06/02 08:00:48 weblech Exp $
26

27 package weblech.spider;
28
29 import org.apache.log4j.Category;
30
31 import java.io.*;
32 import java.net.URL JavaDoc;
33 import java.net.URLEncoder JavaDoc;
34
35 import weblech.util.Log4j;
36
37 public class URLObject
38 {
39     private final static Category _logClass = Category.getInstance(URLObject.class);
40
41     static
42     {
43         Log4j.init();
44     }
45
46     private final URL JavaDoc sourceURL;
47     private final String JavaDoc contentType;
48     private final byte[] content;
49
50     private final SpiderConfig config;
51
52     public URLObject(URL JavaDoc sourceURL, String JavaDoc contentType, byte[] content, SpiderConfig config)
53     {
54         this.sourceURL = sourceURL;
55         this.contentType = contentType;
56         this.content = content;
57         this.config = config;
58     }
59
60     public URLObject(URL JavaDoc sourceURL, SpiderConfig config)
61     {
62         this.sourceURL = sourceURL;
63         this.config = config;
64
65         String JavaDoc s = sourceURL.toExternalForm().toLowerCase();
66         if(s.indexOf(".jpg") != -1)
67         {
68             contentType = "image/jpeg";
69         }
70         else if(s.indexOf(".gif") != -1)
71         {
72             contentType = "image/gif";
73         }
74         else
75         {
76             contentType = "text/html";
77         }
78
79         if(existsOnDisk())
80         {
81
82             File f = new File(convertToFileName());
83             if(f.isDirectory())
84             {
85                 f = new File(f, "index.html");
86             }
87             content = new byte[(int) f.length()];
88             try
89             {
90                 FileInputStream in = new FileInputStream(f);
91                 in.read(content);
92                 in.close();
93             }
94             catch(IOException ioe)
95             {
96                 _logClass.warn("IO Exception reading disk version of URL " + sourceURL, ioe);
97             }
98         }
99         else
100         {
101             content = new byte[0];
102         }
103     }
104
105     public String JavaDoc getContentType()
106     {
107         return contentType;
108     }
109
110     public boolean isHTML()
111     {
112         return contentType.toLowerCase().startsWith("text/html");
113     }
114
115     public boolean isXML()
116     {
117         return contentType.toLowerCase().startsWith("text/xml");
118     }
119
120     public boolean isImage()
121     {
122         return contentType.startsWith("image/");
123     }
124
125     public String JavaDoc getStringContent()
126     {
127         return new String JavaDoc(content);
128     }
129
130     private String JavaDoc convertToFileName()
131     {
132         String JavaDoc url = sourceURL.toExternalForm();
133         int httpIdx = url.indexOf("http://");
134         if(httpIdx == 0)
135         {
136             url = url.substring(7);
137         }
138         // Check for at least one slash -- otherwise host name (e.g. sourceforge.net)
139
if(url.indexOf("/") < 0)
140         {
141             url = url + "/";
142         }
143         // If trailing slash, add index.html as default
144
if(url.endsWith("/"))
145         {
146             url = url + "index.html";
147         }
148         url = textReplace("?", URLEncoder.encode("?"), url);
149         url = textReplace("&", URLEncoder.encode("&"), url);
150         return config.getSaveRootDirectory().getPath() + "/" + url;
151     }
152
153     public boolean existsOnDisk()
154     {
155         File f = new File(convertToFileName());
156         return (f.exists() && !f.isDirectory());
157     }
158
159     public void writeToFile()
160     {
161         writeToFile(convertToFileName());
162     }
163
164     public void writeToFile(String JavaDoc fileName)
165     {
166         _logClass.debug("writeToFile(" + fileName + ")");
167         try
168         {
169             File f = new File(fileName);
170             f.getParentFile().mkdirs();
171             FileOutputStream out = new FileOutputStream(fileName);
172             out.write(content);
173             out.flush();
174             out.close();
175         }
176         catch(IOException ioe)
177         {
178             _logClass.warn("IO Exception writing to " + fileName, ioe);
179         }
180     }
181
182     public String JavaDoc toString()
183     {
184         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
185         sb.append("URLObject: ");
186         sb.append(contentType);
187         if(false)//isHTML() || isXML())
188
{
189             sb.append("\n");
190             sb.append(getStringContent());
191         }
192         return sb.toString();
193     }
194
195     private String JavaDoc textReplace(String JavaDoc find, String JavaDoc replace, String JavaDoc input)
196     {
197         int startPos = 0;
198         while(true)
199         {
200             int textPos = input.indexOf(find, startPos);
201             if(textPos < 0)
202             {
203                 break;
204             }
205             input = input.substring(0, textPos) + replace + input.substring(textPos + find.length());
206             startPos = textPos + replace.length();
207         }
208         return input;
209     }
210 }
211
Popular Tags