KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > matuschek > http > HttpDocToFile


1 package net.matuschek.http;
2
3 /************************************************
4     Copyright (c) 2001/2002 by Daniel Matuschek
5 *************************************************/

6
7
8 import java.io.BufferedOutputStream JavaDoc;
9 import java.io.File JavaDoc;
10 import java.io.FileInputStream JavaDoc;
11 import java.io.FileOutputStream JavaDoc;
12 import java.io.IOException JavaDoc;
13 import java.net.URL JavaDoc;
14 import java.util.StringTokenizer JavaDoc;
15
16 import org.apache.log4j.Category;
17
18 /**
19  * DocumentManager that will store document contents in a file.
20  *
21  * @author Daniel Matuschek
22  * @version $Revision: 1.11 $
23  */

24 public class HttpDocToFile extends AbstractHttpDocManager
25 {
26   /**
27    * directory where the files will be created
28    */

29   private String JavaDoc baseDir;
30
31
32   /**
33    * the object will not store files smaller then this size !
34    */

35   private int minFileSize;
36   
37
38   /**
39    * defines if special characters in the URL should be replaced
40    * by "normal" characters
41    * @see #setReplaceAllSpecials(boolean)
42    */

43   private boolean replaceAllSpecials = false;
44
45
46   /**
47    * defines, if CGIs should be stored on disc.
48    *
49    * @see #setStoreCGI
50    */

51   private boolean storeCGI = true;
52
53   /** Log4J logging */
54   private Category log;
55
56
57   
58   /**
59    * creates a new HttpDocToFile object that will store the
60    * documents in the given directory
61    */

62   public HttpDocToFile(String JavaDoc baseDir) {
63     this.baseDir = baseDir;
64     log = Category.getInstance(getClass().getName());
65   }
66   
67
68   /**
69    * store document (that means write it to disk)
70    * @param doc the document to store
71    * @exception DocManagerException if the document can't be stored
72    * (some IO error occured)
73    */

74   public void storeDocument(HttpDoc doc)
75     throws DocManagerException
76   {
77     if ((doc == null) || (doc.getContent() == null)) {
78       return;
79     }
80     
81     /*
82      * write file only, if this was NOT a cached document
83      * (in this case we have it already on harddisk)
84      */

85     if (doc.isCached()) {
86         return;
87     }
88
89
90     if ((! storeCGI)
91     && (doc.getURL().toString().indexOf('?') >= 0)) {
92       // do not store dynamic pages, because storeCGI is false
93
// and the URL contains a "?"
94
return;
95     }
96
97
98     String JavaDoc filename = url2Filename(doc.getURL());
99     if (doc.getContent().length >= minFileSize) {
100       try {
101         createDirs(filename);
102         BufferedOutputStream JavaDoc os =
103           new BufferedOutputStream JavaDoc(new FileOutputStream JavaDoc(filename));
104         os.write(doc.getContent());
105         os.flush();
106         os.close();
107       } catch (IOException JavaDoc e) {
108         throw new DocManagerException(e.getMessage());
109       }
110     }
111   }
112
113
114   /**
115    * Gets the cacheFile of the given URL if its document was stored.
116    * @param url
117    * @return cacheFile
118    */

119   protected File JavaDoc getCacheFile(URL JavaDoc url) {
120       // does the file exists on the filesystem ?
121
File JavaDoc cacheFile = new File JavaDoc(url2Filename(url));
122       if (! (cacheFile.exists() && (cacheFile.isFile()))) {
123         return null;
124       }
125       return cacheFile;
126   }
127
128     /**
129      * Gets the extension of the given URL if its document was stored.
130      * @param url
131      * @return String
132      */

133     protected String JavaDoc getExtension(URL JavaDoc url) {
134         // is it dynamic ?
135
if ((url.toString().indexOf('?') >= 0)
136         || (url.toString().indexOf("cgi") >= 0)) {
137           return null;
138         }
139         
140         // do we have an filename extension ?
141
// without it is not possible to guess the MIME type.
142
String JavaDoc path = url.getPath();
143         String JavaDoc ext = null;
144     
145         if (path.indexOf(".") < 0) {
146             return null;
147         }
148     
149         StringTokenizer JavaDoc st = new StringTokenizer JavaDoc(path,".");
150         while (st.hasMoreTokens()) {
151             ext = st.nextToken();
152         }
153         // no extension if ext contains a "/"
154
if (ext.indexOf("/") >= 0) {
155             return null;
156         }
157         
158         return ext;
159     }
160     
161   /**
162    * Removes a document that was stored previous from the file system. Because
163    * the HttpDocToFile does not store the HTTP headers, only the Content-Type
164    * header will exists. Even this header may not be correct. It will only use a
165    * simple heuristic to determine the possible MIME type.
166    */

167   public void removeDocument(URL JavaDoc u) {
168     String JavaDoc ext = getExtension(u);
169     if (ext == null) return;
170     File JavaDoc cacheFile = getCacheFile(u);
171     if (cacheFile == null) return ;
172     
173     cacheFile.delete();
174   }
175
176   /**
177    * Gets a document that was stored previous from the file system.
178    * Because the HttpDocToFile does not store the HTTP headers, only
179    * the Content-Type header will exists. Even this header may not
180    * be correct. It will only use a simple heuristic to determine the
181    * possible MIME type.
182    *
183    * @return null, if this document was not stored before or it seems
184    * to be a dynamic document.
185    */

186   public HttpDoc retrieveFromCache(URL JavaDoc u) {
187     String JavaDoc ext = getExtension(u);
188     if (ext == null) return null;
189     File JavaDoc cacheFile = getCacheFile(u);
190     if (cacheFile == null) return null;
191     
192     // create a buffer;
193
long size = cacheFile.length();
194     if (size > Integer.MAX_VALUE) {
195       log.info("File too large");
196       return null;
197     }
198
199     byte[] buff = new byte[(int) size];
200
201     // read the file
202
try {
203       FileInputStream JavaDoc fi = new FileInputStream JavaDoc(cacheFile);
204       fi.read(buff);
205     } catch (IOException JavaDoc e) {
206       log.info("Could not read cached document "+e.getMessage());
207       return null;
208     }
209     
210     // create a new HttpDoc object
211
HttpDoc doc = new HttpDoc();
212
213     // and set the content and the header
214
doc.setHttpCode("HTTP/1.0 200 OK");
215     doc.setContent(buff);
216     
217    
218     // now guess the MIME type
219
String JavaDoc mimetype = null;
220
221     if (ext.equals("html")
222     || ext.equals("htm")
223     || ext.equals("shtml")
224     || ext.equals("asp")
225     || ext.equals("php")
226     || ext.equals("jsp")) {
227       mimetype="text/html";
228     } else {
229       mimetype="application/unknown";
230     }
231
232     doc.addHeader(new HttpHeader("Content-Type",mimetype));
233     doc.setURL(u);
234     doc.setCached(true);
235     
236     return doc;
237   }
238   
239
240   /**
241    * gets the value of baseDir
242    * @return the value of baseDir
243    */

244   public String JavaDoc getBaseDir() {
245     return baseDir;
246   }
247   
248
249   /**
250    * sets the value of basedir
251    * @param baseDir the new value of baseDir
252    */

253   public void setBaseDir(String JavaDoc baseDir) {
254     this.baseDir = baseDir;
255   }
256   
257
258   /**
259    * converts an URL to a filename http://host/path will
260    * be converted to basedir/host/path
261    * @param URL a URL to convert, must not be null
262    * @return a pathname
263    */

264   protected String JavaDoc url2Filename(URL JavaDoc u) {
265     StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
266
267     sb.append(baseDir);
268     sb.append(File.separatorChar);
269     sb.append(u.getHost());
270     sb.append(u.getFile());
271
272     // is there a query part ?
273
// that is something after the file name seperated by ?
274
String JavaDoc query = u.getQuery();
275     if ((query != null) &&
276     (!query.equals(""))) {
277       sb.append(File.separatorChar);
278       sb.append(query);
279     }
280
281     // filename that ends with /
282
// are directories, we will name the file "index.html"
283
if (sb.charAt(sb.length()-1) == '/') {
284       sb.append("index.html");
285     }
286
287     // postprocess filename (replace special characters)
288
for (int i=0; i<sb.length(); i++) {
289       char c=sb.charAt(i);
290       char newc=(char)0;
291
292       // replace / by operating system file name separator
293
if (c == '/') {
294     newc = File.separatorChar;
295       }
296       
297       // replace special characters from CGIs
298
if (replaceAllSpecials) {
299     if ((c == '?')
300         || (c == '=')
301         || (c == '&')) {
302       newc = '-';
303     }
304       }
305
306       if ((newc != (char)0)
307       && (newc != c)) {
308     sb.setCharAt(i,newc);
309       }
310     }
311
312     return sb.toString();
313   }
314   
315
316   /**
317    * creates all directories that are needed to place the
318    * file filename if they don't exists
319    * @param filename the full path name of a file
320    */

321   protected void createDirs(String JavaDoc filename) throws IOException JavaDoc {
322     int pos = -1;
323     // look for the last directory separator in the filename
324
for (int i = filename.length() - 1; i >= 0; i--) {
325       if (filename.charAt(i) == File.separatorChar) {
326     pos = i;
327     i = -1;
328       }
329     }
330     File JavaDoc dir = new File JavaDoc(filename.substring(0, pos));
331     dir.mkdirs();
332   }
333   
334
335   /**
336    * gets the value of minFileSize. Files smaller then this size
337    * (in Bytes) will not be saved to disk !
338    * @return the value of minFileSize
339    */

340   public int getMinFileSize() {
341     return minFileSize;
342   }
343
344   
345   /**
346    * sets the value of minFileSize
347    * @param minFileSize the new value of minFileSize
348    * @see #getMinFileSize()
349    */

350   public void setMinFileSize(int minFileSize) {
351     this.minFileSize = minFileSize;
352   }
353
354
355   /**
356    * Get the value of replaceAllSpecials.
357    *
358    * if replaceAllSpecials is true, all sepcial characters in the URL
359    * will be replaced by "-". This is useful for operating system that
360    * can't handle files with special characters in the filename (e.g.
361    * Windows)
362    *
363    * @return value of replaceAllSpecials.
364    */

365   public boolean isReplaceAllSpecials() {
366     return replaceAllSpecials;
367   }
368   
369
370   /**
371    * Set the value of replaceAllSpecials.
372    *
373    * if replaceAllSpecials is true, all sepcial characters in the URL
374    * will be replaced by "-". This is useful for operating system that
375    * can't handle files with special characters in the filename (e.g.
376    * Windows)
377    *
378    * @param v Value to assign to replaceAllSpecials.
379    */

380   public void setReplaceAllSpecials(boolean v) {
381     this.replaceAllSpecials = v;
382   }
383
384
385   /**
386    * Get the value of storeCGI
387    *
388    * If this is true, the object will store ALL retrieved documents,
389    * otherwise it will store only documents from URLs that do not
390    * have a "?" in the URL
391    */

392   public boolean getStoreCGI() {
393     return storeCGI;
394   }
395   
396
397   /**
398    * Set the value of storeCGI.
399    *
400    * If this is true, the object will store ALL retrieved documents,
401    * otherwise it will store only documents from URLs that do not
402    * have a "?" in the URL
403    *
404    * @param v Value to assign to storeCGI.
405    */

406   public void setStoreCGI(boolean v) {
407     this.storeCGI = v;
408   }
409
410 }
411
412
413
Popular Tags