KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > protocol > file > File


1 /* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.protocol.file;
5
6 import javax.activation.MimetypesFileTypeMap JavaDoc;
7 // 20040528, xing, disabled for now
8
//import xing.net.nutch.util.magicfile.*;
9

10 import net.nutch.net.protocols.HttpDateFormat;
11
12 import net.nutch.util.LogFormatter;
13 import net.nutch.util.NutchConf;
14
15 import net.nutch.protocol.Content;
16 import net.nutch.protocol.Protocol;
17
18 import java.util.logging.Level JavaDoc;
19 import java.util.logging.Logger JavaDoc;
20
21 import java.net.URL JavaDoc;
22
23 import java.io.InputStream JavaDoc;
24 // 20040528, xing, disabled for now
25
//import java.io.Reader;
26
import java.io.IOException JavaDoc;
27
28 /************************************
29  * File.java deals with file: scheme.
30  *
31  * Configurable parameters are defined under "FILE properties" section
32  * in ./conf/nutch-default.xml or similar.
33  *
34  * @author John Xing
35  ***********************************/

36 public class File implements Protocol {
37
38   public static final Logger JavaDoc LOG =
39     LogFormatter.getLogger("net.nutch.protocol.file.File");
40
41   static final int MAX_REDIRECTS = 5;
42
43   static int maxContentLength = NutchConf.getInt("file.content.limit",64*1024);
44
45   // 20040412, xing
46
// the following three: HttpDateFormat, MimetypesFileTypeMap, MagicFile
47
// are placed in each thread before we check out if they're thread-safe.
48

49   // http date format
50
HttpDateFormat httpDateFormat = null;
51
52   // file name extension to mime-type map
53
static MimetypesFileTypeMap JavaDoc TYPE_MAP = null;
54
55   static {
56     try {
57       // read mime types from config file
58
InputStream JavaDoc is =
59         NutchConf.getConfResourceAsInputStream
60         (NutchConf.get("mime.types.file"));
61       if (is == null) {
62         LOG.warning
63           ("no mime.types.file: won't use url extension for content-type.");
64         TYPE_MAP = null;
65       } else {
66         TYPE_MAP = new MimetypesFileTypeMap JavaDoc(is);
67       }
68       
69       if (is != null)
70         is.close();
71     } catch (IOException JavaDoc e) {
72       LOG.log(Level.SEVERE, "Unexpected error", e);
73     }
74   }
75
76 // 20040528, xing, disabled for now
77
// // file magic for determining content type
78
// static MagicFile MAGIC = null;
79
//
80
// static {
81
// try {
82
// // read file magic from config file
83
// Reader reader =
84
// NutchConf.getConfResourceAsReader
85
// (NutchConf.get("mime.magic.file"));
86
// if (reader == null) {
87
// LOG.warning
88
// ("no mime.magic.file: won't use file magic for content-type.");
89
// MAGIC = null;
90
// } else {
91
// MAGIC = MagicFile.getInstance(reader);
92
// }
93
//
94
// if (reader != null)
95
// reader.close();
96
// } catch (IOException e) {
97
// LOG.log(Level.SEVERE, "Unexpected error", e);
98
// }
99
// }
100

101   // constructor
102
public File() {
103     this.httpDateFormat = new HttpDateFormat();
104   }
105
106   /** Set the point at which content is truncated. */
107   public void setMaxContentLength(int length) {this.maxContentLength = length;}
108
109   public Content getContent(String JavaDoc urlString) throws FileException {
110     try {
111       URL JavaDoc url = new URL JavaDoc(urlString);
112   
113       int redirects = 0;
114   
115       while (true) {
116         FileResponse response;
117         response = new FileResponse(urlString, url, this); // make a request
118

119         int code = response.getCode();
120   
121         if (code == 200) { // got a good response
122
return response.toContent(); // return it
123

124         } else if (code >= 300 && code < 400) { // handle redirect
125
if (redirects == MAX_REDIRECTS)
126             throw new FileException("Too many redirects: " + url);
127           url = new URL JavaDoc(response.getHeader("Location"));
128           redirects++;
129           if (LOG.isLoggable(Level.FINE))
130             LOG.fine("redirect to " + url);
131   
132         } else { // convert to exception
133
throw new FileError(code);
134         }
135       }
136     } catch (IOException JavaDoc e) {
137       throw new FileException(e);
138     }
139   }
140
141 // protected void finalize () {
142
// // nothing here
143
// }
144

145   /** For debugging. */
146   public static void main(String JavaDoc[] args) throws Exception JavaDoc {
147     int maxContentLength = Integer.MIN_VALUE;
148     String JavaDoc logLevel = "info";
149     boolean dumpContent = false;
150     String JavaDoc urlString = null;
151
152     String JavaDoc usage = "Usage: File [-logLevel level] [-maxContentLength L] [-dumpContent] url";
153
154     if (args.length == 0) {
155       System.err.println(usage);
156       System.exit(-1);
157     }
158       
159     for (int i = 0; i < args.length; i++) {
160       if (args[i].equals("-logLevel")) {
161         logLevel = args[++i];
162       } else if (args[i].equals("-maxContentLength")) {
163         maxContentLength = Integer.parseInt(args[++i]);
164       } else if (args[i].equals("-dumpContent")) {
165         dumpContent = true;
166       } else if (i != args.length-1) {
167         System.err.println(usage);
168         System.exit(-1);
169       } else
170         urlString = args[i];
171     }
172
173     File file = new File();
174
175     if (maxContentLength != Integer.MIN_VALUE) // set maxContentLength
176
file.setMaxContentLength(maxContentLength);
177
178     // set log level
179
LOG.setLevel(Level.parse((new String JavaDoc(logLevel)).toUpperCase()));
180
181     Content content = file.getContent(urlString);
182
183     System.err.println("Content-Type: " + content.getContentType());
184     System.err.println("Content-Length: " + content.get("Content-Length"));
185     System.err.println("Last-Modified: " + content.get("Last-Modified"));
186     if (dumpContent) {
187       System.out.print(new String JavaDoc(content.getContent()));
188     }
189
190     file = null;
191   }
192
193 }
194
Popular Tags