1 2 3 4 package net.nutch.protocol.file; 5 6 import javax.activation.MimetypesFileTypeMap ; 7 10 import net.nutch.net.protocols.HttpDateFormat; 11 12 import net.nutch.util.LogFormatter; 13 import net.nutch.util.NutchConf; 14 15 import net.nutch.protocol.Content; 16 import net.nutch.protocol.Protocol; 17 18 import java.util.logging.Level ; 19 import java.util.logging.Logger ; 20 21 import java.net.URL ; 22 23 import java.io.InputStream ; 24 import java.io.IOException ; 27 28 36 public class File implements Protocol { 37 38 public static final Logger LOG = 39 LogFormatter.getLogger("net.nutch.protocol.file.File"); 40 41 static final int MAX_REDIRECTS = 5; 42 43 static int maxContentLength = NutchConf.getInt("file.content.limit",64*1024); 44 45 49 HttpDateFormat httpDateFormat = null; 51 52 static MimetypesFileTypeMap TYPE_MAP = null; 54 55 static { 56 try { 57 InputStream is = 59 NutchConf.getConfResourceAsInputStream 60 (NutchConf.get("mime.types.file")); 61 if (is == null) { 62 LOG.warning 63 ("no mime.types.file: won't use url extension for content-type."); 64 TYPE_MAP = null; 65 } else { 66 TYPE_MAP = new MimetypesFileTypeMap (is); 67 } 68 69 if (is != null) 70 is.close(); 71 } catch (IOException e) { 72 LOG.log(Level.SEVERE, "Unexpected error", e); 73 } 74 } 75 76 101 public File() { 103 this.httpDateFormat = new HttpDateFormat(); 104 } 105 106 107 public void setMaxContentLength(int length) {this.maxContentLength = length;} 108 109 public Content getContent(String urlString) throws FileException { 110 try { 111 URL url = new URL (urlString); 112 113 int redirects = 0; 114 115 while (true) { 116 FileResponse response; 117 response = new FileResponse(urlString, url, this); 119 int code = response.getCode(); 120 121 if (code == 200) { return response.toContent(); 124 } else if (code >= 300 && code < 400) { if (redirects == MAX_REDIRECTS) 126 throw new FileException("Too many redirects: " + url); 127 url = new URL (response.getHeader("Location")); 128 redirects++; 129 if (LOG.isLoggable(Level.FINE)) 130 LOG.fine("redirect to " + url); 131 132 } else { throw new FileError(code); 134 } 135 } 136 } catch (IOException e) { 137 throw new FileException(e); 138 } 139 } 140 141 145 146 public static void main(String [] args) throws Exception { 147 int maxContentLength = Integer.MIN_VALUE; 148 String logLevel = "info"; 149 boolean dumpContent = false; 150 String urlString = null; 151 152 String usage = "Usage: File [-logLevel level] [-maxContentLength L] [-dumpContent] url"; 153 154 if (args.length == 0) { 155 System.err.println(usage); 156 System.exit(-1); 157 } 158 159 for (int i = 0; i < args.length; i++) { 160 if (args[i].equals("-logLevel")) { 161 logLevel = args[++i]; 162 } else if (args[i].equals("-maxContentLength")) { 163 maxContentLength = Integer.parseInt(args[++i]); 164 } else if (args[i].equals("-dumpContent")) { 165 dumpContent = true; 166 } else if (i != args.length-1) { 167 System.err.println(usage); 168 System.exit(-1); 169 } else 170 urlString = args[i]; 171 } 172 173 File file = new File(); 174 175 if (maxContentLength != Integer.MIN_VALUE) file.setMaxContentLength(maxContentLength); 177 178 LOG.setLevel(Level.parse((new String (logLevel)).toUpperCase())); 180 181 Content content = file.getContent(urlString); 182 183 System.err.println("Content-Type: " + content.getContentType()); 184 System.err.println("Content-Length: " + content.get("Content-Length")); 185 System.err.println("Last-Modified: " + content.get("Last-Modified")); 186 if (dumpContent) { 187 System.out.print(new String (content.getContent())); 188 } 189 190 file = null; 191 } 192 193 } 194 | Popular Tags |