KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > protocol > http > HttpResponse


1 /* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.protocol.http;
5
6 import java.io.BufferedInputStream JavaDoc;
7 import java.io.ByteArrayOutputStream JavaDoc;
8 import java.io.EOFException JavaDoc;
9 import java.io.IOException JavaDoc;
10 import java.io.InputStream JavaDoc;
11 import java.io.OutputStream JavaDoc;
12 import java.io.PushbackInputStream JavaDoc;
13 import java.net.InetAddress JavaDoc;
14 import java.net.InetSocketAddress JavaDoc;
15 import java.net.Socket JavaDoc;
16 import java.net.URL JavaDoc;
17 import java.util.Map JavaDoc;
18 import java.util.TreeMap JavaDoc;
19 import java.util.Properties JavaDoc;
20 import java.util.logging.Level JavaDoc;
21
22 import net.nutch.protocol.Content;
23 import net.nutch.protocol.ProtocolException;
24
25 import net.nutch.util.GZIPUtils;
26
27 /** An HTTP response. */
28 public class HttpResponse {
29   private String JavaDoc orig;
30   private String JavaDoc base;
31   private byte[] content;
32   private int code;
33   private Properties JavaDoc headers = new Properties JavaDoc();
34
35   /** Returns the response code. */
36   public int getCode() { return code; }
37
38   /** Returns the value of a named header. */
39   public String JavaDoc getHeader(String JavaDoc name) {
40     return (String JavaDoc)headers.get(name);
41   }
42
43   public byte[] getContent() { return content; }
44
45   public Content toContent() {
46     String JavaDoc contentType = getHeader("Content-Type");
47     if (contentType == null)
48       contentType = "";
49     return new Content(orig, base, content, contentType, headers);
50   }
51
52   public HttpResponse(URL JavaDoc url) throws ProtocolException, IOException JavaDoc {
53     this(url.toString(), url);
54   }
55
56   public HttpResponse(String JavaDoc orig, URL JavaDoc url)
57     throws ProtocolException, IOException JavaDoc {
58     
59     this.orig = orig;
60     this.base = url.toString();
61
62     if (!"http".equals(url.getProtocol()))
63       throw new HttpException("Not an HTTP url:" + url);
64
65     if (Http.LOG.isLoggable(Level.FINE))
66       Http.LOG.fine("fetching " + url);
67
68     String JavaDoc path = "".equals(url.getFile()) ? "/" : url.getFile();
69
70     // some servers will redirect a request with a host line like
71
// "Host: <hostname>:80" to "http://<hpstname>/<orig_path>"- they
72
// don't want the :80...
73

74     String JavaDoc host = url.getHost();
75     int port;
76     String JavaDoc portString;
77     if (url.getPort() == -1) {
78       port= 80;
79       portString= "";
80     } else {
81       port= url.getPort();
82       portString= ":" + port;
83     }
84     Socket JavaDoc socket = null;
85
86     try {
87       socket = new Socket JavaDoc(); // create the socket
88
socket.setSoTimeout(Http.TIMEOUT);
89
90
91       // connect
92
String JavaDoc sockHost = Http.PROXY ? Http.PROXY_HOST : host;
93       int sockPort = Http.PROXY ? Http.PROXY_PORT : port;
94       InetSocketAddress JavaDoc sockAddr= new InetSocketAddress JavaDoc(sockHost, sockPort);
95       socket.connect(sockAddr, Http.TIMEOUT);
96
97       // make request
98
OutputStream JavaDoc req = socket.getOutputStream();
99
100       StringBuffer JavaDoc reqStr = new StringBuffer JavaDoc("GET ");
101       if(Http.PROXY){
102         reqStr.append(url.getProtocol()+"://"+host+portString+path);
103       } else {
104         reqStr.append(path);
105       }
106
107       reqStr.append(" HTTP/1.0\r\n");
108
109       reqStr.append("Host: ");
110       reqStr.append(host);
111       reqStr.append(portString);
112       reqStr.append("\r\n");
113
114       reqStr.append("Accept-Encoding: x-gzip, gzip\r\n");
115
116       if ((Http.AGENT_STRING == null) || (Http.AGENT_STRING.length() == 0)) {
117         Http.LOG.severe("User-agent is not set!");
118       } else {
119         reqStr.append("User-Agent: ");
120         reqStr.append(Http.AGENT_STRING);
121         reqStr.append("\r\n");
122       }
123
124       reqStr.append("\r\n");
125       byte[] reqBytes= reqStr.toString().getBytes();
126
127       req.write(reqBytes);
128       req.flush();
129         
130       PushbackInputStream JavaDoc in = // process response
131
new PushbackInputStream JavaDoc(
132           new BufferedInputStream JavaDoc(socket.getInputStream(), Http.BUFFER_SIZE),
133           Http.BUFFER_SIZE) ;
134
135       StringBuffer JavaDoc line = new StringBuffer JavaDoc();
136
137       boolean haveSeenNonContinueStatus= false;
138       while (!haveSeenNonContinueStatus) {
139         // parse status code line
140
this.code = parseStatusLine(in, line);
141         // parse headers
142
headers.putAll(parseHeaders(in, line));
143         haveSeenNonContinueStatus= code != 100; // 100 is "Continue"
144
}
145
146       readPlainContent(in);
147
148       String JavaDoc contentEncoding= getHeader("Content-Encoding");
149       if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
150         Http.LOG.fine("uncompressing....");
151         byte[] compressed = content;
152
153         content = GZIPUtils.unzipBestEffort(compressed, Http.MAX_CONTENT);
154
155         if (content == null)
156           throw new HttpException("unzipBestEffort returned null");
157
158         if (Http.LOG.isLoggable(Level.FINE))
159           Http.LOG.fine("fetched " + compressed.length
160                         + " bytes of compressed content (expanded to "
161                         + content.length + " bytes) from " + url);
162       } else {
163         if (Http.LOG.isLoggable(Level.FINE))
164           Http.LOG.fine("fetched " + content.length + " bytes from " + url);
165       }
166
167     } finally {
168       if (socket != null)
169         socket.close();
170     }
171
172   }
173
174   private void readPlainContent(InputStream JavaDoc in)
175     throws HttpException, IOException JavaDoc {
176
177     int contentLength = Integer.MAX_VALUE; // get content length
178
String JavaDoc contentLengthString = (String JavaDoc)headers.get("Content-Length");
179     if (contentLengthString != null) {
180       contentLengthString = contentLengthString.trim();
181       try {
182         contentLength = Integer.parseInt(contentLengthString);
183       } catch (NumberFormatException JavaDoc e) {
184         throw new HttpException("bad content length: "+contentLengthString);
185       }
186     }
187     if (Http.MAX_CONTENT >= 0
188       && contentLength > Http.MAX_CONTENT) // limit download size
189
contentLength = Http.MAX_CONTENT;
190
191     ByteArrayOutputStream JavaDoc out = new ByteArrayOutputStream JavaDoc(Http.BUFFER_SIZE);
192     byte[] bytes = new byte[Http.BUFFER_SIZE];
193     int length = 0; // read content
194
for (int i = in.read(bytes); i != -1; i = in.read(bytes)) {
195
196       out.write(bytes, 0, i);
197       length += i;
198       if (length >= contentLength)
199         break;
200     }
201     content = out.toByteArray();
202   }
203
204   private void readChunkedContent(PushbackInputStream JavaDoc in,
205                                   StringBuffer JavaDoc line)
206     throws HttpException, IOException JavaDoc {
207     boolean doneChunks= false;
208     int contentBytesRead= 0;
209     byte[] bytes = new byte[Http.BUFFER_SIZE];
210     ByteArrayOutputStream JavaDoc out = new ByteArrayOutputStream JavaDoc(Http.BUFFER_SIZE);
211
212     while (!doneChunks) {
213       Http.LOG.fine("Http: starting chunk");
214
215       readLine(in, line, false);
216
217       String JavaDoc chunkLenStr;
218       // LOG.fine("chunk-header: '" + line + "'");
219

220       int pos= line.indexOf(";");
221       if (pos < 0) {
222         chunkLenStr= line.toString();
223       } else {
224         chunkLenStr= line.substring(0, pos);
225         // LOG.fine("got chunk-ext: " + line.substring(pos+1));
226
}
227       chunkLenStr= chunkLenStr.trim();
228       int chunkLen;
229       try {
230         chunkLen= Integer.parseInt(chunkLenStr, 16);
231       } catch (NumberFormatException JavaDoc e){
232         throw new HttpException("bad chunk length: "+line.toString());
233       }
234
235       if (chunkLen == 0) {
236         doneChunks= true;
237         break;
238       }
239
240       if ( (contentBytesRead + chunkLen) > Http.MAX_CONTENT )
241         chunkLen= Http.MAX_CONTENT - contentBytesRead;
242
243       // read one chunk
244
int chunkBytesRead= 0;
245       while (chunkBytesRead < chunkLen) {
246
247         int toRead= (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ?
248                     (chunkLen - chunkBytesRead) : Http.BUFFER_SIZE;
249         int len= in.read(bytes, 0, toRead);
250
251         if (len == -1)
252           throw new HttpException("chunk eof after " + contentBytesRead
253                                       + " bytes in successful chunks"
254                                       + " and " + chunkBytesRead
255                                       + " in current chunk");
256
257         // DANGER!!! Will printed GZIPed stuff right to your
258
// terminal!
259
// LOG.fine("read: " + new String(bytes, 0, len));
260

261         out.write(bytes, 0, len);
262         chunkBytesRead+= len;
263       }
264
265       readLine(in, line, false);
266
267     }
268
269     if (!doneChunks) {
270       if (contentBytesRead != Http.MAX_CONTENT)
271         throw new HttpException("chunk eof: !doneChunk && didn't max out");
272       return;
273     }
274
275     content = out.toByteArray();
276     parseHeaders(in, line);
277
278   }
279
280   private int parseStatusLine(PushbackInputStream JavaDoc in, StringBuffer JavaDoc line)
281     throws IOException JavaDoc, HttpException {
282     readLine(in, line, false);
283
284     int codeStart = line.indexOf(" ");
285     int codeEnd = line.indexOf(" ", codeStart+1);
286
287     // handle lines with no plaintext result code, ie:
288
// "HTTP/1.1 200" vs "HTTP/1.1 200 OK"
289
if (codeEnd == -1)
290       codeEnd= line.length();
291
292     int code;
293     try {
294       code= Integer.parseInt(line.substring(codeStart+1, codeEnd));
295     } catch (NumberFormatException JavaDoc e) {
296       throw new HttpException("bad status line '" + line
297                               + "': " + e.getMessage(), e);
298     }
299
300     return code;
301   }
302
303
304   private void processHeaderLine(StringBuffer JavaDoc line, TreeMap JavaDoc headers)
305     throws IOException JavaDoc, HttpException {
306     int colonIndex = line.indexOf(":"); // key is up to colon
307
if (colonIndex == -1) {
308       int i;
309       for (i= 0; i < line.length(); i++)
310         if (!Character.isWhitespace(line.charAt(i)))
311           break;
312       if (i == line.length())
313         return;
314       throw new HttpException("No colon in header:" + line);
315     }
316     String JavaDoc key = line.substring(0, colonIndex);
317
318     int valueStart = colonIndex+1; // skip whitespace
319
while (valueStart < line.length()) {
320       int c = line.charAt(valueStart);
321       if (c != ' ' && c != '\t')
322         break;
323       valueStart++;
324     }
325     String JavaDoc value = line.substring(valueStart);
326
327     headers.put(key, value);
328   }
329
330   private Map JavaDoc parseHeaders(PushbackInputStream JavaDoc in, StringBuffer JavaDoc line)
331     throws IOException JavaDoc, HttpException {
332     TreeMap JavaDoc headers = new TreeMap JavaDoc(String.CASE_INSENSITIVE_ORDER);
333     return parseHeaders(in, line, headers);
334   }
335
336   // Adds headers to an existing TreeMap
337
private Map JavaDoc parseHeaders(PushbackInputStream JavaDoc in, StringBuffer JavaDoc line,
338                            TreeMap JavaDoc headers)
339     throws IOException JavaDoc, HttpException {
340     while (readLine(in, line, true) != 0) {
341
342       // handle HTTP responses with missing blank line after headers
343
int pos;
344       if ( ((pos= line.indexOf("<!DOCTYPE")) != -1)
345            || ((pos= line.indexOf("<HTML")) != -1)
346            || ((pos= line.indexOf("<html")) != -1) ) {
347
348         in.unread(line.substring(pos).getBytes("UTF-8"));
349         line.setLength(pos);
350
351         try {
352           processHeaderLine(line, headers);
353         } catch (Exception JavaDoc e) {
354           // fixme:
355
e.printStackTrace();
356         }
357
358         return headers;
359       }
360
361       processHeaderLine(line, headers);
362     }
363     return headers;
364   }
365
366   private static int readLine(PushbackInputStream JavaDoc in, StringBuffer JavaDoc line,
367                       boolean allowContinuedLine)
368     throws IOException JavaDoc {
369     line.setLength(0);
370     for (int c = in.read(); c != -1; c = in.read()) {
371       switch (c) {
372         case '\r':
373           if (peek(in) == '\n') {
374             in.read();
375           }
376         case '\n':
377           if (line.length() > 0) {
378             // at EOL -- check for continued line if the current
379
// (possibly continued) line wasn't blank
380
if (allowContinuedLine)
381               switch (peek(in)) {
382                 case ' ' : case '\t': // line is continued
383
in.read();
384                   continue;
385               }
386           }
387           return line.length(); // else complete
388
default :
389           line.append((char)c);
390       }
391     }
392     throw new EOFException JavaDoc();
393   }
394
395   private static int peek(PushbackInputStream JavaDoc in) throws IOException JavaDoc {
396     int value = in.read();
397     in.unread(value);
398     return value;
399   }
400
401 }
402
Popular Tags