1 25 package org.archive.io.arc; 26 27 import java.io.ByteArrayInputStream ; 28 import java.io.ByteArrayOutputStream ; 29 import java.io.IOException ; 30 import java.io.InputStream ; 31 32 import org.apache.commons.httpclient.Header; 33 import org.apache.commons.httpclient.HttpParser; 34 import org.apache.commons.httpclient.StatusLine; 35 import org.apache.commons.httpclient.util.EncodingUtil; 36 import org.archive.io.ArchiveRecord; 37 import org.archive.io.ArchiveRecordHeader; 38 39 40 45 public class ARCRecord extends ArchiveRecord implements ARCConstants { 46 51 private StatusLine httpStatus = null; 52 53 59 private InputStream httpHeaderStream = null; 60 61 66 private Header [] httpHeaders = null; 67 68 69 75 private static final long MIN_HTTP_HEADER_LENGTH = 76 "HTTP/1.1 200 OK\r\n".length(); 77 78 86 public ARCRecord(InputStream in, ArchiveRecordHeader metaData) 87 throws IOException { 88 this(in, metaData, 0, true, false, true); 89 } 90 91 106 public ARCRecord(InputStream in, ArchiveRecordHeader metaData, 107 int bodyOffset, boolean digest, boolean strict, 108 final boolean parseHttpHeaders) 109 throws IOException { 110 super(in, metaData, bodyOffset, digest, strict); 111 if (parseHttpHeaders) { 112 this.httpHeaderStream = readHttpHeader(); 113 } 114 } 115 116 131 public void skipHttpHeader() throws IOException { 132 if (this.httpHeaderStream != null) { 133 for (int available = this.httpHeaderStream.available(); 135 this.httpHeaderStream != null && 136 (available = this.httpHeaderStream.available()) > 0;) { 137 byte [] buffer = new byte[available]; 140 read(buffer, 0, available); 143 } 144 } 145 } 146 147 public void dumpHttpHeader() throws IOException { 148 if (this.httpHeaderStream == null) { 149 return; 150 } 151 for (int available = this.httpHeaderStream.available(); 153 this.httpHeaderStream != null 154 && (available = this.httpHeaderStream.available()) > 0;) { 155 byte[] buffer = new byte[available]; 158 int read = read(buffer, 0, available); 161 System.out.write(buffer, 0, read); 162 } 163 } 164 165 173 private InputStream readHttpHeader() throws IOException { 174 if(!getHeader().getUrl().startsWith("http") || 177 getHeader().getLength() <= MIN_HTTP_HEADER_LENGTH) { 178 return null; 179 } 180 byte [] statusBytes = HttpParser.readRawLine(getIn()); 181 int eolCharCount = getEolCharsCount(statusBytes); 182 if (eolCharCount <= 0) { 183 throw new IOException ("Failed to read http status where one " + 184 " was expected: " + new String (statusBytes)); 185 } 186 String statusLine = EncodingUtil.getString(statusBytes, 0, 187 statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); 188 if ((statusLine == null) || 189 !StatusLine.startsWithHTTP(statusLine)) { 190 throw new IOException ("Failed parse of http status line."); 191 } 192 this.httpStatus = new StatusLine(statusLine); 193 194 ByteArrayOutputStream baos = 199 new ByteArrayOutputStream (statusBytes.length + 4 * 1024); 200 baos.write(statusBytes); 201 202 for (byte [] lineBytes = null; true;) { 205 lineBytes = HttpParser.readRawLine(getIn()); 206 eolCharCount = getEolCharsCount(lineBytes); 207 if (eolCharCount <= 0) { 208 throw new IOException ("Failed reading http headers: " + 209 ((lineBytes != null)? new String (lineBytes): null)); 210 } 211 baos.write(lineBytes); 213 if ((lineBytes.length - eolCharCount) <= 0) { 214 break; 216 } 217 } 218 219 byte [] headerBytes = baos.toByteArray(); 220 this.getMetaData().setContentBegin(headerBytes.length); 222 ByteArrayInputStream bais = 223 new ByteArrayInputStream (headerBytes); 224 if (!bais.markSupported()) { 225 throw new IOException ("ByteArrayInputStream does not support mark"); 226 } 227 bais.mark(headerBytes.length); 228 bais.read(statusBytes, 0, statusBytes.length); 231 this.httpHeaders = HttpParser.parseHeaders(bais, 232 ARCConstants.DEFAULT_ENCODING); 233 this.getMetaData().setStatusCode(Integer.toString(getStatusCode())); 234 bais.reset(); 235 return bais; 236 } 237 238 244 public int getStatusCode() { 245 return (this.httpStatus == null)? -1: this.httpStatus.getStatusCode(); 246 } 247 248 252 private int getEolCharsCount(byte [] bytes) { 253 int count = 0; 254 if (bytes != null && bytes.length >=1 && 255 bytes[bytes.length - 1] == '\n') { 256 count++; 257 if (bytes.length >=2 && bytes[bytes.length -2] == '\r') { 258 count++; 259 } 260 } 261 return count; 262 } 263 264 267 public ARCRecordMetaData getMetaData() { 268 return (ARCRecordMetaData)getHeader(); 269 } 270 271 274 public Header [] getHttpHeaders() { 275 return this.httpHeaders; 276 } 277 278 283 public int read() throws IOException { 284 int c = -1; 285 if (this.httpHeaderStream != null && 286 (this.httpHeaderStream.available() > 0)) { 287 c = this.httpHeaderStream.read(); 290 if (this.httpHeaderStream.available() <= 0) { 292 this.httpHeaderStream = null; 293 } 294 incrementPosition(); 295 } else { 296 c = super.read(); 297 } 298 return c; 299 } 300 301 public int read(byte [] b, int offset, int length) throws IOException { 302 int read = -1; 303 if (this.httpHeaderStream != null && 304 (this.httpHeaderStream.available() > 0)) { 305 read = Math.min(length, this.httpHeaderStream.available()); 308 if (read == 0) { 309 read = -1; 310 } else { 311 read = this.httpHeaderStream.read(b, offset, read); 312 } 313 if (this.httpHeaderStream.available() <= 0) { 315 this.httpHeaderStream = null; 316 } 317 incrementPosition(read); 318 } else { 319 read = super.read(b, offset, length); 320 } 321 return read; 322 } 323 324 330 public int getBodyOffset() { 331 return this.getMetaData().getContentBegin(); 332 } 333 334 @Override 335 protected String getIp4Cdx(ArchiveRecordHeader h) { 336 String result = null; 337 if (h instanceof ARCRecordMetaData) { 338 result = ((ARCRecordMetaData)h).getIp(); 339 } 340 return (result != null)? result: super.getIp4Cdx(h); 341 } 342 343 @Override 344 protected String getStatusCode4Cdx(ArchiveRecordHeader h) { 345 String result = null; 346 if (h instanceof ARCRecordMetaData) { 347 result = ((ARCRecordMetaData) h).getStatusCode(); 348 } 349 return (result != null) ? result: super.getStatusCode4Cdx(h); 350 } 351 352 @Override 353 protected String getDigest4Cdx(ArchiveRecordHeader h) { 354 String result = null; 355 if (h instanceof ARCRecordMetaData) { 356 result = ((ARCRecordMetaData) h).getDigest(); 357 } 358 return (result != null) ? result: super.getDigest4Cdx(h); 359 } 360 } | Popular Tags |