1 25 package org.archive.io; 26 27 import it.unimi.dsi.fastutil.io.RepositionableStream; 28 29 import java.io.ByteArrayOutputStream ; 30 import java.io.IOException ; 31 import java.io.InputStream ; 32 import java.util.Iterator ; 33 import java.util.zip.Deflater ; 34 import java.util.zip.GZIPInputStream ; 35 import java.util.zip.GZIPOutputStream ; 36 import java.util.zip.Inflater ; 37 38 39 64 public class GzippedInputStream 65 extends GZIPInputStream 66 implements RepositionableStream { 67 70 private static final int GZIP_TRAILER_LENGTH = 8; 71 72 76 private final GzipHeader gzipHeader = new GzipHeader(); 77 78 81 private static final int LINUX_PAGE_SIZE = 4 * 1024; 82 83 private final long initialOffset; 84 85 public GzippedInputStream(InputStream is) throws IOException { 86 this(is, LINUX_PAGE_SIZE); 88 } 89 90 98 public GzippedInputStream(final InputStream is, final int size) 99 throws IOException { 100 super(checkStream(is), size); 101 if (!is.markSupported()) { 102 throw new IllegalArgumentException ("GzippedInputStream requires " + 103 "a markable stream"); 104 } 105 if (!(is instanceof RepositionableStream)) { 106 throw new IllegalArgumentException ("GzippedInputStream requires " + 107 "a stream that implements RepositionableStream"); 108 } 109 long afterGZIPHeader = ((RepositionableStream)is).position(); 126 is.reset(); 127 this.initialOffset = ((RepositionableStream)is).position(); 128 ((RepositionableStream)is).position(afterGZIPHeader); 129 } 130 131 protected static InputStream checkStream(final InputStream is) 132 throws IOException { 133 if (is instanceof RepositionableStream) { 134 is.mark(GzipHeader.MINIMAL_GZIP_HEADER_LENGTH * 4); 138 return is; 139 } 140 throw new IOException ("Passed stream does not" + 141 " implement PositionableStream"); 142 } 143 144 153 public long gotoEOR(int ignore) throws IOException { 154 long bytesSkipped = 0; 155 if (this.inf.getTotalIn() <= 0) { 156 return bytesSkipped; 157 } 158 if (!this.inf.finished()) { 159 int read = 0; 160 while ((read = read()) != -1) { 161 if ((byte)read == (byte)ignore) { 162 continue; 163 } 164 bytesSkipped = gotoEOR() + 1; 165 break; 166 } 167 } 168 return bytesSkipped; 169 } 170 171 178 public long gotoEOR() throws IOException { 179 long bytesSkipped = 0; 180 if (this.inf.getTotalIn() <= 0) { 181 return bytesSkipped; 182 } 183 while(!this.inf.finished()) { 184 bytesSkipped += skip(Long.MAX_VALUE); 185 } 186 return bytesSkipped; 187 } 188 189 195 public Iterator iterator() { 196 try { 197 ((RepositionableStream)this.in).position(this.initialOffset); 203 } catch (IOException e) { 204 throw new RuntimeException (e); 205 } 206 return new Iterator () { 207 private GzippedInputStream compressedStream = 208 GzippedInputStream.this; 209 210 public boolean hasNext() { 211 try { 212 gotoEOR(); 213 } catch (IOException e) { 214 throw new RuntimeException (e); 215 } 216 return moveToNextGzipMember(); 217 } 218 219 222 public Object next() { 223 try { 224 gzipMemberSeek(); 225 } catch (IOException e) { 226 throw new RuntimeException ("Failed move to EOR or " + 227 "failed header read: " + e.getMessage()); 228 } 229 return this.compressedStream; 230 } 231 232 public void remove() { 233 throw new UnsupportedOperationException (); 234 } 235 }; 236 } 237 238 241 protected boolean moveToNextGzipMember() { 242 boolean result = false; 243 try { 248 RepositionableStream ps = (RepositionableStream)getInputStream(); 249 if (getInflater().getRemaining() > GZIP_TRAILER_LENGTH) { 254 ps.position(position() - getInflater().getRemaining() + 255 GZIP_TRAILER_LENGTH); 256 } 257 for (int read = -1, headerRead = 0; true; headerRead = 0) { 258 getInputStream().mark(3); 261 if ((read = getInputStream().read()) == -1) { 262 break; 263 } 264 if(compareBytes(read, GZIPInputStream.GZIP_MAGIC)) { 265 headerRead++; 266 if ((read = getInputStream().read()) == -1) { 267 break; 268 } 269 if(compareBytes(read, GZIPInputStream.GZIP_MAGIC >> 8)) { 270 headerRead++; 271 if ((read = getInputStream().read()) == -1) { 272 break; 273 } 274 if (compareBytes(read, Deflater.DEFLATED)) { 275 headerRead++; 276 getInputStream().reset(); 279 result = true; 280 break; 281 } 282 } 283 ps.position(ps.position() - headerRead); 286 } 287 } 288 } catch (IOException e) { 289 throw new RuntimeException ("Failed i/o: " + e.getMessage()); 290 } 291 return result; 292 } 293 294 protected boolean compareBytes(final int a, final int b) { 295 return ((byte)(a & 0xff)) == ((byte)(b & 0xff)); 296 } 297 298 protected Inflater getInflater() { 299 return this.inf; 300 } 301 302 protected InputStream getInputStream() { 303 return this.in; 304 } 305 306 protected GzipHeader getGzipHeader() { 307 return this.gzipHeader; 308 } 309 310 313 protected void resetInflater() { 314 this.eos = false; 315 this.inf.reset(); 316 } 317 318 322 protected void readHeader() throws IOException { 323 new GzipHeader(this.in); 324 this.crc.reset(); 326 } 327 328 338 public void position(long position) throws IOException { 339 ((RepositionableStream)this.in).position(position); 340 resetInflater(); 341 } 342 343 public long position() throws IOException { 344 return ((RepositionableStream)this.in).position(); 345 } 346 347 356 public void gzipMemberSeek(long position) throws IOException { 357 position(position); 358 readHeader(); 359 } 360 361 public void gzipMemberSeek() throws IOException { 362 gzipMemberSeek(position()); 363 } 364 365 372 public static byte [] gzip(byte [] bytes) throws IOException { 373 ByteArrayOutputStream baos = new ByteArrayOutputStream (); 374 GZIPOutputStream gzipOS = new GZIPOutputStream (baos); 375 gzipOS.write(bytes, 0, bytes.length); 376 gzipOS.close(); 377 return baos.toByteArray(); 378 } 379 380 387 public static boolean isCompressedRepositionableStream( 388 final RepositionableStream rs) 389 throws IOException { 390 boolean result = false; 391 long p = rs.position(); 392 try { 393 result = isCompressedStream((InputStream )rs); 394 } finally { 395 rs.position(p); 396 } 397 return result; 398 } 399 400 407 public static boolean isCompressedStream(final InputStream is) 408 throws IOException { 409 try { 410 new GzipHeader(is); 411 } catch (NoGzipMagicException e) { 412 return false; 413 } 414 return true; 415 } 416 } 417 | Popular Tags |