1 25 package org.archive.io.arc; 26 27 import java.io.File ; 28 import java.io.FileInputStream ; 29 import java.io.IOException ; 30 import java.io.InputStream ; 31 import java.net.MalformedURLException ; 32 import java.net.URL ; 33 import java.util.Iterator ; 34 import java.util.logging.Level ; 35 36 import org.archive.io.ArchiveReader; 37 import org.archive.io.ArchiveReaderFactory; 38 import org.archive.io.ArchiveRecord; 39 import org.archive.io.ArchiveRecordHeader; 40 import org.archive.io.GzipHeader; 41 import org.archive.io.GzippedInputStream; 42 import org.archive.io.NoGzipMagicException; 43 import org.archive.util.FileUtils; 44 45 46 53 public class ARCReaderFactory extends ArchiveReaderFactory 54 implements ARCConstants { 55 58 private static final ARCReaderFactory factory = new ARCReaderFactory(); 59 60 63 protected ARCReaderFactory() { 64 super(); 65 } 66 67 public static ARCReader get(String arcFileOrUrl) 68 throws MalformedURLException , IOException { 69 return (ARCReader)ARCReaderFactory.factory. 70 getArchiveReader(arcFileOrUrl); 71 } 72 73 public static ARCReader get(String arcFileOrUrl, final long offset) 74 throws MalformedURLException , IOException { 75 return (ARCReader)ARCReaderFactory.factory. 76 getArchiveReader(arcFileOrUrl, offset); 77 } 78 79 public static ARCReader get(final File f) throws IOException { 80 return (ARCReader)ARCReaderFactory.factory.getArchiveReader(f); 81 } 82 83 public static ARCReader get(final File f, final long offset) 84 throws IOException { 85 return (ARCReader)ARCReaderFactory.factory.getArchiveReader(f, offset); 86 } 87 88 protected ArchiveReader getArchiveReader(final File f, final long offset) 89 throws IOException { 90 return getArchiveReader(f, true, offset); 91 } 92 93 103 public static ARCReader get(final File f, 104 final boolean skipSuffixTest, final long offset) 105 throws IOException { 106 return (ARCReader)ARCReaderFactory.factory.getArchiveReader(f, 107 skipSuffixTest, offset); 108 } 109 110 protected ArchiveReader getArchiveReader(final File arcFile, 111 final boolean skipSuffixTest, final long offset) 112 throws IOException { 113 boolean compressed = testCompressedARCFile(arcFile, skipSuffixTest); 114 if (!compressed) { 115 if (!FileUtils.isReadableWithExtensionAndMagic(arcFile, 116 ARC_FILE_EXTENSION, ARC_MAGIC_NUMBER)) { 117 throw new IOException (arcFile.getAbsolutePath() + 118 " is not an Internet Archive ARC file."); 119 } 120 } 121 return compressed? 122 (ARCReader)ARCReaderFactory.factory. 123 new CompressedARCReader(arcFile, offset): 124 (ARCReader)ARCReaderFactory.factory. 125 new UncompressedARCReader(arcFile, offset); 126 } 127 128 public static ArchiveReader get(final String s, final InputStream is, 129 final boolean atFirstRecord) 130 throws IOException { 131 return ARCReaderFactory.factory.getArchiveReader(s, is, 132 atFirstRecord); 133 } 134 135 protected ArchiveReader getArchiveReader(final String arc, 136 final InputStream is, final boolean atFirstRecord) 137 throws IOException { 138 return new CompressedARCReader(arc, is, atFirstRecord); 141 } 142 143 153 public static ARCReader get(final URL arcUrl, final long offset) 154 throws IOException { 155 return (ARCReader)ARCReaderFactory.factory.getArchiveReader(arcUrl, 156 offset); 157 } 158 159 169 public static ARCReader get(final URL arcUrl) 170 throws IOException { 171 return (ARCReader)ARCReaderFactory.factory.getArchiveReader(arcUrl); 172 } 173 174 179 public boolean isCompressed(File arcFile) throws IOException { 180 return testCompressedARCFile(arcFile); 181 } 182 183 195 public static boolean testCompressedARCFile(File arcFile) 196 throws IOException { 197 return testCompressedARCFile(arcFile, false); 198 } 199 200 213 public static boolean testCompressedARCFile(File arcFile, 214 boolean skipSuffixCheck) 215 throws IOException { 216 boolean compressedARCFile = false; 217 FileUtils.isReadable(arcFile); 218 if(!skipSuffixCheck && !arcFile.getName().toLowerCase() 219 .endsWith(COMPRESSED_ARC_FILE_EXTENSION)) { 220 return compressedARCFile; 221 } 222 223 final InputStream is = new FileInputStream (arcFile); 224 try { 225 compressedARCFile = testCompressedARCStream(is); 226 } finally { 227 is.close(); 228 } 229 return compressedARCFile; 230 } 231 232 public static boolean isARCSuffix(final String arcName) { 233 return (arcName == null)? 234 false: 235 (arcName.toLowerCase().endsWith(DOT_COMPRESSED_ARC_FILE_EXTENSION))? 236 true: 237 (arcName.toLowerCase().endsWith(DOT_ARC_FILE_EXTENSION))? 238 true: false; 239 } 240 241 248 public static boolean testCompressedARCStream(final InputStream is) 249 throws IOException { 250 boolean compressedARCFile = false; 251 GzipHeader gh = null; 252 try { 253 gh = new GzipHeader(is); 254 } catch (NoGzipMagicException e ) { 255 return compressedARCFile; 256 } 257 258 byte[] fextra = gh.getFextra(); 259 if (fextra != null && 264 ARC_GZIP_EXTRA_FIELD.length - 2 == fextra.length) { 265 compressedARCFile = true; 266 for (int i = 0; i < fextra.length; i++) { 267 if (fextra[i] != ARC_GZIP_EXTRA_FIELD[i + 2]) { 268 compressedARCFile = false; 269 break; 270 } 271 } 272 } 273 return compressedARCFile; 274 } 275 276 280 private class UncompressedARCReader extends ARCReader { 281 286 public UncompressedARCReader(final File f) 287 throws IOException { 288 this(f, 0); 289 } 290 291 298 public UncompressedARCReader(final File f, final long offset) 299 throws IOException { 300 setIn(getInputStream(f, offset)); 303 initialize(f.getAbsolutePath()); 304 } 305 306 312 public UncompressedARCReader(final String f, final InputStream is) { 313 setIn(is); 316 initialize(f); 317 } 318 } 319 320 325 private class CompressedARCReader extends ARCReader { 326 327 334 public CompressedARCReader(final File f) throws IOException { 335 this(f, 0); 336 } 337 338 345 public CompressedARCReader(final File f, final long offset) 346 throws IOException { 347 setIn(new GzippedInputStream(getInputStream(f, offset))); 350 setCompressed((offset == 0)); 351 initialize(f.getAbsolutePath()); 352 } 353 354 361 public CompressedARCReader(final String f, final InputStream is, 362 final boolean atFirstRecord) 363 throws IOException { 364 setIn(new GzippedInputStream(is)); 367 setCompressed(true); 368 setAlignedOnFirstRecord(atFirstRecord); 369 initialize(f); 370 } 371 372 380 public ARCRecord get(long offset) throws IOException { 381 cleanupCurrentRecord(); 382 ((GzippedInputStream)getIn()).gzipMemberSeek(offset); 383 return createArchiveRecord(getIn(), offset); 384 } 385 386 public Iterator <ArchiveRecord> iterator() { 387 391 return new ArchiveRecordIterator() { 392 private GzippedInputStream gis = 393 (GzippedInputStream)getInputStream(); 394 395 private Iterator gzipIterator = this.gis.iterator(); 396 397 protected boolean innerHasNext() { 398 return this.gzipIterator.hasNext(); 399 } 400 401 protected ArchiveRecord innerNext() throws IOException { 402 long p = this.gis.position(); 405 InputStream is = (InputStream ) this.gzipIterator.next(); 406 return createArchiveRecord(is, p); 407 } 408 }; 409 } 410 411 protected void gotoEOR(ArchiveRecord rec) throws IOException { 412 long skipped = ((GzippedInputStream)getIn()). 413 gotoEOR(LINE_SEPARATOR); 414 if (skipped <= 0) { 415 return; 416 } 417 ArchiveRecordHeader meta = (getCurrentRecord() != null)? 420 rec.getHeader(): null; 421 String message = "Record ENDING at " + 422 ((GzippedInputStream)getIn()).position() + 423 " has " + skipped + " trailing byte(s): " + 424 ((meta != null)? meta.toString(): ""); 425 if (isStrict()) { 426 throw new IOException (message); 427 } 428 logStdErr(Level.WARNING, message); 429 } 430 } 431 } | Popular Tags |