package org.archive.io;

import it.unimi.dsi.fastutil.io.RepositionableStream;

import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.util.MimetypeUtils;


/**
 * Base class for readers of archive files made up of a sequence of
 * {@link ArchiveRecord}s.
 *
 * <p>The reader holds a single underlying input stream and at most one
 * "current" record at a time. Subclasses supply the format-specific parts:
 * how a record is created from the stream at an offset
 * ({@link #createArchiveRecord(InputStream, long)}) and how to skip to the end
 * of a record ({@link #gotoEOR(ArchiveRecord)}).
 *
 * <p>Most positioning methods cast the underlying stream to
 * {@link RepositionableStream}; the stream installed via
 * {@link #setIn(InputStream)} is therefore expected to implement it.
 */
public abstract class ArchiveReader implements ArchiveFileConstants {
    /**
     * Whether the archive being read is compressed.
     */
    private boolean compressed = false;

    /**
     * Digest flag, on by default. This class only stores the flag (and turns
     * it off before dumping); how it is honored is up to subclasses/records.
     */
    private boolean digest = true;

    /**
     * Strict-mode flag. When set, {@link ArchiveRecordIterator#next()} does
     * not retry after an {@link IOException}.
     */
    private boolean strict = false;

    /**
     * The underlying archive stream. Cast to {@link RepositionableStream}
     * wherever the current offset is read or changed.
     */
    private InputStream in = null;

    /**
     * Maximum number of attempts {@link ArchiveRecordIterator#exceptionNext()}
     * makes at reading a record when it keeps hitting
     * {@link RecoverableIOException}s.
     */
    public static final int MAX_ALLOWED_RECOVERABLES = 10;


    /**
     * The record most recently handed out, if any. It is closed and skipped
     * past by {@link #cleanupCurrentRecord()} before moving on.
     */
    private ArchiveRecord currentRecord = null;

    /**
     * Identifier for this reader. {@link #getFileName()} treats it as a file
     * path.
     */
    private String identifier = null;

    /**
     * Archive file version, as set by {@link #setVersion(String)}.
     */
    private String version = null;


    protected ArchiveReader() {
        super();
    }

    /**
     * Convenience initializer for subclasses.
     *
     * @param i Identifier to record for this reader.
     */
    protected void initialize(final String i) {
        setReaderIdentifier(i);
    }

    /**
     * Opens a buffered, repositionable stream on the passed file.
     *
     * @param f File to open.
     * @param offset Offset handed to the {@link RandomAccessInputStream}
     * constructor.
     * @return A {@link RandomAccessBufferedInputStream} wrapping a
     * {@link RandomAccessInputStream} on <code>f</code>.
     * @throws IOException If the stream cannot be opened.
     */
    protected InputStream getInputStream(final File f, final long offset)
    throws IOException {
        return new RandomAccessBufferedInputStream(
            new RandomAccessInputStream(f, offset));
    }

    /**
     * @return True if this reader was flagged as reading a compressed archive.
     */
    public boolean isCompressed() {
        return this.compressed;
    }

    /**
     * Gets the record at the passed offset.
     *
     * <p>Any current record is cleaned up first. The stream is repositioned
     * only if it is not already at <code>offset</code>.
     *
     * @param offset Byte offset into the archive at which the record starts.
     * @return Record created by
     * {@link #createArchiveRecord(InputStream, long)} at that offset.
     * @throws IOException If cleanup, repositioning or record creation fails.
     */
    public ArchiveRecord get(long offset) throws IOException {
        cleanupCurrentRecord();
        RepositionableStream ps = (RepositionableStream)this.in;
        long currentOffset = ps.position();
        if (currentOffset != offset) {
            currentOffset = offset;
            ps.position(offset);
        }
        return createArchiveRecord(this.in, currentOffset);
    }

    /**
     * Gets a record at the stream's current position. Unlike
     * {@link #get(long)}, this does not clean up the current record first.
     *
     * @return Record created at the current stream offset.
     * @throws IOException If reading the position or creating the record
     * fails.
     */
    public ArchiveRecord get() throws IOException {
        return createArchiveRecord(this.in,
            ((RepositionableStream)this.in).position());
    }

    /**
     * Closes the underlying stream, if one is set, and forgets it. Safe to
     * call more than once.
     *
     * @throws IOException If closing the stream fails.
     */
    public void close() throws IOException {
        if (this.in != null) {
            this.in.close();
            this.in = null;
        }
    }

    /**
     * Cleans up the current record and repositions the stream to offset 0.
     *
     * <p>Note that a failure while repositioning is rethrown wrapped in a
     * {@link RuntimeException}; the declared {@link IOException} is thrown
     * directly only when the stream is not a {@link RepositionableStream}
     * (or when cleanup fails).
     *
     * @throws IOException If the stream cannot be repositioned at all, or
     * cleanup of the current record fails.
     */
    protected void rewind() throws IOException {
        cleanupCurrentRecord();
        if (this.in instanceof RepositionableStream) {
            try {
                ((RepositionableStream)this.in).position(0);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        } else {
            throw new IOException("Stream is not resettable.");
        }
    }

    /**
     * Closes the current record, if any, moves past its end via
     * {@link #gotoEOR(ArchiveRecord)}, and clears the reference.
     *
     * @throws IOException If closing or skipping the record fails; in that
     * case the current-record reference is left in place.
     */
    protected void cleanupCurrentRecord() throws IOException {
        if (this.currentRecord != null) {
            this.currentRecord.close();
            gotoEOR(this.currentRecord);
            this.currentRecord = null;
        }
    }

    /**
     * Creates a record from the passed stream.
     *
     * @param is Stream to read the record from.
     * @param offset Offset in the archive at which the record starts.
     * @return The new record.
     * @throws IOException If the record cannot be created.
     */
    protected abstract ArchiveRecord createArchiveRecord(InputStream is,
        long offset)
    throws IOException;

    /**
     * Moves the stream to the end of the passed record. Called by
     * {@link #cleanupCurrentRecord()} after the record has been closed.
     *
     * @param record Record whose end should be skipped to.
     * @throws IOException If skipping fails.
     */
    protected abstract void gotoEOR(ArchiveRecord record) throws IOException;

    /**
     * @return File extension for this archive type.
     * {@link #getDotFileExtension()} is the variant used when stripping
     * file names.
     */
    public abstract String getFileExtension();

    /**
     * @return The file extension in the form passed to
     * {@link #stripExtension(String, String)} -- presumably with a leading
     * dot; confirm against implementations.
     */
    public abstract String getDotFileExtension();

    /**
     * @return Version of this archive file, or null if never set.
     */
    public String getVersion() {
        return this.version;
    }

    /**
     * Validates the whole archive without checking the record count.
     * Equivalent to <code>validate(-1)</code>.
     *
     * @return List of the {@link ArchiveRecordHeader}s of all records read.
     * @throws IOException If validation fails.
     * @see #validate(int)
     */
    public List validate() throws IOException {
        return validate(-1);
    }

    /**
     * Validates the archive by iterating over every record from the start.
     *
     * <p>Side effect: strict mode is switched on and is <em>not</em> restored
     * afterwards. A record fails validation when its header reports a length
     * of zero or less and its mimetype equals
     * {@link MimetypeUtils#NO_TYPE_MIMETYPE}.
     *
     * @param noRecords Number of records expected, or -1 to skip the count
     * check.
     * @return List of the {@link ArchiveRecordHeader}s of all records read.
     * @throws IOException If a record is empty as described above, or the
     * number of records read differs from <code>noRecords</code>.
     */
    public List validate(int noRecords) throws IOException {
        List<ArchiveRecordHeader> hs = new ArrayList<ArchiveRecordHeader>();
        int count = 0;
        setStrict(true);
        for (Iterator<ArchiveRecord> i = iterator(); i.hasNext();) {
            count++;
            ArchiveRecord r = i.next();
            if (r.getHeader().getLength() <= 0
                && r.getHeader().getMimetype().
                    equals(MimetypeUtils.NO_TYPE_MIMETYPE)) {
                throw new IOException("ARCRecord content is empty.");
            }
            r.close();
            hs.add(r.getHeader());
        }

        if (noRecords != -1 && count != noRecords) {
            // The check fires on any mismatch, so say which way it went
            // rather than always claiming "less than".
            throw new IOException("Count of records, " +
                Integer.toString(count) +
                ((count < noRecords)
                    ? " is less than expected "
                    : " is greater than expected ") +
                Integer.toString(noRecords));
        }

        return hs;
    }

    /**
     * Tests whether the archive validates. Runs {@link #validate()} and so
     * shares its side effect of switching strict mode on.
     *
     * @return True if {@link #validate()} completed without throwing.
     */
    public boolean isValid() {
        boolean valid = false;
        try {
            validate();
            valid = true;
        } catch(Exception e) {
            // Deliberately broad and silent: any failure, checked or
            // unchecked, simply means "not valid".
            valid = false;
        }

        return valid;
    }

    /**
     * @return True if strict mode is on.
     */
    public boolean isStrict() {
        return this.strict;
    }

    /**
     * @param s True to switch strict mode on (no retries while iterating).
     */
    public void setStrict(boolean s) {
        this.strict = s;
    }

    /**
     * @param d New value for the digest flag.
     */
    public void setDigest(boolean d) {
        this.digest = d;
    }

    /**
     * @return Current value of the digest flag.
     */
    public boolean isDigest() {
        return this.digest;
    }

    /**
     * @return Logger named after the runtime class of this reader.
     */
    protected Logger getLogger() {
        return Logger.getLogger(this.getClass().getName());
    }

    /**
     * @return The underlying stream; null once closed or if never set.
     */
    protected InputStream getInputStream() {
        return this.in;
    }

    /**
     * Returns an iterator over all records, starting from the beginning of
     * the archive. The current record is cleaned up and the stream rewound
     * before the iterator is created.
     *
     * @return A new {@link ArchiveRecordIterator}.
     * @throws RuntimeException Wrapping any {@link IOException} raised while
     * cleaning up or rewinding.
     */
    public Iterator<ArchiveRecord> iterator() {
        try {
            cleanupCurrentRecord();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        try {
            rewind();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return new ArchiveRecordIterator();
    }

    protected void setCompressed(boolean compressed) {
        this.compressed = compressed;
    }

    /**
     * @return The current record, or null if there is none.
     */
    protected ArchiveRecord getCurrentRecord() {
        return this.currentRecord;
    }

    /**
     * Sets the current record.
     *
     * @param currentRecord Record to remember as current.
     * @return The passed record, for call chaining.
     */
    protected ArchiveRecord currentRecord(final ArchiveRecord currentRecord) {
        this.currentRecord = currentRecord;
        return currentRecord;
    }

    protected InputStream getIn() {
        return in;
    }

    protected void setIn(InputStream in) {
        this.in = in;
    }

    protected void setVersion(String version) {
        this.version = version;
    }

    public String getReaderIdentifier() {
        return this.identifier;
    }

    protected void setReaderIdentifier(final String i) {
        this.identifier = i;
    }

    /**
     * Writes a message to standard error, bypassing the logging system.
     *
     * @param level Level to prefix the message with.
     * @param message Message text.
     */
    public void logStdErr(Level level, String message) {
        System.err.println(level.toString() + " " + message);
    }

    /**
     * A {@link BufferedInputStream} over a {@link RandomAccessInputStream}
     * that can report and change its position.
     */
    protected class RandomAccessBufferedInputStream
    extends BufferedInputStream implements RepositionableStream {

        public RandomAccessBufferedInputStream(RandomAccessInputStream is)
        throws IOException {
            super(is);
        }

        public RandomAccessBufferedInputStream(RandomAccessInputStream is, int size)
        throws IOException {
            super(is, size);
        }

        /**
         * @return Logical position: the underlying stream's position less
         * the bytes sitting in the buffer that have not been read yet.
         */
        public long position() throws IOException {
            return ((RandomAccessInputStream)this.in).position() -
                (this.count - this.pos);
        }

        /**
         * Discards whatever is buffered and repositions the underlying
         * stream.
         *
         * @param position New absolute position.
         */
        public void position(long position) throws IOException {
            // Zeroing pos and count empties the buffer so the next read
            // refills from the new position.
            this.pos = 0;
            this.count = 0;
            ((RandomAccessInputStream)this.in).position(position);
        }
    }

    /**
     * Iterator over the records of the enclosing reader.
     *
     * <p>I/O failures surface as {@link RuntimeException}s because the
     * {@link Iterator} interface does not allow checked exceptions.
     */
    protected class ArchiveRecordIterator implements Iterator<ArchiveRecord> {
        /**
         * Cleans up the current record (closing it and skipping to its end)
         * and then checks whether more input is available.
         *
         * @return True if the stream reports available bytes.
         */
        public boolean hasNext() {
            try {
                cleanupCurrentRecord();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            return innerHasNext();
        }

        /**
         * @return True if the underlying stream reports more than zero
         * available bytes.
         */
        protected boolean innerHasNext() {
            long offset = -1;
            try {
                // Read first so the offset can be reported if available()
                // fails.
                offset = ((RepositionableStream)getInputStream()).position();
                return getInputStream().available() > 0;
            } catch (IOException e) {
                throw new RuntimeException("Offset " + offset, e);
            }
        }

        /**
         * Returns the next record.
         *
         * <p>If reading fails with an {@link IOException} and the reader is
         * not in strict mode, one retry is made provided
         * {@link #hasNext()} reports more input. All failures are rethrown
         * as {@link RuntimeException}s carrying the offset at which the
         * read started.
         *
         * @return Next record.
         */
        public ArchiveRecord next() {
            long offset = -1;
            try {
                offset = ((RepositionableStream)getInputStream()).position();
                return exceptionNext();
            } catch (IOException e) {
                if (!isStrict()) {
                    try {
                        if (hasNext()) {
                            getLogger().warning("Retrying (Current offset " +
                                offset + "): " + e.getMessage());
                            return exceptionNext();
                        }
                        throw new RuntimeException("Retried but " +
                            "no next record (Offset " + offset + ")",
                            e);
                    } catch (IOException e1) {
                        throw new RuntimeException("After retry (Offset " +
                            offset + ")", e1);
                    }
                }
                throw new RuntimeException("(Offset " + offset + ")", e);
            }
        }

        /**
         * Reads the next record, retrying on
         * {@link RecoverableIOException}.
         *
         * <p>Up to {@link ArchiveReader#MAX_ALLOWED_RECOVERABLES} attempts are
         * made. After each recoverable failure the attempt is repeated only
         * if {@link #hasNext()} still reports input; otherwise the loop is
         * abandoned. If the last attempt made ended in a recoverable
         * failure, a {@link RuntimeException} wrapping it is thrown.
         *
         * @return Next record.
         * @throws IOException Any non-recoverable I/O failure from
         * {@link #innerNext()}.
         * @throws RuntimeException If the final attempt failed recoverably.
         */
        protected ArchiveRecord exceptionNext()
        throws IOException, RuntimeException {
            ArchiveRecord result = null;
            IOException ioe = null;
            for (int i = MAX_ALLOWED_RECOVERABLES; i > 0 &&
                    result == null; i--) {
                ioe = null;
                try {
                    result = innerNext();
                } catch (RecoverableIOException e) {
                    ioe = e;
                    getLogger().warning(e.getMessage());
                    if (hasNext()) {
                        continue;
                    }
                    // No more input: retrying cannot help.
                    break;
                }
            }
            if (ioe != null) {
                throw new RuntimeException("Retried " +
                    MAX_ALLOWED_RECOVERABLES + " times in a row", ioe);
            }
            return result;
        }

        /**
         * @return Record at the stream's current position, obtained via
         * {@link ArchiveReader#get(long)}.
         */
        protected ArchiveRecord innerNext() throws IOException {
            return get(((RepositionableStream)getInputStream()).position());
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException Always.
         */
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Removes a trailing extension from a name.
     *
     * @param name Name to strip.
     * @param ext Extension to remove.
     * @return <code>name</code> without <code>ext</code> if it ends with it,
     * else <code>name</code> unchanged.
     */
    protected static String stripExtension(final String name,
            final String ext) {
        return (!name.endsWith(ext))? name:
            name.substring(0, name.length() - ext.length());
    }

    /**
     * @return Last path component of the reader identifier.
     */
    public String getFileName() {
        return (new File(getReaderIdentifier())).getName();
    }

    /**
     * @return File name with the compressed-file extension and this reader's
     * dot file extension removed.
     */
    public String getStrippedFileName() {
        return getStrippedFileName(getFileName(),
            getDotFileExtension());
    }

    /**
     * Strips first the compressed-file extension and then the passed
     * extension from a name.
     *
     * @param name Name to strip.
     * @param dotFileExtension Archive extension to remove after the
     * compressed-file extension.
     * @return The stripped name.
     */
    public static String getStrippedFileName(String name,
            final String dotFileExtension) {
        name = stripExtension(name,
            ArchiveFileConstants.DOT_COMPRESSED_FILE_EXTENSION);
        return stripExtension(name, dotFileExtension);
    }

    /**
     * Parses a boolean from a string.
     *
     * @param value String to test; may be null.
     * @return True only if <code>value</code>, lower-cased, equals "true";
     * false for null, empty or anything else.
     */
    protected static boolean getTrueOrFalse(final String value) {
        if (value == null || value.length() <= 0) {
            return false;
        }
        return Boolean.TRUE.toString().equals(value.toLowerCase());
    }

    /**
     * Outputs the whole archive in the named format.
     *
     * <p>Recognized formats are the <code>DUMP</code>,
     * <code>GZIP_DUMP</code>, <code>CDX</code> and <code>CDX_FILE</code>
     * constants. The dump formats switch the digest flag off before dumping.
     *
     * @param format Format name.
     * @return True if the format was recognized and output was produced.
     * @throws IOException If output fails.
     * @throws java.text.ParseException If thrown by {@link #dump(boolean)}.
     */
    protected boolean output(final String format)
    throws IOException, java.text.ParseException {
        boolean result = true;
        if (format.equals(DUMP)) {
            setDigest(false);
            dump(false);
        } else if (format.equals(GZIP_DUMP)) {
            setDigest(false);
            dump(true);
        } else if (format.equals(CDX)) {
            cdxOutput(false);
        } else if (format.equals(CDX_FILE)) {
            cdxOutput(true);
        } else {
            result = false;
        }
        return result;
    }

    /**
     * Writes a CDX header line followed by one CDX line per record.
     *
     * <p>When writing to a file, the file name is the reader identifier with
     * the compressed-file and archive extensions stripped and
     * <code>'.' + CDX</code> appended. The writer is closed even if writing
     * the header or any record fails.
     *
     * @param toFile True to write to a file, false to write to standard
     * output.
     * @throws IOException If writing fails.
     */
    protected void cdxOutput(boolean toFile)
    throws IOException {
        BufferedWriter cdxWriter = null;
        if (toFile) {
            String cdxFilename = stripExtension(getReaderIdentifier(),
                DOT_COMPRESSED_FILE_EXTENSION);
            cdxFilename = stripExtension(cdxFilename, getDotFileExtension());
            cdxFilename += ('.' + CDX);
            cdxWriter = new BufferedWriter(new FileWriter(cdxFilename));
        }

        // Everything after the writer is opened runs under the finally so a
        // failure while writing the header cannot leak the file handle.
        try {
            String header = "CDX b e a m s c " + ((isCompressed()) ? "V" : "v")
                + " n g";
            if (toFile) {
                cdxWriter.write(header);
                cdxWriter.newLine();
            } else {
                System.out.println(header);
            }

            String strippedFileName = getStrippedFileName();
            for (Iterator<ArchiveRecord> ii = iterator(); ii.hasNext();) {
                ArchiveRecord r = ii.next();
                if (toFile) {
                    cdxWriter.write(r.outputCdx(strippedFileName));
                    cdxWriter.newLine();
                } else {
                    System.out.println(r.outputCdx(strippedFileName));
                }
            }
        } finally {
            if (cdxWriter != null) {
                cdxWriter.close();
            }
        }
    }

    /**
     * Outputs the single record at the current stream position in the named
     * format.
     *
     * <p>Recognized formats are <code>CDX</code> (printed to standard output)
     * and <code>DUMP</code> (digest flag switched off, then the record's
     * <code>dump()</code> is called).
     *
     * @param format Format name.
     * @return True if the format was recognized and output was produced.
     * @throws IOException If reading or output fails.
     */
    protected boolean outputRecord(final String format)
    throws IOException {
        boolean result = true;
        if (format.equals(CDX)) {
            System.out.println(get().outputCdx(getStrippedFileName()));
        } else if(format.equals(ArchiveFileConstants.DUMP)) {
            setDigest(false);
            get().dump();
        } else {
            result = false;
        }
        return result;
    }

    /**
     * Dumps the archive contents.
     *
     * @param compress Passed as true by {@link #output(String)} for the
     * <code>GZIP_DUMP</code> format and false for <code>DUMP</code>.
     * @throws IOException If the dump fails.
     * @throws java.text.ParseException If thrown by the implementation.
     */
    public abstract void dump(final boolean compress)
    throws IOException, java.text.ParseException;

    /**
     * @param f File to read.
     * @return A reader on <code>f</code> -- presumably one that deletes the
     * file when closed, going by the name; confirm against implementations.
     */
    public abstract ArchiveReader getDeleteFileOnCloseReader(final File f);
}