1 25 package org.archive.io.arc; 26 27 import java.io.ByteArrayOutputStream ; 28 import java.io.File ; 29 import java.io.FileNotFoundException ; 30 import java.io.IOException ; 31 import java.io.OutputStream ; 32 import java.io.PrintStream ; 33 import java.util.Arrays ; 34 import java.util.Date ; 35 import java.util.Iterator ; 36 import java.util.List ; 37 import java.util.concurrent.atomic.AtomicInteger ; 38 39 import org.archive.io.ArchiveRecord; 40 import org.archive.io.ReplayInputStream; 41 import org.archive.io.WriterPoolMember; 42 import org.archive.util.ArchiveUtils; 43 import org.archive.util.FileUtils; 44 import org.archive.util.TmpDirTestCase; 45 46 47 55 public class ARCWriterTest 56 extends TmpDirTestCase implements ARCConstants { 57 60 private static final String PREFIX = 61 "IAH"; 62 63 private static final String SOME_URL = "http://www.archive.org/test/"; 64 65 66 private static final AtomicInteger SERIAL_NO = new AtomicInteger (); 67 68 71 protected void setUp() throws Exception { 72 super.setUp(); 73 } 74 75 78 protected void tearDown() throws Exception { 79 super.tearDown(); 80 } 81 82 protected static String getContent() { 83 return getContent(null); 84 } 85 86 protected static String getContent(String indexStr) { 87 String page = (indexStr != null)? "Page #" + indexStr: "Some Page"; 88 return "HTTP/1.1 200 OK\r\n" + 89 "Content-Type: text/html\r\n\r\n" + 90 "<html><head><title>" + page + 91 "</title></head>" + 92 "<body>" + page + 93 "</body></html>"; 94 } 95 96 protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index) 97 throws IOException { 98 String indexStr = Integer.toString(index); 99 ByteArrayOutputStream baos = new ByteArrayOutputStream (); 100 String now = ArchiveUtils.get14DigitDate(); 102 int recordLength = 0; 103 byte[] record = (getContent(indexStr)).getBytes(); 104 recordLength += record.length; 105 baos.write(record); 106 baos.write("\n".getBytes()); 108 recordLength += 1; 109 arcWriter.write("http://www.one.net/id=" + indexStr, "text/html", 110 "0.1.2.3", Long.parseLong(now), recordLength, baos); 111 return recordLength; 112 } 113 114 private File writeRecords(String baseName, boolean compress, 115 int maxSize, int recordCount) 116 throws IOException { 117 cleanUpOldFiles(baseName); 118 File [] files = {getTmpDir()}; 119 ARCWriter arcWriter = new ARCWriter(SERIAL_NO, Arrays.asList(files), 120 baseName + '-' + PREFIX, compress, maxSize); 121 assertNotNull(arcWriter); 122 for (int i = 0; i < recordCount; i++) { 123 writeRandomHTTPRecord(arcWriter, i); 124 } 125 arcWriter.close(); 126 assertTrue("Doesn't exist: " + 127 arcWriter.getFile().getAbsolutePath(), 128 arcWriter.getFile().exists()); 129 return arcWriter.getFile(); 130 } 131 132 private void validate(File arcFile, int recordCount) 133 throws FileNotFoundException , IOException { 134 ARCReader reader = ARCReaderFactory.get(arcFile); 135 assertNotNull(reader); 136 List metaDatas = null; 137 if (recordCount == -1) { 138 metaDatas = reader.validate(); 139 } else { 140 metaDatas = reader.validate(recordCount); 141 } 142 reader.close(); 143 reader = ARCReaderFactory.get(arcFile); 147 for (int i = metaDatas.size() - 1; i >= 0; i--) { 148 ARCRecordMetaData meta = (ARCRecordMetaData)metaDatas.get(i); 149 ArchiveRecord r = reader.get(meta.getOffset()); 150 String mimeType = r.getHeader().getMimetype(); 151 assertTrue("Record is bogus", 152 mimeType != null && mimeType.length() > 0); 153 } 154 reader.close(); 155 assertTrue("Metadatas not equal", metaDatas.size() == recordCount); 156 for (Iterator i = metaDatas.iterator(); i.hasNext();) { 157 ARCRecordMetaData r = (ARCRecordMetaData)i.next(); 158 assertTrue("Record is empty", r.getLength() > 0); 159 } 160 } 161 162 public void testCheckARCFileSize() 163 throws IOException { 164 runCheckARCFileSizeTest("checkARCFileSize", false); 165 } 166 167 public void testCheckARCFileSizeCompressed() 168 throws IOException { 169 runCheckARCFileSizeTest("checkARCFileSize", true); 170 } 171 172 public void testWriteRecord() throws IOException { 173 final int recordCount = 2; 174 File arcFile = writeRecords("writeRecord", false, 175 DEFAULT_MAX_ARC_FILE_SIZE, recordCount); 176 validate(arcFile, recordCount + 1); } 178 179 public void testRandomAccess() throws IOException { 180 final int recordCount = 3; 181 File arcFile = writeRecords("writeRecord", true, 182 DEFAULT_MAX_ARC_FILE_SIZE, recordCount); 183 ARCReader reader = ARCReaderFactory.get(arcFile); 184 boolean readFirst = false; 186 String url = null; 187 long offset = -1; 188 long totalRecords = 0; 189 boolean readSecond = false; 190 for (final Iterator i = reader.iterator(); i.hasNext(); totalRecords++) { 191 ARCRecord ar = (ARCRecord)i.next(); 192 if (!readFirst) { 193 readFirst = true; 194 continue; 195 } 196 if (!readSecond) { 197 url = ar.getMetaData().getUrl(); 198 offset = ar.getMetaData().getOffset(); 199 readSecond = true; 200 } 201 } 202 203 reader = ARCReaderFactory.get(arcFile, offset); 204 ArchiveRecord ar = reader.get(); 205 assertEquals(ar.getHeader().getUrl(), url); 206 ar.close(); 207 208 reader = ARCReaderFactory.get(arcFile, offset); 210 int count = 0; 211 for (final Iterator i = reader.iterator(); i.hasNext(); i.next()) { 212 count++; 213 } 214 reader.close(); 215 assertEquals(totalRecords - 1, count); 216 } 217 218 public void testWriteRecordCompressed() throws IOException { 219 final int recordCount = 2; 220 File arcFile = writeRecords("writeRecordCompressed", true, 221 DEFAULT_MAX_ARC_FILE_SIZE, recordCount); 222 validate(arcFile, recordCount + 1 ); 223 } 224 225 private void runCheckARCFileSizeTest(String baseName, boolean compress) 226 throws FileNotFoundException , IOException { 227 writeRecords(baseName, compress, 1024, 15); 228 File [] files = FileUtils.getFilesWithPrefix(getTmpDir(), PREFIX); 230 for (int i = 0; i < files.length; i++) { 231 validate(files[i], -1); 232 } 233 } 234 235 protected ARCWriter createARCWriter(String NAME, boolean compress) { 236 File [] files = {getTmpDir()}; 237 return new ARCWriter(SERIAL_NO, Arrays.asList(files), NAME, 238 compress, DEFAULT_MAX_ARC_FILE_SIZE); 239 } 240 241 protected static ByteArrayOutputStream getBaos(String str) 242 throws IOException { 243 ByteArrayOutputStream baos = new ByteArrayOutputStream (); 244 baos.write(str.getBytes()); 245 return baos; 246 } 247 248 protected static void writeRecord(ARCWriter writer, String url, 249 String type, int len, ByteArrayOutputStream baos) 250 throws IOException { 251 writer.write(url, type, "192.168.1.1", (new Date ()).getTime(), len, 252 baos); 253 } 254 255 protected int iterateRecords(ARCReader r) 256 throws IOException { 257 int count = 0; 258 for (Iterator i = r.iterator(); i.hasNext();) { 259 ARCRecord rec = (ARCRecord)i.next(); 260 rec.close(); 261 if (count != 0) { 262 assertTrue("Unexpected URL " + rec.getMetaData().getUrl(), 263 rec.getMetaData().getUrl().equals(SOME_URL)); 264 } 265 count++; 266 } 267 return count; 268 } 269 270 protected ARCWriter createArcWithOneRecord(String name, 271 boolean compressed) 272 throws IOException { 273 ARCWriter writer = createARCWriter(name, compressed); 274 String content = getContent(); 275 writeRecord(writer, SOME_URL, "text/html", 276 content.length(), getBaos(content)); 277 return writer; 278 } 279 280 public void testSpaceInURL() { 281 String eMessage = null; 282 try { 283 holeyUrl("testSpaceInURL-" + PREFIX, false, " "); 284 } catch (IOException e) { 285 eMessage = e.getMessage(); 286 } 287 assertTrue("Didn't get expected exception: " + eMessage, 288 eMessage.startsWith("Metadata line doesn't match")); 289 } 290 291 public void testTabInURL() { 292 String eMessage = null; 293 try { 294 holeyUrl("testTabInURL-" + PREFIX, false, "\t"); 295 } catch (IOException e) { 296 eMessage = e.getMessage(); 297 } 298 assertTrue("Didn't get expected exception: " + eMessage, 299 eMessage.startsWith("Metadata line doesn't match")); 300 } 301 302 protected void holeyUrl(String name, boolean compress, String urlInsert) 303 throws IOException { 304 ARCWriter writer = createArcWithOneRecord(name, compress); 305 String content = getContent(); 307 ByteArrayOutputStream baos = getBaos(content); 308 writeRecord(writer, SOME_URL + urlInsert + "/index.html", "text/html", 309 content.length(), baos); 310 writer.close(); 311 } 312 313 319 public void testLengthTooShortCompressed() throws IOException { 320 lengthTooShort("testLengthTooShortCompressed-" + PREFIX, true, false); 321 } 322 323 public void testLengthTooShortCompressedStrict() 324 throws IOException { 325 String eMessage = null; 326 try { 327 lengthTooShort("testLengthTooShortCompressedStrict-" + PREFIX, 328 true, true); 329 } catch (RuntimeException e) { 330 eMessage = e.getMessage(); 331 } 332 assertTrue("Didn't get expected exception: " + eMessage, 333 eMessage.startsWith("java.io.IOException: Record ENDING at")); 334 } 335 336 protected void lengthTooShort(String name, boolean compress, boolean strict) 337 throws IOException { 338 ARCWriter writer = createArcWithOneRecord(name, compress); 339 String content = getContent(); 341 ByteArrayOutputStream baos = getBaos(content); 342 baos.write("SOME TRAILING BYTES".getBytes()); 343 writeRecord(writer, SOME_URL, "text/html", 344 content.length(), baos); 345 writeRecord(writer, SOME_URL, "text/html", 346 content.length(), getBaos(content)); 347 writer.close(); 348 349 ByteArrayOutputStream os = new ByteArrayOutputStream (); 351 System.setErr(new PrintStream (os)); 352 353 ARCReader r = ARCReaderFactory.get(writer.getFile()); 354 r.setStrict(strict); 355 int count = iterateRecords(r); 356 assertTrue("Count wrong " + count, count == 4); 357 358 String err = os.toString(); 361 assertTrue("No message " + err, err.startsWith("WARNING") && 362 (err.indexOf("Record ENDING at") > 0)); 363 } 364 365 373 public void testLengthTooLongCompressed() 374 throws IOException { 375 lengthTooLong("testLengthTooLongCompressed-" + PREFIX, 376 true, false); 377 } 378 379 public void testLengthTooLongCompressedStrict() { 380 String eMessage = null; 381 try { 382 lengthTooLong("testLengthTooLongCompressed-" + PREFIX, 383 true, true); 384 } catch (IOException e) { 385 eMessage = e.getMessage(); 386 } 387 assertTrue("Didn't get expected exception: " + eMessage, 388 eMessage.startsWith("Premature EOF before end-of-record")); 389 } 390 391 protected void lengthTooLong(String name, boolean compress, 392 boolean strict) 393 throws IOException { 394 ARCWriter writer = createArcWithOneRecord(name, compress); 395 String content = getContent(); 397 writeRecord(writer, SOME_URL, "text/html", 398 content.length() + 10, getBaos(content)); 399 writeRecord(writer, SOME_URL, "text/html", 400 content.length(), getBaos(content)); 401 writer.close(); 402 403 ByteArrayOutputStream os = new ByteArrayOutputStream (); 405 System.setErr(new PrintStream (os)); 406 407 ARCReader r = ARCReaderFactory.get(writer.getFile()); 408 r.setStrict(strict); 409 int count = iterateRecords(r); 410 assertTrue("Count wrong " + count, count == 4); 411 412 String err = os.toString(); 415 assertTrue("No message " + err, 416 err.startsWith("WARNING Premature EOF before end-of-record")); 417 } 418 419 public void testGapError() throws IOException { 420 ARCWriter writer = createArcWithOneRecord("testGapError", true); 421 String content = getContent(); 422 ReplayInputStream ris = new ReplayInputStream(content.getBytes(), 425 content.length(), null) { 426 private boolean readFullyToCalled = false; 427 public void readFullyTo(OutputStream os) 428 throws IOException { 429 super.readFullyTo(os); 430 this.readFullyToCalled = true; 431 } 432 433 public long remaining() { 434 return (this.readFullyToCalled)? -1: super.remaining(); 435 } 436 }; 437 String message = null; 438 try { 439 writer.write(SOME_URL, "text/html", "192.168.1.1", 440 (new Date ()).getTime(), content.length(), ris); 441 } catch (IOException e) { 442 message = e.getMessage(); 443 } 444 writer.close(); 445 assertTrue("No gap when should be", 446 message != null && 447 message.indexOf("Gap between expected and actual") >= 0); 448 } 449 450 457 public static File createARCFile(File arcdir, boolean compress) 458 throws IOException { 459 File [] files = {arcdir}; 460 ARCWriter writer = new ARCWriter(SERIAL_NO, Arrays.asList(files), 461 "test", compress, DEFAULT_MAX_ARC_FILE_SIZE); 462 String content = getContent(); 463 writeRecord(writer, SOME_URL, "text/html", content.length(), 464 getBaos(content)); 465 writer.close(); 466 return writer.getFile(); 467 } 468 469 483 484 public void testValidateMetaLine() throws Exception { 485 final String line = "http://www.aandw.net/images/walden2.png " + 486 "128.197.34.86 20060111174224 image/png 2160"; 487 ARCWriter w = createARCWriter("testValidateMetaLine", true); 488 try { 489 w.validateMetaLine(line); 490 w.validateMetaLine(line + LINE_SEPARATOR); 491 w.validateMetaLine(line + "\\r\\n"); 492 } finally { 493 w.close(); 494 } 495 } 496 497 public void testArcRecordOffsetReads() throws Exception { 498 WriterPoolMember w = 500 createArcWithOneRecord("testArcRecordInBufferStream", true); 501 w.close(); 502 ARCReader r = ARCReaderFactory.get(w.getFile()); 504 final Iterator i = r.iterator(); 505 ARCRecord ar = (ARCRecord) i.next(); 507 i.hasNext(); 508 ar = (ARCRecord) i.next(); 510 final byte[] buffer = new byte[17]; 514 final int maxRead = 4; 515 int totalRead = 0; 516 while (totalRead < maxRead) { 517 totalRead = totalRead 518 + ar.read(buffer, 13 + totalRead, maxRead - totalRead); 519 assertTrue(totalRead > 0); 520 } 521 } 522 } 523 | Popular Tags |