1 25 package org.archive.crawler.io; 26 27 import it.unimi.dsi.mg4j.util.MutableString; 28 29 import java.util.logging.Formatter ; 30 import java.util.logging.LogRecord ; 31 32 import org.archive.crawler.datamodel.CoreAttributeConstants; 33 import org.archive.crawler.datamodel.CrawlURI; 34 import org.archive.util.ArchiveUtils; 35 import org.archive.util.Base32; 36 import org.archive.util.MimetypeUtils; 37 38 43 public class UriProcessingFormatter 44 extends Formatter implements CoreAttributeConstants { 45 private final static String NA = "-"; 46 52 private final static int GUESS_AT_LOG_LENGTH = 53 17 + 1 + 3 + 1 + 10 + 128 + + 1 + 10 + 1 + 128 + 1 + 10 + 1 + 3 + 54 14 + 1 + 32 + 4 + 128 + 1; 55 56 59 private final MutableString buffer = 60 new MutableString(GUESS_AT_LOG_LENGTH); 61 62 public String format(LogRecord lr) { 63 CrawlURI curi = (CrawlURI)lr.getParameters()[0]; 64 String length = NA; 65 String mime = null; 66 if (curi.isHttpTransaction()) { 67 if(curi.getContentLength() >= 0) { 68 length = Long.toString(curi.getContentLength()); 69 } else if (curi.getContentSize() > 0) { 70 length = Long.toString(curi.getContentSize()); 71 } 72 mime = curi.getContentType(); 73 } else { 74 if (curi.getContentSize() > 0) { 75 length = Long.toString(curi.getContentSize()); 76 } 77 mime = curi.getContentType(); 78 } 79 mime = MimetypeUtils.truncate(mime); 80 81 long time = System.currentTimeMillis(); 82 String arcTimeAndDuration; 83 if(curi.containsKey(A_FETCH_COMPLETED_TIME)) { 84 long completedTime = curi.getLong(A_FETCH_COMPLETED_TIME); 85 long beganTime = curi.getLong(A_FETCH_BEGAN_TIME); 86 arcTimeAndDuration = ArchiveUtils.get17DigitDate(beganTime) + "+" 87 + Long.toString(completedTime - beganTime); 88 } else { 89 arcTimeAndDuration = NA; 90 } 91 92 String via = curi.flattenVia(); 93 94 Object digest = curi.getContentDigest(); 95 if (digest != null) { 96 digest = Base32.encode((byte [])digest); 97 } 98 99 String sourceTag = curi.containsKey(A_SOURCE_TAG) 100 ? curi.getString(A_SOURCE_TAG) 101 : null; 102 103 this.buffer.length(0); 104 return this.buffer.append(ArchiveUtils.getLog17Date(time)) 105 .append(" ") 106 .append(ArchiveUtils.padTo(curi.getFetchStatus(), 5)) 107 .append(" ") 108 .append(ArchiveUtils.padTo(length, 10)) 109 .append(" ") 110 .append(curi.getUURI().toString()) 111 .append(" ") 112 .append(checkForNull(curi.getPathFromSeed())) 113 .append(" ") 114 .append(checkForNull(via)) 115 .append(" ") 116 .append(mime) 117 .append(" ") 118 .append("#") 119 .append(ArchiveUtils.padTo( 121 Integer.toString(curi.getThreadNumber()), 3, '0')) 122 .append(" ") 123 .append(arcTimeAndDuration) 124 .append(" ") 125 .append(checkForNull((String )digest)) 126 .append(" ") 127 .append(checkForNull(sourceTag)) 128 .append(" ") 129 .append(checkForNull(curi.getAnnotations())) 130 .append("\n").toString(); 131 } 132 133 137 protected String checkForNull(String str) { 138 return (str == null || str.length() <= 0)? NA: str; 139 } 140 } 141 142 143 | Popular Tags |