1 26 package org.archive.io.warc; 27 28 import java.util.Arrays ; 29 import java.util.List ; 30 31 import org.archive.io.ArchiveFileConstants; 32 33 38 public interface WARCConstants extends ArchiveFileConstants { 39 43 public static final int DEFAULT_MAX_WARC_FILE_SIZE = 1024 * 1024 * 1024; 44 45 49 public static final String WARC_MAGIC = "WARC/"; 50 51 56 public static final String WARC_VERSION = "0.10"; 57 58 67 public static final int MAX_WARC_HEADER_LINE_LENGTH = 1024 * 100; 68 public static final int MAX_LINE_LENGTH = MAX_WARC_HEADER_LINE_LENGTH; 69 70 73 public static final String WARC_FILE_EXTENSION = "warc"; 74 75 78 public static final String DOT_WARC_FILE_EXTENSION = 79 "." + WARC_FILE_EXTENSION; 80 81 public static final String DOT_COMPRESSED_FILE_EXTENSION = 82 ArchiveFileConstants.DOT_COMPRESSED_FILE_EXTENSION; 83 84 87 public static final String COMPRESSED_WARC_FILE_EXTENSION = 88 WARC_FILE_EXTENSION + DOT_COMPRESSED_FILE_EXTENSION; 89 90 93 public static final String DOT_COMPRESSED_WARC_FILE_EXTENSION = 94 DOT_WARC_FILE_EXTENSION + DOT_COMPRESSED_FILE_EXTENSION; 95 96 109 public static final String DEFAULT_ENCODING = "ISO-8859-1"; 110 public static final String HEADER_LINE_ENCODING = DEFAULT_ENCODING; 111 112 public static final String [] HEADER_FIELD_KEYS = { 113 VERSION_FIELD_KEY, 114 LENGTH_FIELD_KEY, 115 TYPE_FIELD_KEY, 116 URL_FIELD_KEY, 117 DATE_FIELD_KEY, 118 RECORD_IDENTIFIER_FIELD_KEY, 119 MIMETYPE_FIELD_KEY 120 }; 121 122 125 public static final String WARCINFO = "warcinfo"; 126 public static final String RESPONSE = "response"; 127 public static final String RESOURCE = "resource"; 128 public static final String REQUEST = "request"; 129 public static final String METADATA = "metadata"; 130 public static final String REVISIT = "revist"; 131 public static final String CONVERSION = "conversion"; 132 public static final String CONTINUATION = "continuation"; 133 134 public static final String TYPE = "type"; 135 136 public static final String [] TYPES = {WARCINFO, RESPONSE, RESOURCE, 138 REQUEST, METADATA, REVISIT, CONVERSION, CONTINUATION}; 139 140 public static final int WARCINFO_INDEX = 0; 142 public static final int RESPONSE_INDEX = 1; 143 public static final int RESOURCE_INDEX = 2; 144 public static final int REQUEST_INDEX = 3; 145 public static final int METADATA_INDEX = 4; 146 public static final int REVISIT_INDEX = 5; 147 public static final int CONVERSION_INDEX = 6; 148 public static final int CONTINUATION_INDEX = 7; 149 150 public static final List TYPES_LIST = Arrays.asList(TYPES); 152 153 156 public static final String WARC_ID = WARC_MAGIC + WARC_VERSION; 157 158 161 public static final char HEADER_FIELD_SEPARATOR = ' '; 162 163 168 public static final Character [] WSP = {HEADER_FIELD_SEPARATOR, '\t'}; 169 170 175 public static final String PLACEHOLDER_RECORD_LENGTH_STRING = 176 "000000000000"; 177 178 public static final String NAMED_FIELD_IP_LABEL = "IP-Address"; 179 public static final String NAMED_FIELD_CHECKSUM_LABEL = "Checksum"; 180 public static final String NAMED_FIELD_RELATED_LABEL = "Related-Record-ID"; 181 public static final String NAMED_FIELD_WARCFILENAME = "Filename"; 182 public static final String NAMED_FIELD_DESCRIPTION = "Description"; 183 public static final String NAMED_FIELD_FILEDESC = "ARC-FileDesc"; 184 public static final String NAMED_FIELD_TRUNCATED = "Truncated"; 185 public static final String NAMED_FIELD_TRUNCATED_VALUE_TIME = "time"; 186 public static final String NAMED_FIELD_TRUNCATED_VALUE_LEN = "length"; 187 public static final String NAMED_FIELD_TRUNCATED_VALUE_HEAD = 188 "long-headers"; 189 public static final String NAMED_FIELD_TRUNCATED_VALUE_UNSPECIFIED = null; 190 191 199 public static final String HTTP_REQUEST_MIMETYPE = 200 "application/http;msgtype=request"; 201 public static final String HTTP_RESPONSE_MIMETYPE = 202 "application/http;msgtype=response"; 203 } 204 | Popular Tags |