KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > io > arc > ARCConstants


/*
 * ARCConstants
 *
 * $Id: ARCConstants.java,v 1.18 2006/08/25 17:34:38 stack-sf Exp $
 *
 * Created on Dec 30, 2003.
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.io.arc;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;

import org.archive.io.ArchiveFileConstants;
import org.archive.io.GzipHeader;

36 /**
37  * Constants used by ARC files and in ARC file processing.
38  *
39  * @author stack
40  */

41 public interface ARCConstants extends ArchiveFileConstants {
42     /**
43      * Default maximum ARC file size.
44      */

45     public static final int DEFAULT_MAX_ARC_FILE_SIZE = 100000000;
46     
47     /**
48      * Maximum length for a metadata line.
49      */

50     public static final int MAX_METADATA_LINE_LENGTH = (4 * 1024);
51
52     /**
53      * ARC file extention.
54      */

55     public static final String JavaDoc ARC_FILE_EXTENSION = "arc";
56     
57     /**
58      * Dot ARC file extension.
59      */

60     public static final String JavaDoc DOT_ARC_FILE_EXTENSION =
61         "." + ARC_FILE_EXTENSION;
62     
63     public static final String JavaDoc DOT_COMPRESSED_FILE_EXTENSION =
64         ArchiveFileConstants.DOT_COMPRESSED_FILE_EXTENSION;
65
66     /**
67      * Compressed arc file extension.
68      */

69     public static final String JavaDoc COMPRESSED_ARC_FILE_EXTENSION =
70         ARC_FILE_EXTENSION + DOT_COMPRESSED_FILE_EXTENSION;
71     
72     /**
73      * Compressed dot arc file extension.
74      */

75     public static final String JavaDoc DOT_COMPRESSED_ARC_FILE_EXTENSION =
76         DOT_ARC_FILE_EXTENSION + DOT_COMPRESSED_FILE_EXTENSION;
77
78     /**
79      * Encoding to use getting bytes from strings.
80      *
81      * Specify an encoding rather than leave it to chance: i.e whatever the
82      * JVMs encoding. Use an encoding that gets the stream as bytes, not chars.
83      */

84     public static final String JavaDoc DEFAULT_ENCODING = "ISO-8859-1";
85
86     /**
87      * ARC file line seperator character.
88      *
89      * This is what the alexa c-code looks for delimiting lines.
90      */

91     public static final char LINE_SEPARATOR = '\n';
92
93     /**
94      * ARC header field seperator character.
95      */

96     public static final char HEADER_FIELD_SEPARATOR = ' ';
97
98     /**
99      * ARC file *MAGIC NUMBER*.
100      *
101      * Every ARC file must begin w/ this.
102      */

103     public static final String JavaDoc ARC_MAGIC_NUMBER = "filedesc://";
104
105     /**
106      * The FLG.FEXTRA field that is added to ARC files. (See RFC1952 to
107      * understand FLG.FEXTRA).
108      */

109     public static final byte[] ARC_GZIP_EXTRA_FIELD = { 8, 0, 'L', 'X', 4, 0,
110             0, 0, 0, 0 };
111
112     /**
113      * Key for the ARC Header IP field.
114      *
115      * Lowercased.
116      */

117     public static final String JavaDoc IP_HEADER_FIELD_KEY = "ip-address";
118
119     /**
120      * Key for the ARC Header Result Code field.
121      *
122      * Lowercased.
123      */

124     public static final String JavaDoc CODE_HEADER_FIELD_KEY = "result-code";
125
126     /**
127      * Key for the ARC Header Checksum field.
128      *
129      * Lowercased.
130      */

131     public static final String JavaDoc CHECKSUM_HEADER_FIELD_KEY = "checksum";
132
133     /**
134      * Key for the ARC Header Location field.
135      *
136      * Lowercased.
137      */

138     public static final String JavaDoc LOCATION_HEADER_FIELD_KEY = "location";
139
140     /**
141      * Key for the ARC Header Offset field.
142      *
143      * Lowercased.
144      */

145     public static final String JavaDoc OFFSET_HEADER_FIELD_KEY = "offset";
146
147     /**
148      * Key for the ARC Header filename field.
149      *
150      * Lowercased.
151      */

152     public static final String JavaDoc FILENAME_HEADER_FIELD_KEY = "filename";
153     
154     /**
155      * Key for statuscode field.
156      */

157     public static final String JavaDoc STATUSCODE_FIELD_KEY = "statuscode";
158     
159     /**
160      * Key for offset field.
161      */

162     public static final String JavaDoc OFFSET_FIELD_KEY = OFFSET_HEADER_FIELD_KEY;
163     
164     /**
165      * Key for filename field.
166      */

167     public static final String JavaDoc FILENAME_FIELD_KEY = FILENAME_HEADER_FIELD_KEY;
168     
169     /**
170      * Key for checksum field.
171      */

172     public static final String JavaDoc CHECKSUM_FIELD_KEY = CHECKSUM_HEADER_FIELD_KEY;
173     
174     /**
175      * Tokenized field prefix.
176      *
177      * Use this prefix for tokenized fields when naming fields in
178      * an index.
179      */

180     public static final String JavaDoc TOKENIZED_PREFIX = "tokenized_";
181     
182
183     /**
184      * Version 1 required metadata fields.
185      */

186     public static List JavaDoc REQUIRED_VERSION_1_HEADER_FIELDS = Arrays
187             .asList(new String JavaDoc[] { URL_FIELD_KEY, IP_HEADER_FIELD_KEY,
188                     DATE_FIELD_KEY, MIMETYPE_FIELD_KEY,
189                     LENGTH_FIELD_KEY, VERSION_FIELD_KEY,
190                     ABSOLUTE_OFFSET_KEY });
191
192     /**
193      * Minimum possible record length.
194      *
195      * This is a rough calc. When the header is data it will occupy less space.
196      */

197     public static int MINIMUM_RECORD_LENGTH = 1 + "://".length() + 1
198             + ARC_FILE_EXTENSION.length() + " ".length() + +1 + " ".length()
199             + 1 + " ".length() + 1 + "/".length() + 1 + " ".length() + 1;
200
201     /**
202      * Start of a GZIP header that uses default deflater.
203      */

204     public static final byte[] GZIP_HEADER_BEGIN = {
205             (byte) GZIPInputStream.GZIP_MAGIC, // Magic number (short)
206
(byte) (GZIPInputStream.GZIP_MAGIC >> 8), // Magic number (short)
207
Deflater.DEFLATED // Compression method (CM)
208
};
209
210     /**
211      * Length of minimual 'default GZIP header.
212      *
213      * See RFC1952 for explaination of value of 10.
214      */

215     public static final int DEFAULT_GZIP_HEADER_LENGTH =
216         GzipHeader.MINIMAL_GZIP_HEADER_LENGTH;
217 }
218
Popular Tags