// KickJava: Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > org > archive > io > arc > ARCRecordMetaData


/* ARCRecordMetaData
 *
 * $Id: ARCRecordMetaData.java,v 1.17 2006/08/28 23:44:17 stack-sf Exp $
 *
 * Created on Jan 7, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.io.arc;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.archive.io.ArchiveRecordHeader;

36 /**
37  * An immutable class to hold an ARC record meta data.
38  *
39  * @author stack
40  */

41 public class ARCRecordMetaData implements ArchiveRecordHeader, ARCConstants {
42     /**
43      * Map of record header fields.
44      *
45      * We store all in a hashmap. This way we can hold version 1 or
46      * version 2 record meta data.
47      *
48      * <p>Keys are lowercase.
49      */

50     protected Map JavaDoc headerFields = null;
51     
52     /**
53      * Digest for the record.
54      *
55      * Only available after the record has been read in totality.
56      */

57     private String JavaDoc digest = null;
58     
59     /**
60      * Status for this request.
61      *
62      * There may be no status.
63      */

64     private String JavaDoc statusCode = null;
65     
66     /**
67      * The arc this metadata came out.
68      * Descriptive String, either path or URL.
69      */

70     private String JavaDoc arc = null;
71     
72     private int contentBegin = 0;
73     
74     /**
75      * Shut down the default constructor.
76      */

77     protected ARCRecordMetaData() {
78         super();
79     }
80
81     /**
82      * Constructor.
83      *
84      * @param arc The arc file this metadata came out of.
85      * @param headerFields Hash of meta fields.
86      *
87      * @throws IOException
88      */

89     public ARCRecordMetaData(final String JavaDoc arc, Map JavaDoc headerFields)
90         throws IOException JavaDoc {
91         // Make sure the minimum required fields are present,
92
for (Iterator JavaDoc i = REQUIRED_VERSION_1_HEADER_FIELDS.iterator();
93             i.hasNext(); ) {
94             testRequiredField(headerFields, (String JavaDoc)i.next());
95         }
96         this.headerFields = headerFields;
97         this.arc = arc;
98     }
99
100     /**
101      * Test required field is present in hash.
102      *
103      * @param fields Map of fields.
104      * @param requiredField Field to test for.
105      *
106      * @exception IOException If required field is not present.
107      */

108     protected void testRequiredField(Map JavaDoc fields, String JavaDoc requiredField)
109         throws IOException JavaDoc {
110         if (!fields.containsKey(requiredField)) {
111             throw new IOException JavaDoc("Required field " + requiredField +
112             " not in meta data.");
113         }
114     }
115
116     /**
117      * Get the time when the record was harvested.
118      * <p>
119      * Returns the date in Heritrix 14 digit time format (UTC). See the
120      * {@link org.archive.util.ArchiveUtils} class for converting to Java
121      * dates.
122      *
123      * @return Header date in Heritrix 14 digit format.
124      * @see org.archive.util.ArchiveUtils#parse14DigitDate(String)
125      */

126     public String JavaDoc getDate() {
127         return (String JavaDoc) this.headerFields.get(DATE_FIELD_KEY);
128     }
129
130     /**
131      * @return Return length of the record.
132      */

133     public long getLength() {
134         return Long.parseLong((String JavaDoc)this.headerFields.
135             get(LENGTH_FIELD_KEY));
136     }
137
138     /**
139      * @return Header url.
140      */

141     public String JavaDoc getUrl() {
142         return (String JavaDoc)this.headerFields.get(URL_FIELD_KEY);
143     }
144
145     /**
146      * @return IP.
147      */

148     public String JavaDoc getIp()
149     {
150         return (String JavaDoc)this.headerFields.get(IP_HEADER_FIELD_KEY);
151     }
152
153     /**
154      * @return mimetype The mimetype that is in the ARC metaline -- NOT the http
155      * content-type content.
156      */

157     public String JavaDoc getMimetype() {
158         return (String JavaDoc)this.headerFields.get(MIMETYPE_FIELD_KEY);
159     }
160
161     /**
162      * @return Arcfile version.
163      */

164     public String JavaDoc getVersion() {
165         return (String JavaDoc)this.headerFields.get(VERSION_FIELD_KEY);
166     }
167
168     /**
169      * @return Offset into arcfile at which this record begins.
170      */

171     public long getOffset() {
172         return ((Long JavaDoc)this.headerFields.get(ABSOLUTE_OFFSET_KEY)).longValue();
173     }
174
175     /**
176      * @param key Key to use looking up field value.
177      * @return value for passed key of null if no such entry.
178      */

179     public Object JavaDoc getHeaderValue(String JavaDoc key) {
180         return this.headerFields.get(key);
181     }
182
183     /**
184      * @return Header field name keys.
185      */

186     public Set JavaDoc getHeaderFieldKeys()
187     {
188         return this.headerFields.keySet();
189     }
190
191     /**
192      * @return Map of header fields.
193      */

194     public Map JavaDoc getHeaderFields() {
195         return this.headerFields;
196     }
197     
198     /**
199      * @return Returns identifier for ARC.
200      */

201     public String JavaDoc getArc() {
202         return this.arc;
203     }
204     
205     /**
206      * @return Convenience method that does a
207      * return new File(this.arc) (Be aware this.arc is not always
208      * full path to an ARC file -- may be an URL). Test
209      * returned file for existence.
210      */

211     public File JavaDoc getArcFile() {
212         return new File JavaDoc(this.arc);
213     }
214     
215     /**
216      * @return Returns the digest.
217      */

218     public String JavaDoc getDigest() {
219         return this.digest;
220     }
221     
222     /**
223      * @param d The digest to set.
224      */

225     public void setDigest(String JavaDoc d) {
226         this.digest = d;
227     }
228     
229     /**
230      * @return Returns the statusCode. May be null.
231      */

232     public String JavaDoc getStatusCode() {
233         return this.statusCode;
234     }
235     
236     /**
237      * @param statusCode The statusCode to set.
238      */

239     public void setStatusCode(String JavaDoc statusCode) {
240         this.statusCode = statusCode;
241     }
242     
243     public String JavaDoc toString() {
244         return ((this.arc != null)? this.arc: "") +
245            ": " +
246            ((this.headerFields != null)? this.headerFields.toString(): "");
247     }
248
249     public String JavaDoc getReaderIdentifier() {
250         return this.getArc();
251     }
252
253     public String JavaDoc getRecordIdentifier() {
254         return getDate() + "/" + getUrl();
255     }
256
257     public int getContentBegin() {
258         return this.contentBegin;
259     }
260     
261     void setContentBegin(final int offset) {
262         this.contentBegin = offset;
263     }
264 }
// Popular Tags