KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > io > GzipHeader


1 /* GzipHeader
2 *
3 * $Id: GzipHeader.java,v 1.7 2005/12/20 18:11:32 stack-sf Exp $
4 *
5 * Created on July 5, 2004
6 *
7 * Copyright (C) 2004 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.io;
26
27 import java.io.EOFException JavaDoc;
28 import java.io.IOException JavaDoc;
29 import java.io.InputStream JavaDoc;
30 import java.util.zip.CRC32 JavaDoc;
31 import java.util.zip.Deflater JavaDoc;
32 import java.util.zip.GZIPInputStream JavaDoc;
33
34 /**
35  * Read in the GZIP header.
36  *
37  * See RFC1952 for specification on what the header looks like.
38  * Assumption is that stream is cued-up with the gzip header as the
39  * next thing to be read.
40  *
41  * <p>Of <a HREF="http://jguru.com/faq/view.jsp?EID=13647">Java
42  * and unsigned bytes</a>. That is, it's always a signed int in
43  * java no matter what the qualifier whether byte, char, etc.
44  *
45  * <p>Add accessors for optional filename, comment and MTIME.
46  *
47  * @author stack
48  */

49 public class GzipHeader {
50     /**
51      * Length of minimal GZIP header.
52      *
53      * See RFC1952 for explaination of value of 10.
54      */

55     public static final int MINIMAL_GZIP_HEADER_LENGTH = 10;
56     
57     /**
58      * Total length of the gzip header.
59      */

60     protected int length = 0;
61
62     /**
63      * The GZIP header FLG byte.
64      */

65     protected int flg;
66     
67     /**
68      * GZIP header XFL byte.
69      */

70     private int xfl;
71     
72     /**
73      * GZIP header OS byte.
74      */

75     private int os;
76     
77     /**
78      * Extra header field content.
79      */

80     private byte [] fextra = null;
81     
82     /**
83      * GZIP header MTIME field.
84      */

85     private int mtime;
86     
87     
88     /**
89      * Shutdown constructor.
90      *
91      * Must pass an input stream.
92      */

93     public GzipHeader() {
94         super();
95     }
96     
97     /**
98      * Constructor.
99      *
100      * This constructor advances the stream past any gzip header found.
101      *
102      * @param in InputStream to read from.
103      * @throws IOException
104      */

105     public GzipHeader(InputStream JavaDoc in) throws IOException JavaDoc {
106         super();
107         readHeader(in);
108     }
109     
110     /**
111      * Read in gzip header.
112      *
113      * Advances the stream past the gzip header.
114      * @param in InputStream.
115      *
116      * @throws IOException Throws if does not start with GZIP Header.
117      */

118     public void readHeader(InputStream JavaDoc in) throws IOException JavaDoc {
119         CRC32 JavaDoc crc = new CRC32 JavaDoc();
120         crc.reset();
121         if (!testGzipMagic(in, crc)) {
122             throw new NoGzipMagicException();
123         }
124         this.length += 2;
125         if (readByte(in, crc) != Deflater.DEFLATED) {
126             throw new IOException JavaDoc("Unknown compression");
127         }
128         this.length++;
129        
130         // Get gzip header flag.
131
this.flg = readByte(in, crc);
132         this.length++;
133         
134         // Get MTIME.
135
this.mtime = readInt(in, crc);
136         this.length += 4;
137         
138         // Read XFL and OS.
139
this.xfl = readByte(in, crc);
140         this.length++;
141         this.os = readByte(in, crc);
142         this.length++;
143         
144         // Skip optional extra field -- stuff w/ alexa stuff in it.
145
final int FLG_FEXTRA = 4;
146         if ((this.flg & FLG_FEXTRA) == FLG_FEXTRA) {
147             int count = readShort(in, crc);
148             this.length +=2;
149             this.fextra = new byte[count];
150             readByte(in, crc, this.fextra, 0, count);
151             this.length += count;
152         }
153         
154         // Skip file name. It ends in null.
155
final int FLG_FNAME = 8;
156         if ((this.flg & FLG_FNAME) == FLG_FNAME) {
157             while (readByte(in, crc) != 0) {
158                 this.length++;
159             }
160         }
161         
162         // Skip file comment. It ends in null.
163
final int FLG_FCOMMENT = 16; // File comment
164
if ((this.flg & FLG_FCOMMENT) == FLG_FCOMMENT) {
165             while (readByte(in, crc) != 0) {
166                 this.length++;
167             }
168         }
169         
170         // Check optional CRC.
171
final int FLG_FHCRC = 2;
172         if ((this.flg & FLG_FHCRC) == FLG_FHCRC) {
173             int calcCrc = (int)(crc.getValue() & 0xffff);
174             if (readShort(in, crc) != calcCrc) {
175                 throw new IOException JavaDoc("Bad header CRC");
176             }
177             this.length += 2;
178         }
179     }
180     
181     /**
182      * Test gzip magic is next in the stream.
183      * Reads two bytes. Caller needs to manage resetting stream.
184      * @param in InputStream to read.
185      * @return true if found gzip magic. False otherwise
186      * or an IOException (including EOFException).
187      * @throws IOException
188      */

189     public boolean testGzipMagic(InputStream JavaDoc in) throws IOException JavaDoc {
190         return testGzipMagic(in, null);
191     }
192     
193     /**
194      * Test gzip magic is next in the stream.
195      * Reads two bytes. Caller needs to manage resetting stream.
196      * @param in InputStream to read.
197      * @param crc CRC to update.
198      * @return true if found gzip magic. False otherwise
199      * or an IOException (including EOFException).
200      * @throws IOException
201      */

202     public boolean testGzipMagic(InputStream JavaDoc in, CRC32 JavaDoc crc)
203             throws IOException JavaDoc {
204         return readShort(in, crc) == GZIPInputStream.GZIP_MAGIC;
205     }
206     
207     /**
208      * Read an int.
209      *
210      * We do not expect to get a -1 reading. If we do, we throw exception.
211      * Update the crc as we go.
212      *
213      * @param in InputStream to read.
214      * @param crc CRC to update.
215      * @return int read.
216      *
217      * @throws IOException
218      */

219     private int readInt(InputStream JavaDoc in, CRC32 JavaDoc crc) throws IOException JavaDoc {
220         int s = readShort(in, crc);
221         return ((readShort(in, crc) << 16) & 0xffff0000) | s;
222     }
223     
224     /**
225      * Read a short.
226      *
227      * We do not expect to get a -1 reading. If we do, we throw exception.
228      * Update the crc as we go.
229      *
230      * @param in InputStream to read.
231      * @param crc CRC to update.
232      * @return Short read.
233      *
234      * @throws IOException
235      */

236     private int readShort(InputStream JavaDoc in, CRC32 JavaDoc crc) throws IOException JavaDoc {
237         int b = readByte(in, crc);
238         return ((readByte(in, crc) << 8) & 0x00ff00) | b;
239     }
240     
241     /**
242      * Read a byte.
243      *
244      * We do not expect to get a -1 reading. If we do, we throw exception.
245      * Update the crc as we go.
246      *
247      * @param in InputStream to read.
248      * @return Byte read.
249      *
250      * @throws IOException
251      */

252     protected int readByte(InputStream JavaDoc in) throws IOException JavaDoc {
253             return readByte(in, null);
254     }
255     
256     /**
257      * Read a byte.
258      *
259      * We do not expect to get a -1 reading. If we do, we throw exception.
260      * Update the crc as we go.
261      *
262      * @param in InputStream to read.
263      * @param crc CRC to update.
264      * @return Byte read.
265      *
266      * @throws IOException
267      */

268     protected int readByte(InputStream JavaDoc in, CRC32 JavaDoc crc) throws IOException JavaDoc {
269         int b = in.read();
270         if (b == -1) {
271             throw new EOFException JavaDoc();
272         }
273         if (crc != null) {
274             crc.update(b);
275         }
276         return b & 0xff;
277     }
278     
279     /**
280      * Read a byte.
281      *
282      * We do not expect to get a -1 reading. If we do, we throw exception.
283      * Update the crc as we go.
284      *
285      * @param in InputStream to read.
286      * @param crc CRC to update.
287      * @param buffer Buffer to read into.
288      * @param offset Offset to start filling buffer at.
289      * @param length How much to read.
290      * @return Bytes read.
291      *
292      * @throws IOException
293      */

294     protected int readByte(InputStream JavaDoc in, CRC32 JavaDoc crc, byte [] buffer,
295                 int offset, int length)
296             throws IOException JavaDoc {
297         for (int i = offset; i < length; i++) {
298             buffer[offset + i] = (byte)readByte(in, crc);
299         }
300         return length;
301     }
302     
303     /**
304      * @return Returns the fextra.
305      */

306     public byte[] getFextra() {
307         return this.fextra;
308     }
309     
310     /**
311      * @return Returns the flg.
312      */

313     public int getFlg() {
314         return this.flg;
315     }
316     
317     /**
318      * @return Returns the os.
319      */

320     public int getOs() {
321         return this.os;
322     }
323     
324     /**
325      * @return Returns the xfl.
326      */

327     public int getXfl() {
328         return this.xfl;
329     }
330     
331     /**
332      * @return Returns the mtime.
333      */

334     public int getMtime() {
335         return this.mtime;
336     }
337     
338     /**
339      * @return Returns the length.
340      */

341     public int getLength() {
342         return length;
343     }
344 }
345
Popular Tags