KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > io > RecordingOutputStream


/* RecordingOutputStream
 *
 * $Id: RecordingOutputStream.java,v 1.30.4.1 2007/01/13 01:31:33 stack-sf Exp $
 *
 * Created on Sep 23, 2003
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.io;

import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.util.IoUtils;
36
37
38 /**
39  * An output stream that records all writes to wrapped output
40  * stream.
41  *
42  * A RecordingOutputStream can be wrapped around any other
43  * OutputStream to record all bytes written to it. You can
44  * then request a ReplayInputStream to read those bytes.
45  *
46  * <p>The RecordingOutputStream uses an in-memory buffer and
47  * backing disk file to allow it to record streams of
48  * arbitrary length limited only by available disk space.
49  *
50  * <p>As long as the stream recorded is smaller than the
51  * in-memory buffer, no disk access will occur.
52  *
53  * <p>Recorded content can be recovered as a ReplayInputStream
54  * (via getReplayInputStream() or, for only the content after
55  * the content-begin-mark is set, getContentReplayInputStream() )
56  * or as a ReplayCharSequence (via getReplayCharSequence()).
57  *
58  * <p>This class is also used as a straight output stream
59  * by {@link RecordingInputStream} to which it records all reads.
60  * {@link RecordingInputStream} is exploiting the file backed buffer
61  * facility of this class passing <code>null</code> for the stream
62  * to wrap. TODO: Make a FileBackedOutputStream class that is
63  * subclassed by RecordingInputStream.
64  *
65  * @author gojomo
66  *
67  */

68 public class RecordingOutputStream extends OutputStream {
69     /**
70      * Size of recording.
71      *
72      * Later passed to ReplayInputStream on creation. It uses it to know when
73      * EOS.
74      */

75     private long size = 0;
76
77     private String JavaDoc backingFilename;
78     private OutputStream diskStream = null;
79
80     /**
81      * Buffer we write recordings to.
82      *
83      * We write all recordings here first till its full. Thereafter we
84      * write the backing file.
85      */

86     private byte[] buffer;
87
88     /** current virtual position in the recording */
89     private long position;
90     
91     /** flag to disable recording */
92     private boolean recording;
93     
94     /**
95      * Reusable buffer for FastBufferedOutputStream
96      */

97     protected byte[] bufStreamBuf =
98         new byte [ FastBufferedOutputStream.DEFAULT_BUFFER_SIZE ];
99     
100     /**
101      * True if we're to digest content.
102      */

103     private boolean shouldDigest = false;
104  
105     /**
106      * Digest instance.
107      */

108     private MessageDigest JavaDoc digest = null;
109
110     /**
111      * Define for SHA1 alogarithm.
112      */

113     private static final String JavaDoc SHA1 = "SHA1";
114
115     /**
116      * Maximum amount of header material to accept without the content
117      * body beginning -- if more, throw a RecorderTooMuchHeaderException.
118      * TODO: make configurable? make smaller?
119      */

120     protected static final long MAX_HEADER_MATERIAL = 1024*1024; // 1MB
121

122     
123     /**
124      * When recording HTTP, where the content-body starts.
125      */

126     private long contentBeginMark;
127
128     /**
129      * Stream to record.
130      */

131     private OutputStream out = null;
132
133
134     /**
135      * Create a new RecordingOutputStream.
136      *
137      * @param bufferSize Buffer size to use.
138      * @param backingFilename Name of backing file to use.
139      */

140     public RecordingOutputStream(int bufferSize, String JavaDoc backingFilename) {
141         this.buffer = new byte[bufferSize];
142         this.backingFilename = backingFilename;
143         recording = true;
144     }
145
146     /**
147      * Wrap the given stream, both recording and passing along any data written
148      * to this RecordingOutputStream.
149      *
150      * @throws IOException If failed creation of backing file.
151      */

152     public void open() throws IOException JavaDoc {
153         this.open(null);
154     }
155
156     /**
157      * Wrap the given stream, both recording and passing along any data written
158      * to this RecordingOutputStream.
159      *
160      * @param wrappedStream Stream to wrap. May be null for case where we
161      * want to write to a file backed stream only.
162      *
163      * @throws IOException If failed creation of backing file.
164      */

165     public void open(OutputStream wrappedStream) throws IOException JavaDoc {
166         if(isOpen()) {
167             // error; should not be opening/wrapping in an unclosed
168
// stream remains open
169
throw new IOException JavaDoc("ROS already open for "
170                     +Thread.currentThread().getName());
171         }
172         this.out = wrappedStream;
173         this.position = 0;
174         this.size = 0;
175         this.contentBeginMark = -1;
176         // ensure recording turned on
177
this.recording = true;
178         // Always begins false; must use startDigest() to begin
179
this.shouldDigest = false;
180         if (this.diskStream != null) {
181             closeDiskStream();
182         }
183         if (this.diskStream == null) {
184             // TODO: Fix so we only make file when its actually needed.
185
FileOutputStream JavaDoc fis = new FileOutputStream JavaDoc(this.backingFilename);
186             
187             this.diskStream = new RecyclingFastBufferedOutputStream(fis, bufStreamBuf);
188         }
189     }
190
191     public void write(int b) throws IOException JavaDoc {
192         if(recording) {
193             record(b);
194         }
195         if (this.out != null) {
196             this.out.write(b);
197         }
198         checkLimits();
199     }
200
201     public void write(byte[] b) throws IOException JavaDoc {
202         if(recording) {
203             record(b, 0, b.length);
204         }
205         if (this.out != null) {
206             this.out.write(b);
207         }
208         checkLimits();
209     }
210
211     public void write(byte[] b, int off, int len) throws IOException JavaDoc {
212         if(recording) {
213             record(b, off, len);
214         }
215         if (this.out != null) {
216             this.out.write(b, off, len);
217         }
218         checkLimits();
219     }
220
221     /**
222      * Check any enforced limits. For now, this only checks MAX_HEADER_MATERIAL
223      * if markContentBegin() has not yet been called.
224      */

225     protected void checkLimits() throws RecorderTooMuchHeaderException {
226         if (contentBeginMark<0) {
227             // no mark yet
228
if(position>MAX_HEADER_MATERIAL) {
229                 throw new RecorderTooMuchHeaderException();
230             }
231         }
232     }
233
234     /**
235      * Record the given byte for later recovery
236      *
237      * @param b Int to record.
238      *
239      * @exception IOException Failed write to backing file.
240      */

241     private void record(int b) throws IOException JavaDoc {
242         if (this.shouldDigest) {
243             this.digest.update((byte)b);
244         }
245         if (this.position >= this.buffer.length) {
246             // TODO: Its possible to call write w/o having first opened a
247
// stream. Protect ourselves against this.
248
assert this.diskStream != null: "Diskstream is null";
249             this.diskStream.write(b);
250         } else {
251             this.buffer[(int) this.position] = (byte) b;
252         }
253         this.position++;
254     }
255
256     /**
257      * Record the given byte-array range for recovery later
258      *
259      * @param b Buffer to record.
260      * @param off Offset into buffer at which to start recording.
261      * @param len Length of buffer to record.
262      *
263      * @exception IOException Failed write to backing file.
264      */

265     private void record(byte[] b, int off, int len) throws IOException JavaDoc {
266         if(this.shouldDigest) {
267             assert this.digest != null: "Digest is null.";
268             this.digest.update(b, off, len);
269         }
270         tailRecord(b, off, len);
271     }
272
273     /**
274      * Record without digesting.
275      *
276      * @param b Buffer to record.
277      * @param off Offset into buffer at which to start recording.
278      * @param len Length of buffer to record.
279      *
280      * @exception IOException Failed write to backing file.
281      */

282     private void tailRecord(byte[] b, int off, int len) throws IOException JavaDoc {
283         if(this.position >= this.buffer.length){
284             // TODO: Its possible to call write w/o having first opened a
285
// stream. Lets protect ourselves against this.
286
if (this.diskStream == null) {
287                 throw new IOException JavaDoc("diskstream is null");
288             }
289             this.diskStream.write(b, off, len);
290             this.position += len;
291         } else {
292             assert this.buffer != null: "Buffer is null";
293             int toCopy = (int)Math.min(this.buffer.length - this.position, len);
294             assert b != null: "Passed buffer is null";
295             System.arraycopy(b, off, this.buffer, (int)this.position, toCopy);
296             this.position += toCopy;
297             // TODO verify these are +1 -1 right
298
if (toCopy < len) {
299                 tailRecord(b, off + toCopy, len - toCopy);
300             }
301         }
302     }
303
304     public void close() throws IOException JavaDoc {
305         if(contentBeginMark<0) {
306             // if unset, consider 0 posn as content-start
307
// (so that a -1 never survives to replay step)
308
contentBeginMark = 0;
309         }
310         if (this.out != null) {
311             this.out.close();
312             this.out = null;
313         }
314         closeRecorder();
315     }
316     
317     protected synchronized void closeDiskStream()
318     throws IOException JavaDoc {
319         if (this.diskStream != null) {
320             this.diskStream.close();
321             this.diskStream = null;
322         }
323     }
324
325     public void closeRecorder() throws IOException JavaDoc {
326         recording = false;
327         closeDiskStream(); // if any
328
// This setting of size is important. Its passed to ReplayInputStream
329
// on creation. It uses it to know EOS.
330
if (this.size == 0) {
331             this.size = this.position;
332         }
333     }
334
335     /* (non-Javadoc)
336      * @see java.io.OutputStream#flush()
337      */

338     public void flush() throws IOException JavaDoc {
339         if (this.out != null) {
340             this.out.flush();
341         }
342         if (this.diskStream != null) {
343             this.diskStream.flush();
344         }
345     }
346
347     public ReplayInputStream getReplayInputStream() throws IOException JavaDoc {
348         return getReplayInputStream(0);
349     }
350     
351     public ReplayInputStream getReplayInputStream(long skip) throws IOException JavaDoc {
352         // If this method is being called, then assumption must be that the
353
// stream is closed. If it ain't, then the stream gotten won't work
354
// -- the size will zero so any attempt at a read will get back EOF.
355
assert this.out == null: "Stream is still open.";
356         ReplayInputStream replay = new ReplayInputStream(this.buffer,
357                 this.size, this.contentBeginMark, this.backingFilename);
358         replay.skip(skip);
359         return replay;
360     }
361
362     /**
363      * Return a replay stream, cued up to begining of content
364      *
365      * @throws IOException
366      * @return An RIS.
367      */

368     public ReplayInputStream getContentReplayInputStream() throws IOException JavaDoc {
369         return getReplayInputStream(this.contentBeginMark);
370     }
371
372     public long getSize() {
373         return this.size;
374     }
375
376     /**
377      * Remember the current position as the start of the "response
378      * body". Useful when recording HTTP traffic as a way to start
379      * replays after the headers.
380      */

381     public void markContentBegin() {
382         this.contentBeginMark = this.position;
383         startDigest();
384     }
385
386     /**
387      * Starts digesting recorded data, if a MessageDigest has been
388      * set.
389      */

390     public void startDigest() {
391         if (this.digest != null) {
392             this.digest.reset();
393             this.shouldDigest = true;
394         }
395     }
396
397     /**
398      * Convenience method for setting SHA1 digest.
399      * @see #setDigest(String)
400      */

401     public void setSha1Digest() {
402         setDigest(SHA1);
403     }
404     
405
406     /**
407      * Sets a digest function which may be applied to recorded data.
408      * The difference between calling this method and {@link #setDigest(MessageDigest)}
409      * is that this method tries to reuse MethodDigest instance if already allocated
410      * and of appropriate algorithm.
411      * @param algorithm Message digest algorithm to use.
412      * @see #setDigest(MessageDigest)
413      */

414     public void setDigest(String JavaDoc algorithm) {
415         try {
416             // Reuse extant digest if its sha1 algorithm.
417
if (this.digest == null ||
418                     !this.digest.getAlgorithm().equals(algorithm)) {
419                 setDigest(MessageDigest.getInstance(algorithm));
420             }
421         } catch (NoSuchAlgorithmException JavaDoc e) {
422             e.printStackTrace();
423         }
424     }
425
426     /**
427      * Sets a digest function which may be applied to recorded data.
428      *
429      * As usually only a subset of the recorded data should
430      * be fed to the digest, you must also call startDigest()
431      * to begin digesting.
432      *
433      * @param md Message digest function to use.
434      */

435     public void setDigest(MessageDigest JavaDoc md) {
436         this.digest = md;
437     }
438
439     /**
440      * Return the digest value for any recorded, digested data. Call
441      * only after all data has been recorded; otherwise, the running
442      * digest state is ruined.
443      *
444      * @return the digest final value
445      */

446     public byte[] getDigestValue() {
447         if(this.digest == null) {
448             return null;
449         }
450         return this.digest.digest();
451     }
452
453     public ReplayCharSequence getReplayCharSequence() throws IOException JavaDoc {
454         return getReplayCharSequence(null);
455     }
456
457     public ReplayCharSequence getReplayCharSequence(String JavaDoc characterEncoding)
458     throws IOException JavaDoc {
459         return getReplayCharSequence(characterEncoding, this.contentBeginMark);
460     }
461     
462     /**
463      * @param characterEncoding Encoding of recorded stream.
464      * @return A ReplayCharSequence Will return null if an IOException. Call
465      * close on returned RCS when done.
466      * @throws IOException
467      */

468     public ReplayCharSequence getReplayCharSequence(String JavaDoc characterEncoding,
469             long startOffset) throws IOException JavaDoc {
470         // TODO: handled transfer-encoding: chunked content-bodies properly
471
float maxBytesPerChar = IoUtils.encodingMaxBytesPerChar(characterEncoding);
472         if(maxBytesPerChar<=1) {
473             // single
474
// TODO: take into account single-byte encoding may be non-default
475
return new ByteReplayCharSequence(
476                     this.buffer,
477                     this.size,
478                     startOffset,
479                     this.backingFilename);
480         } else {
481             // multibyte
482
if(this.size <= this.buffer.length) {
483                 // raw data is all in memory; do in memory
484
return new MultiByteReplayCharSequence(
485                         this.buffer,
486                         this.size,
487                         startOffset,
488                         characterEncoding);
489                 
490             } else {
491                 // raw data overflows to disk; use temp file
492
ReplayInputStream ris = getReplayInputStream(startOffset);
493                 ReplayCharSequence rcs = new MultiByteReplayCharSequence(
494                         ris,
495                         this.backingFilename,
496                         characterEncoding);
497                 ris.close();
498                 return rcs;
499             }
500             
501         }
502         
503     }
504
505     public long getResponseContentLength() {
506         return this.size - this.contentBeginMark;
507     }
508
509     /**
510      * @return True if this ROS is open.
511      */

512     public boolean isOpen() {
513         return this.out != null;
514     }
515 }
516
Popular Tags