KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > jforum > util > legacy > commons > fileupload > MultipartStream


1 /*
2  * Copyright 2001-2004 The Apache Software Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package net.jforum.util.legacy.commons.fileupload;
17
18 import java.io.ByteArrayOutputStream JavaDoc;
19 import java.io.IOException JavaDoc;
20 import java.io.InputStream JavaDoc;
21 import java.io.OutputStream JavaDoc;
22 import java.io.UnsupportedEncodingException JavaDoc;
23
/**
 * <p>Low level API for processing file uploads.
 *
 * <p>This class can be used to process data streams conforming to the MIME
 * 'multipart' format as defined in
 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Arbitrarily
 * large amounts of data in the stream can be processed under constant
 * memory usage.
 *
 * <p>The format of the stream is defined in the following way:<br>
 *
 * <code>
 *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
 *   encapsulation := delimiter body CRLF<br>
 *   delimiter := "--" boundary CRLF<br>
 *   close-delimiter := "--" boundary "--"<br>
 *   preamble := &lt;ignore&gt;<br>
 *   epilogue := &lt;ignore&gt;<br>
 *   body := header-part CRLF body-part<br>
 *   header-part := 1*header CRLF<br>
 *   header := header-name ":" header-value<br>
 *   header-name := &lt;printable ascii characters except ":"&gt;<br>
 *   header-value := &lt;any ascii characters except CR &amp; LF&gt;<br>
 *   body-data := &lt;arbitrary data&gt;<br>
 * </code>
 *
 * <p>Note that body-data can contain another multipart entity. There
 * is limited support for single pass processing of such nested
 * streams. The nested stream is <strong>required</strong> to have a
 * boundary token of the same length as the parent stream (see {@link
 * #setBoundary(byte[])}).
 *
 * <p>Here is an example of usage of this class.<br>
 *
 * <pre>
 *    try {
 *        MultipartStream multipartStream = new MultipartStream(input,
 *                                                              boundary);
 *        boolean nextPart = multipartStream.skipPreamble();
 *        OutputStream output;
 *        while (nextPart) {
 *            String headers = multipartStream.readHeaders();
 *            // process headers
 *            // create some output stream
 *            multipartStream.readBodyData(output);
 *            nextPart = multipartStream.readBoundary();
 *        }
 *    } catch (MultipartStream.MalformedStreamException e) {
 *        // the stream failed to follow required syntax
 *    } catch (IOException e) {
 *        // a read or write error occurred
 *    }
 * </pre>
 *
 * <p>Instances keep mutable parsing state (buffer, head and tail indexes)
 * without any synchronization, so a stream must not be shared between
 * threads.
 *
 * @author <a href="mailto:Rafal.Krzewski@e-point.pl">Rafal Krzewski</a>
 * @author <a href="mailto:martinc@apache.org">Martin Cooper</a>
 * @author Sean C. Sullivan
 *
 * @version $Id: MultipartStream.java,v 1.3 2005/07/26 03:04:59 rafaelsteil Exp $
 */
public class MultipartStream {

    // ----------------------------------------------------- Manifest constants

    /**
     * The Carriage Return ASCII character value.
     */
    public static final byte CR = 0x0D;

    /**
     * The Line Feed ASCII character value.
     */
    public static final byte LF = 0x0A;

    /**
     * The dash (-) ASCII character value.
     */
    public static final byte DASH = 0x2D;

    /**
     * The maximum length of <code>header-part</code> that will be
     * processed (10 kilobytes = 10240 bytes).
     */
    public static final int HEADER_PART_SIZE_MAX = 10240;

    /**
     * The default length of the buffer used for processing a request.
     */
    protected static final int DEFAULT_BUFSIZE = 4096;

    /**
     * A byte sequence that marks the end of <code>header-part</code>
     * (<code>CRLFCRLF</code>).
     */
    protected static final byte[] HEADER_SEPARATOR = {CR, LF, CR, LF};

    /**
     * A byte sequence that follows a delimiter that will be
     * followed by an encapsulation (<code>CRLF</code>).
     */
    protected static final byte[] FIELD_SEPARATOR = {CR, LF};

    /**
     * A byte sequence that follows a delimiter of the last
     * encapsulation in the stream (<code>--</code>).
     */
    protected static final byte[] STREAM_TERMINATOR = {DASH, DASH};

    // ----------------------------------------------------------- Data members

    /**
     * The input stream from which data is read.
     */
    private InputStream input;

    /**
     * The length of the boundary token plus the leading <code>CRLF--</code>.
     */
    private int boundaryLength;

    /**
     * The amount of data, in bytes, that must be kept in the buffer in order
     * to detect delimiters reliably.
     */
    private int keepRegion;

    /**
     * The byte sequence that partitions the stream
     * (<code>CRLF--</code> followed by the boundary token).
     */
    private byte[] boundary;

    /**
     * The length of the buffer used for processing the request.
     */
    private int bufSize;

    /**
     * The buffer used for processing the request.
     */
    private byte[] buffer;

    /**
     * The index of the first valid character in the buffer.
     * <br>
     * 0 &lt;= head &lt; bufSize
     */
    private int head;

    /**
     * The index of the last valid character in the buffer + 1.
     * <br>
     * 0 &lt;= tail &lt;= bufSize
     */
    private int tail;

    /**
     * The content encoding to use when reading headers.
     */
    private String headerEncoding;

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor. It leaves the stream without an input source,
     * buffer or boundary, so the parsing methods cannot be used on an
     * instance created this way.
     *
     * @see #MultipartStream(InputStream, byte[], int)
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream() {
    }

    /**
     * <p>Constructs a <code>MultipartStream</code> with a custom size buffer.
     *
     * <p>Note that the buffer must be at least big enough to contain the
     * boundary string, plus 4 characters for CR/LF and double dash, plus at
     * least one byte of data. Too small a buffer size setting will degrade
     * performance.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     * @param bufSize  The size of the buffer to be used, in bytes.
     *
     * @exception IllegalArgumentException if <code>bufSize</code> is too
     *            small to hold the complete delimiter
     *            (<code>boundary.length + 4</code> bytes); such a buffer can
     *            never match a delimiter.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream(InputStream input,
                           byte[] boundary,
                           int bufSize) {
        this.input = input;
        this.bufSize = bufSize;
        this.buffer = new byte[bufSize];

        // We prepend CR/LF to the boundary to chop trailing CR/LF from
        // body-data tokens.
        this.boundaryLength = boundary.length + 4;
        if (bufSize < this.boundaryLength) {
            // A delimiter is only recognised when it fits into the buffer as
            // a whole; with a smaller buffer the body loop could not make
            // progress either.
            throw new IllegalArgumentException(
                    "The buffer size specified for the MultipartStream is too small");
        }
        this.keepRegion = boundary.length + 3;
        this.boundary = new byte[this.boundaryLength];
        this.boundary[0] = CR;
        this.boundary[1] = LF;
        this.boundary[2] = DASH;
        this.boundary[3] = DASH;
        System.arraycopy(boundary, 0, this.boundary, 4, boundary.length);

        head = 0;
        tail = 0;
    }

    /**
     * <p>Constructs a <code>MultipartStream</code> with a default size buffer.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[], int)
     */
    public MultipartStream(InputStream input,
                           byte[] boundary) {
        this(input, boundary, DEFAULT_BUFSIZE);
    }

    // --------------------------------------------------------- Public methods

    /**
     * Retrieves the character encoding used when reading the headers of an
     * individual part. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @return The encoding used to read part headers.
     */
    public String getHeaderEncoding() {
        return headerEncoding;
    }

    /**
     * Specifies the character encoding to be used when reading the headers of
     * individual parts. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @param encoding The encoding used to read part headers.
     */
    public void setHeaderEncoding(String encoding) {
        headerEncoding = encoding;
    }

    /**
     * Reads a byte from the <code>buffer</code>, and refills it as
     * necessary.
     *
     * @return The next byte from the input stream.
     *
     * @exception IOException if there is no more data available.
     */
    public byte readByte()
        throws IOException {
        // Buffer depleted ?
        if (head == tail) {
            // Mark the buffer empty before refilling so that a failed or
            // exhausted read never leaves a negative or stale tail behind.
            head = 0;
            tail = 0;
            int bytesRead = input.read(buffer, 0, bufSize);
            if (bytesRead == -1) {
                // No more data available.
                throw new IOException("No more data is available");
            }
            tail = bytesRead;
        }
        return buffer[head++];
    }

    /**
     * Skips a <code>boundary</code> token, and checks whether more
     * <code>encapsulations</code> are contained in the stream.
     *
     * @return <code>true</code> if there are more encapsulations in
     *         this stream; <code>false</code> otherwise.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly or
     *            fails to follow required syntax.
     */
    public boolean readBoundary()
        throws MalformedStreamException {
        byte[] marker = new byte[2];

        head += boundaryLength;
        try {
            marker[0] = readByte();
            if (marker[0] == LF) {
                // Work around IE5 Mac bug with input type=image.
                // Because the boundary delimiter, not including the trailing
                // CRLF, must not appear within any file (RFC 2046, section
                // 5.1.1), we know the missing CR is due to a buggy browser
                // rather than a file containing something similar to a
                // boundary.
                return true;
            }
            marker[1] = readByte();
        } catch (IOException e) {
            throw streamEnded(e);
        }

        // The syntax check lives outside the try block: the exception below
        // is itself an IOException and must not be re-labelled as an
        // unexpected end of stream.
        if (arrayequals(marker, STREAM_TERMINATOR, 2)) {
            return false;
        }
        if (arrayequals(marker, FIELD_SEPARATOR, 2)) {
            return true;
        }
        throw new MalformedStreamException(
                "Unexpected characters follow a boundary");
    }

    /**
     * <p>Changes the boundary token used for partitioning the stream.
     *
     * <p>This method allows single pass processing of nested multipart
     * streams.
     *
     * <p>The boundary token of the nested stream is <code>required</code>
     * to be of the same length as the boundary token in parent stream.
     *
     * <p>Restoring the parent stream boundary token after processing of a
     * nested stream is left to the application.
     *
     * @param boundary The boundary to be used for parsing of the nested
     *                 stream.
     *
     * @exception IllegalBoundaryException if the <code>boundary</code>
     *            has a different length than the one being currently parsed.
     */
    public void setBoundary(byte[] boundary)
        throws IllegalBoundaryException {
        if (boundary.length != boundaryLength - 4) {
            throw new IllegalBoundaryException(
                    "The length of a boundary token can not be changed");
        }
        // Only the token is replaced; the leading CRLF-- stays in place.
        System.arraycopy(boundary, 0, this.boundary, 4, boundary.length);
    }

    /**
     * <p>Reads the <code>header-part</code> of the current
     * <code>encapsulation</code>.
     *
     * <p>Headers are returned verbatim to the input stream, including the
     * trailing <code>CRLF</code> marker. Parsing is left to the
     * application.
     *
     * <p>Only the first {@link #HEADER_PART_SIZE_MAX} bytes are returned;
     * any further bytes up to the terminating <code>CRLFCRLF</code> are
     * consumed from the stream but silently dropped.
     *
     * @return The <code>header-part</code> of the current encapsulation.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     */
    public String readHeaders()
        throws MalformedStreamException {
        int matched = 0;
        int size = 0;
        // Bytes are collected first and decoded in one go to support
        // multi-byte characters.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        while (matched < HEADER_SEPARATOR.length) {
            byte b;
            try {
                b = readByte();
            } catch (IOException e) {
                throw streamEnded(e);
            }
            size++;
            if (b == HEADER_SEPARATOR[matched]) {
                matched++;
            } else if (b == HEADER_SEPARATOR[0]) {
                // The byte that broke the partial match may itself start the
                // separator (e.g. CR CR LF CR LF), so restart at one.
                matched = 1;
            } else {
                matched = 0;
            }
            if (size <= HEADER_PART_SIZE_MAX) {
                baos.write(b);
            }
        }

        if (headerEncoding != null) {
            try {
                return baos.toString(headerEncoding);
            } catch (UnsupportedEncodingException e) {
                // Fall back to platform default if specified encoding is not
                // supported.
                return baos.toString();
            }
        }
        return baos.toString();
    }

    /**
     * <p>Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and writes its contents into the
     * output <code>Stream</code>.
     *
     * <p>Arbitrary large amounts of data can be processed by this
     * method using a constant size buffer. (see {@link
     * #MultipartStream(InputStream,byte[],int) constructor}).
     *
     * @param output The <code>Stream</code> to write data into.
     *
     * @return the amount of data written.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     * @exception IOException              if an i/o error occurs.
     */
    public int readBodyData(OutputStream output)
        throws MalformedStreamException,
               IOException {
        int total = consumeBodyData(output);
        output.flush();
        return total;
    }

    /**
     * <p>Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and discards it.
     *
     * <p>Use this method to skip encapsulations you don't need or don't
     * understand.
     *
     * @return The amount of data discarded.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     * @exception IOException              if an i/o error occurs.
     */
    public int discardBodyData()
        throws MalformedStreamException,
               IOException {
        return consumeBodyData(null);
    }

    /**
     * Finds the beginning of the first <code>encapsulation</code>.
     *
     * @return <code>true</code> if an <code>encapsulation</code> was found in
     *         the stream.
     *
     * @exception IOException if an i/o error occurs.
     */
    public boolean skipPreamble()
        throws IOException {
        // First delimiter may be not preceded with a CRLF, so temporarily
        // search for "--boundary" only.
        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
        boundaryLength = boundary.length - 2;
        try {
            // Discard all data up to the delimiter.
            discardBodyData();

            // Read boundary - if succeeded, the stream contains an
            // encapsulation.
            return readBoundary();
        } catch (MalformedStreamException e) {
            return false;
        } finally {
            // Restore delimiter.
            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
            boundaryLength = boundary.length;
            boundary[0] = CR;
            boundary[1] = LF;
        }
    }

    /**
     * Compares <code>count</code> first bytes in the arrays
     * <code>a</code> and <code>b</code>.
     *
     * @param a     The first array to compare.
     * @param b     The second array to compare.
     * @param count How many bytes should be compared.
     *
     * @return <code>true</code> if <code>count</code> first bytes in arrays
     *         <code>a</code> and <code>b</code> are equal.
     */
    public static boolean arrayequals(byte[] a,
                                      byte[] b,
                                      int count) {
        for (int i = 0; i < count; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Searches for a byte of specified value in the <code>buffer</code>,
     * starting at the specified <code>position</code>.
     *
     * @param value The value to find.
     * @param pos   The starting position for searching.
     *
     * @return The position of byte found, counting from beginning of the
     *         <code>buffer</code>, or <code>-1</code> if not found.
     */
    protected int findByte(byte value,
                           int pos) {
        for (int i = pos; i < tail; i++) {
            if (buffer[i] == value) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Searches for the <code>boundary</code> in the <code>buffer</code>
     * region delimited by <code>head</code> and <code>tail</code>.
     *
     * @return The position of the boundary found, counting from the
     *         beginning of the <code>buffer</code>, or <code>-1</code> if
     *         not found.
     */
    protected int findSeparator() {
        if (boundaryLength == 0) {
            // No boundary configured (default constructor): nothing to find.
            return -1;
        }
        // Last position at which a complete boundary still fits.
        int maxpos = tail - boundaryLength;
        int first = head;
        while (first <= maxpos) {
            first = findByte(boundary[0], first);
            if (first == -1 || first > maxpos) {
                return -1;
            }
            int match = 1;
            while (match < boundaryLength
                    && buffer[first + match] == boundary[match]) {
                match++;
            }
            if (match == boundaryLength) {
                return first;
            }
            first++;
        }
        return -1;
    }

    /**
     * Returns a string representation of this object.
     *
     * @return The string representation of this object: the delimiter
     *         currently searched for (including its leading
     *         <code>CRLF--</code>) and the buffer size.
     */
    public String toString() {
        StringBuffer sbTemp = new StringBuffer();
        sbTemp.append("boundary='");
        if (boundary == null) {
            sbTemp.append("null");
        } else {
            // Map each byte to the character with the same code
            // (ISO-8859-1) so the result does not depend on the platform
            // charset.
            for (int i = 0; i < boundary.length; i++) {
                sbTemp.append((char) (boundary[i] & 0xFF));
            }
        }
        sbTemp.append("'\nbufSize=");
        sbTemp.append(bufSize);
        return sbTemp.toString();
    }

    // -------------------------------------------------------- Private helpers

    /**
     * Consumes <code>body-data</code> up to (not including) the next
     * delimiter, optionally copying it to <code>output</code>.
     *
     * @param output The stream to copy the data into, or <code>null</code>
     *               to discard it. The stream is not flushed on success.
     *
     * @return The number of body bytes consumed.
     *
     * @exception MalformedStreamException if the stream ends before a
     *            delimiter is found.
     * @exception IOException              if an i/o error occurs.
     */
    private int consumeBodyData(OutputStream output)
        throws MalformedStreamException,
               IOException {
        int total = 0;
        while (true) {
            // Is boundary token present somewhere in the buffer?
            int pos = findSeparator();
            if (pos != -1) {
                // Hand over the rest of the data before the boundary.
                int remaining = pos - head;
                if (output != null) {
                    output.write(buffer, head, remaining);
                }
                head = pos;
                return total + remaining;
            }

            // Determine how much data should be kept in the buffer: a
            // delimiter may be split across two reads.
            int available = tail - head;
            int pad = (available > keepRegion) ? keepRegion : available;
            int chunk = available - pad;

            // Hand over the data that certainly belongs to the body-data.
            if (output != null) {
                output.write(buffer, head, chunk);
            }
            total += chunk;

            // Move the kept data to the beginning of the buffer.
            System.arraycopy(buffer, tail - pad, buffer, 0, pad);
            head = 0;
            tail = pad;

            // Refill buffer with new data.   [pprrrrrrr]
            int bytesRead = input.read(buffer, pad, bufSize - pad);
            if (bytesRead == -1) {
                // The last pad amount is left in the buffer. Boundary can't
                // be in there so hand over the data you have and signal an
                // error condition.
                if (output != null) {
                    output.write(buffer, 0, pad);
                    output.flush();
                }
                head = tail;
                throw streamEnded(null);
            }
            tail = pad + bytesRead;
        }
    }

    /**
     * Creates the exception reported when the input ends prematurely.
     *
     * @param cause The underlying I/O failure, or <code>null</code> if the
     *              end of the stream was simply reached.
     *
     * @return The exception to throw.
     */
    private static MalformedStreamException streamEnded(IOException cause) {
        MalformedStreamException mse =
                new MalformedStreamException("Stream ended unexpectedly");
        if (cause != null) {
            mse.initCause(cause);
        }
        return mse;
    }

    // ------------------------------------------------------ Exception classes

    /**
     * Thrown to indicate that the input stream fails to follow the
     * required syntax.
     */
    public static class MalformedStreamException
        extends IOException {

        private static final long serialVersionUID = 1L;

        /**
         * Constructs a <code>MalformedStreamException</code> with no
         * detail message.
         */
        public MalformedStreamException() {
            super();
        }

        /**
         * Constructs a <code>MalformedStreamException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public MalformedStreamException(String message) {
            super(message);
        }
    }

    /**
     * Thrown upon attempt of setting an invalid boundary token.
     */
    public static class IllegalBoundaryException
        extends IOException {

        private static final long serialVersionUID = 1L;

        /**
         * Constructs an <code>IllegalBoundaryException</code> with no
         * detail message.
         */
        public IllegalBoundaryException() {
            super();
        }

        /**
         * Constructs an <code>IllegalBoundaryException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public IllegalBoundaryException(String message) {
            super(message);
        }
    }
}
836
Popular Tags