KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > tomcat > util > http > fileupload > MultipartStream


1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */

17
18
19 package org.apache.tomcat.util.http.fileupload;
20
21
22 import java.io.ByteArrayOutputStream JavaDoc;
23 import java.io.IOException JavaDoc;
24 import java.io.InputStream JavaDoc;
25 import java.io.OutputStream JavaDoc;
26 import java.io.UnsupportedEncodingException JavaDoc;
27
28
/**
 * <p> Low level API for processing file uploads.
 *
 * <p> This class can be used to process data streams conforming to MIME
 * 'multipart' format as defined in
 * <a HREF="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Arbitrarily
 * large amounts of data in the stream can be processed under constant
 * memory usage.
 *
 * <p> The format of the stream is defined in the following way:<br>
 *
 * <code>
 * multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
 * encapsulation := delimiter body CRLF<br>
 * delimiter := "--" boundary CRLF<br>
 * close-delimiter := "--" boundary "--"<br>
 * preamble := &lt;ignore&gt;<br>
 * epilogue := &lt;ignore&gt;<br>
 * body := header-part CRLF body-data<br>
 * header-part := 1*header CRLF<br>
 * header := header-name ":" header-value<br>
 * header-name := &lt;printable ascii characters except ":"&gt;<br>
 * header-value := &lt;any ascii characters except CR &amp; LF&gt;<br>
 * body-data := &lt;arbitrary data&gt;<br>
 * </code>
 *
 * <p>Note that body-data can contain another multipart entity. There
 * is limited support for single pass processing of such nested
 * streams. The nested stream is <strong>required</strong> to have a
 * boundary token of the same length as the parent stream (see {@link
 * #setBoundary(byte[])}).
 *
 * <p>Here is an example of usage of this class.<br>
 *
 * <pre>
 * try {
 *     MultipartStream multipartStream = new MultipartStream(input,
 *                                                           boundary);
 *     boolean nextPart = multipartStream.skipPreamble();
 *     OutputStream output;
 *     while (nextPart) {
 *         String headers = multipartStream.readHeaders();
 *         // process headers
 *         // create some output stream
 *         multipartStream.readBodyData(output);
 *         nextPart = multipartStream.readBoundary();
 *     }
 * } catch (MultipartStream.MalformedStreamException e) {
 *     // the stream failed to follow required syntax
 * } catch (IOException e) {
 *     // a read or write error occurred
 * }
 * </pre>
 *
 * <p>Instances keep mutable parsing state (buffer indices and the current
 * boundary) and perform no synchronization.
 *
 * @author <a HREF="mailto:Rafal.Krzewski@e-point.pl">Rafal Krzewski</a>
 * @author <a HREF="mailto:martinc@apache.org">Martin Cooper</a>
 * @author Sean C. Sullivan
 *
 * @version $Id: MultipartStream.java 467222 2006-10-24 03:17:11Z markt $
 */
public class MultipartStream
{

    // ----------------------------------------------------- Manifest constants


    /**
     * The maximum length of <code>header-part</code> that will be
     * processed (10 kilobytes = 10240 bytes).
     */
    public static final int HEADER_PART_SIZE_MAX = 10240;


    /**
     * The default length of the buffer used for processing a request.
     */
    protected static final int DEFAULT_BUFSIZE = 4096;


    /**
     * A byte sequence that marks the end of <code>header-part</code>
     * (<code>CRLFCRLF</code>).
     */
    protected static final byte[] HEADER_SEPARATOR = {0x0D, 0x0A, 0x0D, 0x0A};


    /**
     * A byte sequence that follows a delimiter that will be
     * followed by an encapsulation (<code>CRLF</code>).
     */
    protected static final byte[] FIELD_SEPARATOR = { 0x0D, 0x0A };


    /**
     * A byte sequence that follows a delimiter of the last
     * encapsulation in the stream (<code>--</code>).
     */
    protected static final byte[] STREAM_TERMINATOR = { 0x2D, 0x2D };


    /**
     * Sink that silently drops everything written to it. It lets
     * {@link #discardBodyData()} share the scanning loop with
     * {@link #readBodyData(OutputStream)}.
     */
    private static final OutputStream NULL_SINK = new OutputStream()
    {
        public void write(int b)
        {
            // Intentionally empty: the data is being discarded.
        }

        public void write(byte[] b, int off, int len)
        {
            // Intentionally empty: the data is being discarded.
        }
    };


    // ----------------------------------------------------------- Data members


    /**
     * The input stream from which data is read.
     */
    private InputStream input;


    /**
     * The length of the boundary token plus the leading <code>CRLF--</code>.
     */
    private int boundaryLength;


    /**
     * The amount of data, in bytes, that must be kept in the buffer in order
     * to detect delimiters reliably.
     */
    private int keepRegion;


    /**
     * The byte sequence that partitions the stream.
     */
    private byte[] boundary;


    /**
     * The length of the buffer used for processing the request.
     */
    private int bufSize;


    /**
     * The buffer used for processing the request.
     */
    private byte[] buffer;


    /**
     * The index of first valid character in the buffer.
     * <br>
     * 0 &lt;= head &lt; bufSize
     */
    private int head;


    /**
     * The index of last valid character in the buffer + 1.
     * <br>
     * 0 &lt;= tail &lt;= bufSize
     */
    private int tail;


    /**
     * The content encoding to use when reading headers.
     */
    private String headerEncoding;


    // ----------------------------------------------------------- Constructors


    /**
     * Default constructor. It assigns neither an input stream, a boundary
     * nor a buffer, so the parsing methods cannot be used on an instance
     * created this way.
     *
     * @see #MultipartStream(InputStream, byte[], int)
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream()
    {
    }


    /**
     * <p> Constructs a <code>MultipartStream</code> with a custom size buffer.
     *
     * <p> Note that the buffer should be big enough to contain the
     * boundary string, plus 4 characters for CR/LF and double dash, plus at
     * least one byte of data. Too small a buffer size setting will degrade
     * performance. A buffer that cannot even hold the delimiter
     * (<code>boundary.length + 4</code> bytes) is rejected, because the
     * delimiter could then never be matched and reading would stall.
     *
     * @param input The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     * <code>encapsulations</code>.
     * @param bufSize The size of the buffer to be used, in bytes.
     *
     * @exception IllegalArgumentException if <code>bufSize</code> is smaller
     * than <code>boundary.length + 4</code>.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream(InputStream input,
                           byte[] boundary,
                           int bufSize)
    {
        int delimiterLength = boundary.length + 4;
        if (bufSize < delimiterLength)
        {
            throw new IllegalArgumentException(
                    "The buffer size (" + bufSize + ") is too small to hold"
                    + " the delimiter (" + delimiterLength + " bytes)");
        }

        this.input = input;
        this.bufSize = bufSize;
        this.buffer = new byte[bufSize];

        // We prepend CR/LF to the boundary to chop trailing CR/LF from
        // body-data tokens.
        this.boundary = new byte[delimiterLength];
        this.boundaryLength = delimiterLength;
        this.keepRegion = boundary.length + 3;
        this.boundary[0] = 0x0D;
        this.boundary[1] = 0x0A;
        this.boundary[2] = 0x2D;
        this.boundary[3] = 0x2D;
        System.arraycopy(boundary, 0, this.boundary, 4, boundary.length);

        head = 0;
        tail = 0;
    }


    /**
     * <p> Constructs a <code>MultipartStream</code> with a default size buffer.
     *
     * @param input The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     * <code>encapsulations</code>.
     *
     * @exception IOException when an error occurs.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[], int)
     */
    public MultipartStream(InputStream input,
                           byte[] boundary)
        throws IOException
    {
        this(input, boundary, DEFAULT_BUFSIZE);
    }


    // --------------------------------------------------------- Public methods


    /**
     * Retrieves the character encoding used when reading the headers of an
     * individual part. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @return The encoding used to read part headers.
     */
    public String getHeaderEncoding()
    {
        return headerEncoding;
    }


    /**
     * Specifies the character encoding to be used when reading the headers of
     * individual parts. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @param encoding The encoding used to read part headers.
     */
    public void setHeaderEncoding(String encoding)
    {
        headerEncoding = encoding;
    }


    /**
     * Reads a byte from the <code>buffer</code>, and refills it as
     * necessary.
     *
     * @return The next byte from the input stream.
     *
     * @exception IOException if there is no more data available.
     */
    public byte readByte()
        throws IOException
    {
        // Buffer depleted ?
        if (head == tail)
        {
            head = 0;
            // Refill.
            int bytesRead = input.read(buffer, head, bufSize);
            if (bytesRead == -1)
            {
                // Keep the buffer marked as empty. Storing -1 in tail would
                // make the next call skip the refill and hand out stale
                // bytes instead of reporting the end of the stream again.
                tail = 0;
                throw new IOException("No more data is available");
            }
            tail = bytesRead;
        }
        return buffer[head++];
    }


    /**
     * Skips a <code>boundary</code> token, and checks whether more
     * <code>encapsulations</code> are contained in the stream.
     *
     * @return <code>true</code> if there are more encapsulations in
     * this stream; <code>false</code> otherwise.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly or
     * fails to follow required syntax.
     */
    public boolean readBoundary()
        throws MalformedStreamException
    {
        byte[] marker = new byte[2];

        head += boundaryLength;
        try
        {
            marker[0] = readByte();
            marker[1] = readByte();
        }
        catch (IOException e)
        {
            throw streamEnded(e);
        }

        // The marker is checked outside the try block on purpose:
        // MalformedStreamException is itself an IOException, so throwing it
        // inside the block above would get it caught and reported as
        // "Stream ended unexpectedly".
        if (arrayequals(marker, STREAM_TERMINATOR, 2))
        {
            return false;
        }
        if (arrayequals(marker, FIELD_SEPARATOR, 2))
        {
            return true;
        }
        throw new MalformedStreamException(
                "Unexpected characters follow a boundary");
    }


    /**
     * <p>Changes the boundary token used for partitioning the stream.
     *
     * <p>This method allows single pass processing of nested multipart
     * streams.
     *
     * <p>The boundary token of the nested stream is <code>required</code>
     * to be of the same length as the boundary token in parent stream.
     *
     * <p>Restoring the parent stream boundary token after processing of a
     * nested stream is left to the application.
     *
     * @param boundary The boundary to be used for parsing of the nested
     * stream.
     *
     * @exception IllegalBoundaryException if the <code>boundary</code>
     * has a different length than the one
     * being currently parsed.
     */
    public void setBoundary(byte[] boundary)
        throws IllegalBoundaryException
    {
        if (boundary.length != boundaryLength - 4)
        {
            throw new IllegalBoundaryException(
                    "The length of a boundary token can not be changed");
        }
        // The first four bytes (CR LF - -) stay in place.
        System.arraycopy(boundary, 0, this.boundary, 4, boundary.length);
    }


    /**
     * <p>Reads the <code>header-part</code> of the current
     * <code>encapsulation</code>.
     *
     * <p>Headers are returned verbatim to the input stream, including the
     * trailing <code>CRLF</code> marker. Parsing is left to the
     * application.
     *
     * <p>The whole <code>header-part</code> is always consumed from the
     * stream, but only its first {@link #HEADER_PART_SIZE_MAX} bytes are
     * kept; anything beyond that is dropped from the returned value.
     *
     * @return The <code>header-part</code> of the current encapsulation.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     */
    public String readHeaders()
        throws MalformedStreamException
    {
        // Number of leading HEADER_SEPARATOR bytes matched so far.
        int matched = 0;
        int size = 0;
        // Bytes are collected first and decoded at the end to support
        // multi-byte characters.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        while (matched < HEADER_SEPARATOR.length)
        {
            byte b;
            try
            {
                b = readByte();
            }
            catch (IOException e)
            {
                throw streamEnded(e);
            }
            size++;
            if (b == HEADER_SEPARATOR[matched])
            {
                matched++;
            }
            else if (b == HEADER_SEPARATOR[0])
            {
                // The mismatching byte is a CR, which may itself start the
                // separator (e.g. CR CR LF CR LF), so keep it as a match of
                // length one instead of starting over from nothing.
                matched = 1;
            }
            else
            {
                matched = 0;
            }
            if (size <= HEADER_PART_SIZE_MAX)
            {
                baos.write(b);
            }
        }

        if (headerEncoding == null)
        {
            return baos.toString();
        }
        try
        {
            return baos.toString(headerEncoding);
        }
        catch (UnsupportedEncodingException e)
        {
            // Fall back to platform default if specified encoding is not
            // supported.
            return baos.toString();
        }
    }


    /**
     * <p>Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and writes its contents into the
     * output <code>Stream</code>.
     *
     * <p>Arbitrary large amounts of data can be processed by this
     * method using a constant size buffer. (see {@link
     * #MultipartStream(InputStream,byte[],int) constructor}).
     *
     * @param output The <code>Stream</code> to write data into.
     *
     * @return the amount of data written.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     * @exception IOException if an i/o error occurs.
     */
    public int readBodyData(OutputStream output)
        throws MalformedStreamException,
               IOException
    {
        return copyBodyData(output);
    }


    /**
     * <p> Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and discards it.
     *
     * <p>Use this method to skip encapsulations you don't need or don't
     * understand.
     *
     * @return The amount of data discarded.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     * @exception IOException if an i/o error occurs.
     */
    public int discardBodyData()
        throws MalformedStreamException,
               IOException
    {
        return copyBodyData(NULL_SINK);
    }


    /**
     * Finds the beginning of the first <code>encapsulation</code>.
     *
     * @return <code>true</code> if an <code>encapsulation</code> was found in
     * the stream.
     *
     * @exception IOException if an i/o error occurs.
     */
    public boolean skipPreamble()
        throws IOException
    {
        // First delimiter may be not preceded with a CRLF, so temporarily
        // search for the delimiter without its leading CRLF.
        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
        boundaryLength = boundary.length - 2;
        try
        {
            // Discard all data up to the delimiter.
            discardBodyData();

            // Read boundary - if succeeded, the stream contains an
            // encapsulation.
            return readBoundary();
        }
        catch (MalformedStreamException e)
        {
            // No usable delimiter in the stream: report "no encapsulation"
            // rather than an error.
            return false;
        }
        finally
        {
            // Restore delimiter.
            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
            boundaryLength = boundary.length;
            boundary[0] = 0x0D;
            boundary[1] = 0x0A;
        }
    }


    /**
     * Compares <code>count</code> first bytes in the arrays
     * <code>a</code> and <code>b</code>.
     *
     * @param a The first array to compare.
     * @param b The second array to compare.
     * @param count How many bytes should be compared.
     *
     * @return <code>true</code> if <code>count</code> first bytes in arrays
     * <code>a</code> and <code>b</code> are equal.
     */
    public static boolean arrayequals(byte[] a,
                                      byte[] b,
                                      int count)
    {
        for (int i = 0; i < count; i++)
        {
            if (a[i] != b[i])
            {
                return false;
            }
        }
        return true;
    }


    /**
     * Searches for a byte of specified value in the <code>buffer</code>,
     * starting at the specified <code>position</code>.
     *
     * @param value The value to find.
     * @param pos The starting position for searching.
     *
     * @return The position of byte found, counting from beginning of the
     * <code>buffer</code>, or <code>-1</code> if not found.
     */
    protected int findByte(byte value,
                           int pos)
    {
        for (int i = pos; i < tail; i++)
        {
            if (buffer[i] == value)
            {
                return i;
            }
        }

        return -1;
    }


    /**
     * Searches for the <code>boundary</code> in the <code>buffer</code>
     * region delimited by <code>head</code> and <code>tail</code>.
     *
     * @return The position of the boundary found, counting from the
     * beginning of the <code>buffer</code>, or <code>-1</code> if
     * not found.
     */
    protected int findSeparator()
    {
        // Last buffer index at which a complete boundary still fits.
        int maxpos = tail - boundaryLength;
        int first = head;
        while (first <= maxpos)
        {
            // Jump to the next candidate: an occurrence of the first
            // boundary byte.
            first = findByte(boundary[0], first);
            if (first == -1 || first > maxpos)
            {
                return -1;
            }
            int match = 1;
            while (match < boundaryLength
                   && buffer[first + match] == boundary[match])
            {
                match++;
            }
            if (match == boundaryLength)
            {
                return first;
            }
            first++;
        }
        return -1;
    }


    /**
     * Returns a string representation of this object, made of the
     * delimiter currently searched for (with its leading
     * <code>CRLF--</code>) and the buffer size.
     *
     * @return The string representation of this object.
     */
    public String toString()
    {
        StringBuffer sbTemp = new StringBuffer();
        sbTemp.append("boundary='");
        if (boundary == null)
        {
            sbTemp.append("null");
        }
        else
        {
            // Each byte is rendered as one character; String.valueOf() on
            // a byte[] would only print the array's identity.
            for (int i = 0; i < boundary.length; i++)
            {
                sbTemp.append((char) (boundary[i] & 0xFF));
            }
        }
        sbTemp.append("'\nbufSize=");
        sbTemp.append(bufSize);
        return sbTemp.toString();
    }


    // -------------------------------------------------------- Private methods


    /**
     * Copies <code>body-data</code> of the current encapsulation into
     * <code>sink</code>, stopping right before the next delimiter.
     *
     * @param sink Receiver of the body bytes.
     *
     * @return The number of body bytes passed to <code>sink</code>.
     *
     * @exception MalformedStreamException if the stream ends before a
     * delimiter is found.
     * @exception IOException if an i/o error occurs.
     */
    private int copyBodyData(OutputStream sink)
        throws MalformedStreamException,
               IOException
    {
        int total = 0;
        while (true)
        {
            // Is boundary token present somewhere in the buffer?
            int pos = findSeparator();
            if (pos != -1)
            {
                // Write the rest of the data before the boundary.
                sink.write(buffer, head, pos - head);
                total += pos - head;
                head = pos;
                break;
            }

            // Determine how much data should be kept in the buffer: the
            // tail end might hold the beginning of a delimiter.
            int available = tail - head;
            int pad = (available > keepRegion) ? keepRegion : available;

            // Write out the data belonging to the body-data.
            sink.write(buffer, head, available - pad);
            total += available - pad;

            // Move the kept data to the beginning of the buffer.
            System.arraycopy(buffer, tail - pad, buffer, 0, pad);

            // Refill buffer with new data.
            head = 0;
            int bytesRead = input.read(buffer, pad, bufSize - pad);
            if (bytesRead == -1)
            {
                // The last pad amount is left in the buffer.
                // Boundary can't be in there so write out the
                // data you have and signal an error condition.
                sink.write(buffer, 0, pad);
                sink.flush();
                // Everything buffered has been handed over.
                tail = 0;
                throw new MalformedStreamException(
                        "Stream ended unexpectedly");
            }
            tail = pad + bytesRead;
        }
        sink.flush();
        return total;
    }


    /**
     * Builds the exception reported when the input ends prematurely,
     * keeping the underlying i/o failure as its cause.
     *
     * @param cause The i/o failure that revealed the end of the stream.
     *
     * @return The exception to throw.
     */
    private static MalformedStreamException streamEnded(IOException cause)
    {
        MalformedStreamException failure =
            new MalformedStreamException("Stream ended unexpectedly");
        failure.initCause(cause);
        return failure;
    }


    // --------------------------------------------------------- Nested classes


    /**
     * Thrown to indicate that the input stream fails to follow the
     * required syntax.
     */
    public static class MalformedStreamException
        extends IOException
    {
        /**
         * Constructs a <code>MalformedStreamException</code> with no
         * detail message.
         */
        public MalformedStreamException()
        {
            super();
        }

        /**
         * Constructs a <code>MalformedStreamException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public MalformedStreamException(String message)
        {
            super(message);
        }
    }


    /**
     * Thrown upon attempt of setting an invalid boundary token.
     */
    public static class IllegalBoundaryException
        extends IOException
    {
        /**
         * Constructs an <code>IllegalBoundaryException</code> with no
         * detail message.
         */
        public IllegalBoundaryException()
        {
            super();
        }

        /**
         * Constructs an <code>IllegalBoundaryException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public IllegalBoundaryException(String message)
        {
            super(message);
        }
    }


    // ------------------------------------------------------ Debugging methods


    // These are the methods that were used to debug this stuff.
    /*

    // Dump data.
    protected void dump()
    {
        System.out.println("01234567890");
        byte[] temp = new byte[buffer.length];
        for(int i=0; i<buffer.length; i++)
        {
            if (buffer[i] == 0x0D || buffer[i] == 0x0A)
            {
                temp[i] = 0x21;
            }
            else
            {
                temp[i] = buffer[i];
            }
        }
        System.out.println(new String(temp));
        int i;
        for (i=0; i<head; i++)
            System.out.print(" ");
        System.out.println("h");
        for (i=0; i<tail; i++)
            System.out.print(" ");
        System.out.println("t");
        System.out.flush();
    }

    // Main routine, for testing purposes only.
    //
    // @param args A String[] with the command line arguments.
    // @exception Exception, a generic exception.
    public static void main( String[] args )
        throws Exception
    {
        File boundaryFile = new File("boundary.dat");
        int boundarySize = (int)boundaryFile.length();
        byte[] boundary = new byte[boundarySize];
        FileInputStream input = new FileInputStream(boundaryFile);
        input.read(boundary,0,boundarySize);

        input = new FileInputStream("multipart.dat");
        MultipartStream chunks = new MultipartStream(input, boundary);

        int i = 0;
        String header;
        OutputStream output;
        boolean nextChunk = chunks.skipPreamble();
        while (nextChunk)
        {
            header = chunks.readHeaders();
            System.out.println("!"+header+"!");
            System.out.println("wrote part"+i+".dat");
            output = new FileOutputStream("part"+(i++)+".dat");
            chunks.readBodyData(output);
            nextChunk = chunks.readBoundary();
        }
    }

    */
}
893
Popular Tags