BodyParser


1   /* ***** BEGIN LICENSE BLOCK *****
2    * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3    *
4    * The contents of this file are subject to the Mozilla Public License Version
5    * 1.1 (the "License"); you may not use this file except in compliance with
6    * the License. You may obtain a copy of the License at
7    * http://www.mozilla.org/MPL/
8    *
9    * Software distributed under the License is distributed on an "AS IS" basis,
10   * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11   * for the specific language governing rights and limitations under the
12   * License.
13   *
14   * The Original Code is Ristretto Mail API.
15   *
16   * The Initial Developers of the Original Code are
17   * Timo Stich and Frederik Dietz.
18   * Portions created by the Initial Developers are Copyright (C) 2004
19   * All Rights Reserved.
20   *
21   * Contributor(s):
22   *
23   * Alternatively, the contents of this file may be used under the terms of
24   * either the GNU General Public License Version 2 or later (the "GPL"), or
25   * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26   * in which case the provisions of the GPL or the LGPL are applicable instead
27   * of those above. If you wish to allow use of your version of this file only
28   * under the terms of either the GPL or the LGPL, and not to allow others to
29   * use your version of this file under the terms of the MPL, indicate your
30   * decision by deleting the provisions above and replace them with the notice
31   * and other provisions required by the GPL or the LGPL. If you do not delete
32   * the provisions above, a recipient may use your version of this file under
33   * the terms of any one of the MPL, the GPL or the LGPL.
34   *
35   * ***** END LICENSE BLOCK ***** */
36  package org.columba.ristretto.parser;
37  
38  import java.io.IOException  ;
39  import java.util.Iterator  ;
40  import java.util.List  ;
41  import java.util.regex.Matcher  ;
42  import java.util.regex.Pattern  ;
43  
44  import org.columba.ristretto.io.Source;
45  import org.columba.ristretto.message.Header;
46  import org.columba.ristretto.message.LocalMimePart;
47  import org.columba.ristretto.message.MimeHeader;
48  import org.columba.ristretto.message.MimeType;
49  
50  /**
51   * Parser for the MIME structure of a RFC 2822 or RFC 2045 compliant message.
52   * 
53   * @author Timo Stich <tstich@users.sourceforge.net>
54   */
55  public class BodyParser {
56  
57      private static final Pattern   headerEndPattern = Pattern.compile("\r\n\r\n");
58  
59      private static final Pattern   lineEndPattern = Pattern
60              .compile("\r?\n|\r\n?");
61  
62      private BodyParser() {
63      }
64  
65      /**
66       * Positions the {@link Source}after the header of the message. This is
67       * necessary when invoking the BodyParser on a complete message source
68       * without use of the {@link HeaderParser}.
69       * 
70       * @param source
71       * @throws IOException
72       */
73      public static void skipHeader(Source source) throws IOException   {
74          Matcher   matcher = headerEndPattern.matcher(source);
75          if (matcher.find()) {
76              source.seek(matcher.end());
77          }
78      }
79  
80      /**
81       * Parse the MIME structure of a message from a {@link Source}. The Source
82       * is parsed from the actual position of the Source and is positioned after
83       * the MIME structure when parsing is finished.
84       * <p>
85       * <b>Note: </b> Normally the BodyParser is called <it>after </it> the
86       * {@link HeaderParser}. When using without the HeaderParser the Source
87       * must be positioned correctly!
88       * 
89       * @see #skipHeader(Source)
90       * 
91       * @param header
92       *            the header of this part which contains information about the
93       *            content type.
94       * @param message
95       *            the source of the message which is parsed. The source will be
96       *            positioned after the MIME structure when parsing has finished.
97       * @return the parsed LocalMimePart
98       * @throws IOException
99       * @throws ParserException
100      */
101     public static LocalMimePart parseMimePart(MimeHeader header, Source message)
102             throws IOException  , ParserException {
103 
104         boolean endBoundaryFound = false;
105 
106         MimeType type = header.getMimeType();
107         // If normal part just return it
108         if (!type.getType().equals("multipart")) {
109             return new LocalMimePart(header, message.fromActualPosition());
110         }
111 
112         // Search for the boundaries and call this method recursive on every
113         // found part
114         LocalMimePart multipart = new LocalMimePart(header, message
115                 .fromActualPosition());
116         String   boundary = header.getContentParameter("boundary");
117         if (boundary == null)
118             throw new ParserException(
119                     "Content-Type is multipart, but no boundary specified!");
120 
121         CharSequenceSearcher boundarySearcher = new CharSequenceSearcher("--"
122                 + boundary);
123         List   foundBoundaries = boundarySearcher.match(message);
124 
125         // filter invalid boundaries that are not followed by \r, \n or -
126         Iterator   it = foundBoundaries.iterator();
127         while (it.hasNext()) {
128             char suffix = message.charAt(((Integer  ) it.next()).intValue()
129                     + boundary.length() + 2);
130             if (suffix != '\r' && suffix != '\n' && suffix != '-') {
131                 it.remove();
132             }
133         }
134 
135         // Process the found boundaries
136         if (foundBoundaries.size() == 0) {
137             throw new ParserException("No startboundary found: " + boundary,
138                     message);
139         }
140 
141         it = foundBoundaries.iterator();
142 
143         int boundaryStart, boundaryEnd;
144 
145         int start = ((Integer  ) it.next()).intValue() + boundary.length() + 2;
146 
147         //cope with suffixed (--)\r?\n
148         switch (message.charAt(start)) {
149         case '\r': {
150             start++;
151             if (message.charAt(start) == '\n') {
152                 start++;
153             }
154             break;
155         }
156 
157         case '\n': {
158             start++;
159             break;
160         }
161         }
162 
163         // For every found start boundary
164         while (it.hasNext()) {
165             boundaryStart = ((Integer  ) it.next()).intValue();
166             boundaryEnd = boundaryStart + boundary.length() + 2; // 2 = "--"
167 
168             //cope with prefixed \r? and \n
169             switch (message.charAt(boundaryStart - 1)) {
170             case '\n': {
171                 boundaryStart--;
172                 if (message.charAt(boundaryStart) == '\n') {
173                     boundaryStart--;
174                 }
175                 break;
176             }
177 
178             case '\r': {
179                 boundaryStart--;
180             }
181             }
182 
183             //cope with suffixed (--)\r?\n
184             switch (message.charAt(boundaryEnd)) {
185             case '-': {
186                 // this is the last boundary we do not need
187                 // the exact end position
188                 boundaryEnd += 4;
189                 endBoundaryFound = true;
190                 break;
191             }
192 
193             case '\r': {
194                 boundaryEnd++;
195                 if (message.length() > boundaryEnd
196                         && message.charAt(boundaryEnd) == '\n') {
197                     boundaryEnd++;
198                 }
199                 break;
200             }
201 
202             case '\n': {
203                 boundaryEnd++;
204                 break;
205             }
206 
207             }
208 
209             //Subsource that includes the mimepart
210             Source subSource = message.subSource(start, boundaryStart);
211 
212             //Parse the Header of the subpart
213             Header subHeader = HeaderParser.parse(subSource);
214 
215             //And parse the body of subpart
216             LocalMimePart subPart = BodyParser.parseMimePart(new MimeHeader(
217                     subHeader), subSource);
218 
219             //Set the sources and add it to the mimeparttree
220             subPart.setSource(subSource);
221             multipart.addChild(subPart);
222             start = boundaryEnd;
223         }
224 
225         int end;
226         if (!endBoundaryFound) {
227             // Handle a missing end boundary with taking everything till the end
228             end = message.length();
229 
230             //Subsource that includes the mimepart
231             Source subSource = message.subSource(start, end);
232 
233             //Parse the Header of the subpart
234             Header subHeader = HeaderParser.parse(subSource);
235 
236             //And parse the body of subpart
237             LocalMimePart subPart = BodyParser.parseMimePart(new MimeHeader(
238                     subHeader), subSource);
239 
240             //Set the sources and add it to the mimeparttree
241             subPart.setSource(subSource);
242             multipart.addChild(subPart);
243         }
244 
245         return multipart;
246     }
247 
248 }
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Free Books Free Magazines
Popular Tags