KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > javax > mail > internet > HeaderTokenizer


/*
 * The contents of this file are subject to the terms
 * of the Common Development and Distribution License
 * (the "License"). You may not use this file except
 * in compliance with the License.
 *
 * You can obtain a copy of the license at
 * glassfish/bootstrap/legal/CDDLv1.0.txt or
 * https://glassfish.dev.java.net/public/CDDLv1.0.html.
 * See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL
 * HEADER in each file and include the License file at
 * glassfish/bootstrap/legal/CDDLv1.0.txt. If applicable,
 * add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your
 * own identifying information: Portions Copyright [yyyy]
 * [name of copyright owner]
 */

/*
 * @(#)HeaderTokenizer.java 1.10 05/08/29
 *
 * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
 */

27
28 package javax.mail.internet;
29
30 import java.util.*;
31
32 /**
33  * This class tokenizes RFC822 and MIME headers into the basic
34  * symbols specified by RFC822 and MIME. <p>
35  *
36  * This class handles folded headers (ie headers with embedded
37  * CRLF SPACE sequences). The folds are removed in the returned
38  * tokens.
39  *
40  * @version 1.10, 05/08/29
41  * @author John Mani
42  */

43
44 public class HeaderTokenizer {
45
46     /**
47      * The Token class represents tokens returned by the
48      * HeaderTokenizer.
49      */

50     public static class Token {
51
52     private int type;
53     private String JavaDoc value;
54
55     /**
56      * Token type indicating an ATOM.
57      */

58     public static final int ATOM = -1;
59
60     /**
61      * Token type indicating a quoted string. The value
62      * field contains the string without the quotes.
63      */

64     public static final int QUOTEDSTRING = -2;
65
66     /**
67      * Token type indicating a comment. The value field
68      * contains the comment string without the comment
69      * start and end symbols.
70      */

71     public static final int COMMENT = -3;
72
73     /**
74      * Token type indicating end of input.
75      */

76     public static final int EOF = -4;
77
78     /**
79      * Constructor.
80      * @param type Token type
81      * @param value Token value
82      */

83     public Token(int type, String JavaDoc value) {
84          this.type = type;
85          this.value = value;
86     }
87
88     /**
89      * Return the type of the token. If the token represents a
90      * delimiter or a control character, the type is that character
91      * itself, converted to an integer. Otherwise, it's value is
92      * one of the following:
93      * <ul>
94      * <li><code>ATOM</code> A sequence of ASCII characters
95      * delimited by either SPACE, CTL, "(", <"> or the
96      * specified SPECIALS
97      * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
98      * within quotes
99      * <li><code>COMMENT</code> A sequence of ASCII characters
100      * within "(" and ")".
101      * <li><code>EOF</code> End of header
102      * </ul>
103      */

104     public int getType() {
105         return type;
106     }
107
108     /**
109      * Returns the value of the token just read. When the current
110      * token is a quoted string, this field contains the body of the
111      * string, without the quotes. When the current token is a comment,
112      * this field contains the body of the comment.
113      *
114      * @return token value
115      */

116     public String JavaDoc getValue() {
117         return value;
118     }
119     }
120
121     private String JavaDoc string; // the string to be tokenized
122
private boolean skipComments; // should comments be skipped ?
123
private String JavaDoc delimiters; // delimiter string
124
private int currentPos; // current parse position
125
private int maxPos; // string length
126
private int nextPos; // track start of next Token for next()
127
private int peekPos; // track start of next Token for peek()
128

129     /**
130      * RFC822 specials
131      */

132     public final static String JavaDoc RFC822 = "()<>@,;:\\\"\t .[]";
133
134     /**
135      * MIME specials
136      */

137     public final static String JavaDoc MIME = "()<>@,;:\\\"\t []/?=";
138
139     // The EOF Token
140
private final static Token EOFToken = new Token(Token.EOF, null);
141
142     /**
143      * Constructor that takes a rfc822 style header.
144      *
145      * @param header The rfc822 header to be tokenized
146      * @param delimiters Set of delimiter characters
147      * to be used to delimit ATOMS. These
148      * are usually <code>RFC822</code> or
149      * <code>MIME</code>
150      * @param skipComments If true, comments are skipped and
151      * not returned as tokens
152      */

153     public HeaderTokenizer(String JavaDoc header, String JavaDoc delimiters,
154                    boolean skipComments) {
155     string = (header == null) ? "" : header; // paranoia ?!
156
this.skipComments = skipComments;
157     this.delimiters = delimiters;
158     currentPos = nextPos = peekPos = 0;
159     maxPos = string.length();
160     }
161
162     /**
163      * Constructor. Comments are ignored and not returned as tokens
164      *
165      * @param header The header that is tokenized
166      * @param delimiters The delimiters to be used
167      */

168     public HeaderTokenizer(String JavaDoc header, String JavaDoc delimiters) {
169     this(header, delimiters, true);
170     }
171
172     /**
173      * Constructor. The RFC822 defined delimiters - RFC822 - are
174      * used to delimit ATOMS. Also comments are skipped and not
175      * returned as tokens
176      */

177     public HeaderTokenizer(String JavaDoc header) {
178     this(header, RFC822);
179     }
180
181     /**
182      * Parses the next token from this String. <p>
183      *
184      * Clients sit in a loop calling next() to parse successive
185      * tokens until an EOF Token is returned.
186      *
187      * @return the next Token
188      * @exception ParseException if the parse fails
189      */

190     public Token next() throws ParseException JavaDoc {
191     Token tk;
192
193     currentPos = nextPos; // setup currentPos
194
tk = getNext();
195     nextPos = peekPos = currentPos; // update currentPos and peekPos
196
return tk;
197     }
198
199     /**
200      * Peek at the next token, without actually removing the token
201      * from the parse stream. Invoking this method multiple times
202      * will return successive tokens, until <code>next()</code> is
203      * called. <p>
204      *
205      * @return the next Token
206      * @exception ParseException if the parse fails
207      */

208     public Token peek() throws ParseException JavaDoc {
209     Token tk;
210
211     currentPos = peekPos; // setup currentPos
212
tk = getNext();
213     peekPos = currentPos; // update peekPos
214
return tk;
215     }
216
217     /**
218      * Return the rest of the Header.
219      *
220      * @return String rest of header. null is returned if we are
221      * already at end of header
222      */

223     public String JavaDoc getRemainder() {
224     return string.substring(nextPos);
225     }
226
227     /*
228      * Return the next token starting from 'currentPos'. After the
229      * parse, 'currentPos' is updated to point to the start of the
230      * next token.
231      */

232     private Token getNext() throws ParseException JavaDoc {
233     // If we're already at end of string, return EOF
234
if (currentPos >= maxPos)
235         return EOFToken;
236
237     // Skip white-space, position currentPos beyond the space
238
if (skipWhiteSpace() == Token.EOF)
239         return EOFToken;
240
241     char c;
242     int start;
243     boolean filter = false;
244     
245     c = string.charAt(currentPos);
246
247     // Check or Skip comments and position currentPos
248
// beyond the comment
249
while (c == '(') {
250         // Parsing comment ..
251
int nesting;
252         for (start = ++currentPos, nesting = 1;
253          nesting > 0 && currentPos < maxPos;
254          currentPos++) {
255         c = string.charAt(currentPos);
256         if (c == '\\') { // Escape sequence
257
currentPos++; // skip the escaped character
258
filter = true;
259         } else if (c == '\r')
260             filter = true;
261         else if (c == '(')
262             nesting++;
263         else if (c == ')')
264             nesting--;
265         }
266         if (nesting != 0)
267         throw new ParseException JavaDoc("Unbalanced comments");
268
269         if (!skipComments) {
270         // Return the comment, if we are asked to.
271
// Note that the comment start & end markers are ignored.
272
String JavaDoc s;
273         if (filter) // need to go thru the token again.
274
s = filterToken(string, start, currentPos-1);
275         else
276             s = string.substring(start,currentPos-1);
277
278         return new Token(Token.COMMENT, s);
279         }
280
281         // Skip any whitespace after the comment.
282
if (skipWhiteSpace() == Token.EOF)
283         return EOFToken;
284         c = string.charAt(currentPos);
285     }
286
287     // Check for quoted-string and position currentPos
288
// beyond the terminating quote
289
if (c == '"') {
290         for (start = ++currentPos; currentPos < maxPos; currentPos++) {
291         c = string.charAt(currentPos);
292         if (c == '\\') { // Escape sequence
293
currentPos++;
294             filter = true;
295         } else if (c == '\r')
296             filter = true;
297         else if (c == '"') {
298             currentPos++;
299             String JavaDoc s;
300
301             if (filter)
302             s = filterToken(string, start, currentPos-1);
303             else
304             s = string.substring(start,currentPos-1);
305
306             return new Token(Token.QUOTEDSTRING, s);
307         }
308         }
309         throw new ParseException JavaDoc("Unbalanced quoted string");
310     }
311     
312     // Check for SPECIAL or CTL
313
if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
314         currentPos++; // re-position currentPos
315
char ch[] = new char[1];
316         ch[0] = c;
317         return new Token((int)c, new String JavaDoc(ch));
318     }
319
320     // Check for ATOM
321
for (start = currentPos; currentPos < maxPos; currentPos++) {
322         c = string.charAt(currentPos);
323         // ATOM is delimited by either SPACE, CTL, "(", <">
324
// or the specified SPECIALS
325
if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
326         c == '"' || delimiters.indexOf(c) >= 0)
327         break;
328     }
329     return new Token(Token.ATOM, string.substring(start, currentPos));
330     }
331
332     // Skip SPACE, HT, CR and NL
333
private int skipWhiteSpace() {
334     char c;
335     for (; currentPos < maxPos; currentPos++)
336         if (((c = string.charAt(currentPos)) != ' ') &&
337         (c != '\t') && (c != '\r') && (c != '\n'))
338         return currentPos;
339     return Token.EOF;
340     }
341
342     /* Process escape sequences and embedded LWSPs from a comment or
343      * quoted string.
344      */

345     private static String JavaDoc filterToken(String JavaDoc s, int start, int end) {
346     StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
347     char c;
348     boolean gotEscape = false;
349     boolean gotCR = false;
350
351     for (int i = start; i < end; i++) {
352         c = s.charAt(i);
353         if (c == '\n' && gotCR) {
354         // This LF is part of an unescaped
355
// CRLF sequence (i.e, LWSP). Skip it.
356
gotCR = false;
357         continue;
358         }
359
360         gotCR = false;
361         if (!gotEscape) {
362         // Previous character was NOT '\'
363
if (c == '\\') // skip this character
364
gotEscape = true;
365         else if (c == '\r') // skip this character
366
gotCR = true;
367         else // append this character
368
sb.append(c);
369         } else {
370         // Previous character was '\'. So no need to
371
// bother with any special processing, just
372
// append this character
373
sb.append(c);
374         gotEscape = false;
375         }
376     }
377     return sb.toString();
378     }
379 }
380
Popular Tags