KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > javax > mail > internet > MimeUtility


1 /*
2  * The contents of this file are subject to the terms
3  * of the Common Development and Distribution License
4  * (the "License"). You may not use this file except
5  * in compliance with the License.
6  *
7  * You can obtain a copy of the license at
8  * glassfish/bootstrap/legal/CDDLv1.0.txt or
9  * https://glassfish.dev.java.net/public/CDDLv1.0.html.
10  * See the License for the specific language governing
11  * permissions and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL
14  * HEADER in each file and include the License file at
15  * glassfish/bootstrap/legal/CDDLv1.0.txt. If applicable,
16  * add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your
18  * own identifying information: Portions Copyright [yyyy]
19  * [name of copyright owner]
20  */

21
22 /*
23  * @(#)MimeUtility.java 1.54 05/08/29
24  *
25  * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
26  */

27
28 package javax.mail.internet;
29
30 import javax.mail.MessagingException JavaDoc;
31 import javax.activation.*;
32 import java.util.*;
33 import java.io.*;
34 import com.sun.mail.util.*;
35
36 /**
37  * This is a utility class that provides various MIME related
38  * functionality. <p>
39  *
40  * There is a set of methods to encode and decode MIME headers as
41  * per RFC 2047. A brief description on handling such headers is
42  * given below: <p>
43  *
44  * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
45  * characters. Headers that contain non US-ASCII characters must be
46  * encoded so that they contain only US-ASCII characters. Basically,
47  * this process involves using either BASE64 or QP to encode certain
48  * characters. RFC 2047 describes this in detail. <p>
49  *
50  * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
51  * subset of Unicode (and occupies the range 0 - 127). A String
52  * that contains only ASCII characters is already mail-safe. If the
53  * String contains non US-ASCII characters, it must be encoded. An
54  * additional complexity in this step is that since Unicode is not
55  * yet a widely used charset, one might want to first charset-encode
56  * the String into another charset and then do the transfer-encoding.
57  * <p>
58  * Note that to get the actual bytes of a mail-safe String (say,
59  * for sending over SMTP), one must do
60  * <p><blockquote><pre>
61  *
62  * byte[] bytes = string.getBytes("iso-8859-1");
63  *
64  * </pre></blockquote><p>
65  *
66  * The <code>setHeader</code> and <code>addHeader</code> methods
67  * on MimeMessage and MimeBodyPart assume that the given header values
68  * are Unicode strings that contain only US-ASCII characters. Hence
69  * the callers of those methods must ensure that the values they pass
70  * do not contain non US-ASCII characters. The methods in this class
71  * help do this. <p>
72  *
73  * The <code>getHeader</code> family of methods on MimeMessage and
74  * MimeBodyPart return the raw header value. These might be encoded
75  * as per RFC 2047, and if so, must be decoded into Unicode Strings.
76  * The methods in this class help to do this. <p>
77  *
78  * Several System properties control strict conformance to the MIME
79  * spec. Note that these are not session properties but must be set
80  * globally as System properties. <p>
81  *
82  * The <code>mail.mime.decodetext.strict</code> property controls
83  * decoding of MIME encoded words. The MIME spec requires that encoded
84  * words start at the beginning of a whitespace separated word. Some
85  * mailers incorrectly include encoded words in the middle of a word.
86  * If the <code>mail.mime.decodetext.strict</code> System property is
87  * set to <code>"false"</code>, an attempt will be made to decode these
88  * illegal encoded words. The default is true. <p>
89  *
90  * The <code>mail.mime.encodeeol.strict</code> property controls the
91  * choice of Content-Transfer-Encoding for MIME parts that are not of
92  * type "text". Often such parts will contain textual data for which
93  * an encoding that allows normal end of line conventions is appropriate.
94  * In rare cases, such a part will appear to contain entirely textual
95  * data, but will require an encoding that preserves CR and LF characters
96  * without change. If the <code>mail.mime.encodeeol.strict</code>
97  * System property is set to <code>"true"</code>, such an encoding will
98  * be used when necessary. The default is false. <p>
99  *
100  * In addition, the <code>mail.mime.charset</code> System property can
101  * be used to specify the default MIME charset to use for encoded words
102  * and text parts that don't otherwise specify a charset. Normally, the
103  * default MIME charset is derived from the default Java charset, as
104  * specified in the <code>file.encoding</code> System property. Most
105  * applications will have no need to explicitly set the default MIME
106  * charset. In cases where the default MIME charset to be used for
107  * mail messages is different than the charset used for files stored on
108  * the system, this property should be set.
109  *
110  * @version 1.54, 05/08/29
111  * @author John Mani
112  * @author Bill Shannon
113  */

114
115 public class MimeUtility {
116
117     // This class cannot be instantiated
118
private MimeUtility() { }
119
120     public static final int ALL = -1;
121
122     private static boolean decodeStrict = true;
123     private static boolean encodeEolStrict = false;
124     private static boolean foldEncodedWords = false;
125     private static boolean foldText = true;
126
127     static {
128     try {
129         String JavaDoc s = System.getProperty("mail.mime.decodetext.strict");
130         // default to true
131
decodeStrict = s == null || !s.equalsIgnoreCase("false");
132         s = System.getProperty("mail.mime.encodeeol.strict");
133         // default to false
134
encodeEolStrict = s != null && s.equalsIgnoreCase("true");
135         s = System.getProperty("mail.mime.foldencodedwords");
136         // default to false
137
foldEncodedWords = s != null && s.equalsIgnoreCase("true");
138         s = System.getProperty("mail.mime.foldtext");
139         // default to true
140
foldText = s == null || !s.equalsIgnoreCase("false");
141     } catch (SecurityException JavaDoc sex) {
142         // ignore it
143
}
144     }
145         
146
147     /**
148      * Get the content-transfer-encoding that should be applied
149      * to the input stream of this datasource, to make it mailsafe. <p>
150      *
151      * The algorithm used here is: <br>
152      * <ul>
153      * <li>
154      * If the primary type of this datasource is "text" and if all
155      * the bytes in its input stream are US-ASCII, then the encoding
156      * is "7bit". If more than half of the bytes are non-US-ASCII, then
157      * the encoding is "base64". If less than half of the bytes are
158      * non-US-ASCII, then the encoding is "quoted-printable".
159      * <li>
160      * If the primary type of this datasource is not "text", then if
161      * all the bytes of its input stream are US-ASCII, the encoding
162      * is "7bit". If there is even one non-US-ASCII character, the
163      * encoding is "base64".
164      * </ul>
165      *
166      * @param ds DataSource
167      * @return the encoding. This is either "7bit",
168      * "quoted-printable" or "base64"
169      */

170     public static String JavaDoc getEncoding(DataSource ds) {
171     ContentType JavaDoc cType = null;
172     InputStream is = null;
173     String JavaDoc encoding = null;
174
175     try {
176         cType = new ContentType JavaDoc(ds.getContentType());
177         is = ds.getInputStream();
178     } catch (Exception JavaDoc ex) {
179         return "base64"; // what else ?!
180
}
181
182     boolean isText = cType.match("text/*");
183     // if not text, stop processing when we see non-ASCII
184
int i = checkAscii(is, ALL, !isText);
185     switch (i) {
186     case ALL_ASCII:
187         encoding = "7bit"; // all ascii
188
break;
189     case MOSTLY_ASCII:
190         encoding = "quoted-printable"; // mostly ascii
191
break;
192     default:
193         encoding = "base64"; // mostly binary
194
break;
195     }
196
197     // Close the input stream
198
try {
199         is.close();
200     } catch (IOException ioex) { }
201
202     return encoding;
203     }
204
205     /**
206      * Same as <code>getEncoding(DataSource)</code> except that instead
207      * of reading the data from an <code>InputStream</code> it uses the
208      * <code>writeTo</code> method to examine the data. This is more
209      * efficient in the common case of a <code>DataHandler</code>
210      * created with an object and a MIME type (for example, a
211      * "text/plain" String) because all the I/O is done in this
212      * thread. In the case requiring an <code>InputStream</code> the
213      * <code>DataHandler</code> uses a thread, a pair of pipe streams,
214      * and the <code>writeTo</code> method to produce the data. <p>
215      *
216      * @since JavaMail 1.2
217      */

218     public static String JavaDoc getEncoding(DataHandler dh) {
219     ContentType JavaDoc cType = null;
220     String JavaDoc encoding = null;
221
222     /*
223      * Try to pick the most efficient means of determining the
224      * encoding. If this DataHandler was created using a DataSource,
225      * the getEncoding(DataSource) method is typically faster. If
226      * the DataHandler was created with an object, this method is
227      * much faster. To distinguish the two cases, we use a heuristic.
228      * A DataHandler created with an object will always have a null name.
229      * A DataHandler created with a DataSource will usually have a
230      * non-null name.
231      *
232      * XXX - This is actually quite a disgusting hack, but it makes
233      * a common case run over twice as fast.
234      */

235     if (dh.getName() != null)
236         return getEncoding(dh.getDataSource());
237
238     try {
239         cType = new ContentType JavaDoc(dh.getContentType());
240     } catch (Exception JavaDoc ex) {
241         return "base64"; // what else ?!
242
}
243
244     if (cType.match("text/*")) {
245         // Check all of the available bytes
246
AsciiOutputStream aos = new AsciiOutputStream(false, false);
247         try {
248         dh.writeTo(aos);
249         } catch (IOException ex) { } // ignore it
250
switch (aos.getAscii()) {
251         case ALL_ASCII:
252         encoding = "7bit"; // all ascii
253
break;
254         case MOSTLY_ASCII:
255         encoding = "quoted-printable"; // mostly ascii
256
break;
257         default:
258         encoding = "base64"; // mostly binary
259
break;
260         }
261     } else { // not "text"
262
// Check all of available bytes, break out if we find
263
// at least one non-US-ASCII character
264
AsciiOutputStream aos =
265             new AsciiOutputStream(true, encodeEolStrict);
266         try {
267         dh.writeTo(aos);
268         } catch (IOException ex) { } // ignore it
269
if (aos.getAscii() == ALL_ASCII) // all ascii
270
encoding = "7bit";
271         else // found atleast one non-ascii character, use b64
272
encoding = "base64";
273     }
274
275     return encoding;
276     }
277
278     /**
279      * Decode the given input stream. The Input stream returned is
280      * the decoded input stream. All the encodings defined in RFC 2045
281      * are supported here. They include "base64", "quoted-printable",
282      * "7bit", "8bit", and "binary". In addition, "uuencode" is also
283      * supported.
284      *
285      * @param is input stream
286      * @param encoding the encoding of the stream.
287      * @return decoded input stream.
288      */

289     public static InputStream decode(InputStream is, String JavaDoc encoding)
290         throws MessagingException JavaDoc {
291     if (encoding.equalsIgnoreCase("base64"))
292         return new BASE64DecoderStream(is);
293     else if (encoding.equalsIgnoreCase("quoted-printable"))
294         return new QPDecoderStream(is);
295     else if (encoding.equalsIgnoreCase("uuencode") ||
296          encoding.equalsIgnoreCase("x-uuencode") ||
297          encoding.equalsIgnoreCase("x-uue"))
298         return new UUDecoderStream(is);
299     else if (encoding.equalsIgnoreCase("binary") ||
300          encoding.equalsIgnoreCase("7bit") ||
301          encoding.equalsIgnoreCase("8bit"))
302         return is;
303     else
304         throw new MessagingException JavaDoc("Unknown encoding: " + encoding);
305     }
306
307     /**
308      * Wrap an encoder around the given output stream.
309      * All the encodings defined in RFC 2045 are supported here.
310      * They include "base64", "quoted-printable", "7bit", "8bit" and
311      * "binary". In addition, "uuencode" is also supported.
312      *
313      * @param os output stream
314      * @param encoding the encoding of the stream.
315      * @return output stream that applies the
316      * specified encoding.
317      */

318     public static OutputStream encode(OutputStream os, String JavaDoc encoding)
319         throws MessagingException JavaDoc {
320         if (encoding == null)
321         return os;
322     else if (encoding.equalsIgnoreCase("base64"))
323         return new BASE64EncoderStream(os);
324     else if (encoding.equalsIgnoreCase("quoted-printable"))
325         return new QPEncoderStream(os);
326     else if (encoding.equalsIgnoreCase("uuencode") ||
327          encoding.equalsIgnoreCase("x-uuencode") ||
328          encoding.equalsIgnoreCase("x-uue"))
329         return new UUEncoderStream(os);
330     else if (encoding.equalsIgnoreCase("binary") ||
331          encoding.equalsIgnoreCase("7bit") ||
332          encoding.equalsIgnoreCase("8bit"))
333         return os;
334     else
335         throw new MessagingException JavaDoc("Unknown encoding: " +encoding);
336     }
337
338     /**
339      * Wrap an encoder around the given output stream.
340      * All the encodings defined in RFC 2045 are supported here.
341      * They include "base64", "quoted-printable", "7bit", "8bit" and
342      * "binary". In addition, "uuencode" is also supported.
343      * The <code>filename</code> parameter is used with the "uuencode"
344      * encoding and is included in the encoded output.
345      *
346      * @param os output stream
347      * @param encoding the encoding of the stream.
348      * @param filename name for the file being encoded (only used
349      * with uuencode)
350      * @return output stream that applies the
351      * specified encoding.
352      * @since JavaMail 1.2
353      */

354     public static OutputStream encode(OutputStream os, String JavaDoc encoding,
355                                       String JavaDoc filename)
356                 throws MessagingException JavaDoc {
357         if (encoding == null)
358             return os;
359         else if (encoding.equalsIgnoreCase("base64"))
360             return new BASE64EncoderStream(os);
361         else if (encoding.equalsIgnoreCase("quoted-printable"))
362             return new QPEncoderStream(os);
363         else if (encoding.equalsIgnoreCase("uuencode") ||
364                  encoding.equalsIgnoreCase("x-uuencode") ||
365                  encoding.equalsIgnoreCase("x-uue"))
366             return new UUEncoderStream(os, filename);
367         else if (encoding.equalsIgnoreCase("binary") ||
368                  encoding.equalsIgnoreCase("7bit") ||
369                  encoding.equalsIgnoreCase("8bit"))
370             return os;
371         else
372             throw new MessagingException JavaDoc("Unknown encoding: " +encoding);
373     }
374
375     /**
376      * Encode a RFC 822 "text" token into mail-safe form as per
377      * RFC 2047. <p>
378      *
379      * The given Unicode string is examined for non US-ASCII
380      * characters. If the string contains only US-ASCII characters,
381      * it is returned as-is. If the string contains non US-ASCII
382      * characters, it is first character-encoded using the platform's
383      * default charset, then transfer-encoded using either the B or
384      * Q encoding. The resulting bytes are then returned as a Unicode
385      * string containing only ASCII characters. <p>
386      *
387      * Note that this method should be used to encode only
388      * "unstructured" RFC 822 headers. <p>
389      *
390      * Example of usage:
391      * <p><blockquote><pre>
392      *
393      * MimePart part = ...
394      * String rawvalue = "FooBar Mailer, Japanese version 1.1"
395      * try {
396      * // If we know for sure that rawvalue contains only US-ASCII
397      * // characters, we can skip the encoding part
398      * part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
399      * } catch (UnsupportedEncodingException e) {
400      * // encoding failure
401      * } catch (MessagingException me) {
402      * // setHeader() failure
403      * }
404      *
405      * </pre></blockquote><p>
406      *
407      * @param text Unicode string
408      * @return Unicode string containing only US-ASCII characters
409      * @exception UnsupportedEncodingException if the encoding fails
410      */

411     public static String JavaDoc encodeText(String JavaDoc text)
412             throws UnsupportedEncodingException {
413     return encodeText(text, null, null);
414     }
415
416     /**
417      * Encode a RFC 822 "text" token into mail-safe form as per
418      * RFC 2047. <p>
419      *
420      * The given Unicode string is examined for non US-ASCII
421      * characters. If the string contains only US-ASCII characters,
422      * it is returned as-is. If the string contains non US-ASCII
423      * characters, it is first character-encoded using the specified
424      * charset, then transfer-encoded using either the B or Q encoding.
425      * The resulting bytes are then returned as a Unicode string
426      * containing only ASCII characters. <p>
427      *
428      * Note that this method should be used to encode only
429      * "unstructured" RFC 822 headers.
430      *
431      * @param text the header value
432      * @param charset the charset. If this parameter is null, the
433      * platform's default chatset is used.
434      * @param encoding the encoding to be used. Currently supported
435      * values are "B" and "Q". If this parameter is null, then
436      * the "Q" encoding is used if most of characters to be
437      * encoded are in the ASCII charset, otherwise "B" encoding
438      * is used.
439      * @return Unicode string containing only US-ASCII characters
440      */

441     public static String JavaDoc encodeText(String JavaDoc text, String JavaDoc charset,
442                     String JavaDoc encoding)
443             throws UnsupportedEncodingException {
444     return encodeWord(text, charset, encoding, false);
445     }
446
447     /**
448      * Decode "unstructured" headers, that is, headers that are defined
449      * as '*text' as per RFC 822. <p>
450      *
451      * The string is decoded using the algorithm specified in
452      * RFC 2047, Section 6.1.1. If the charset-conversion fails
453      * for any sequence, an UnsupportedEncodingException is thrown.
454      * If the String is not an RFC 2047 style encoded header, it is
455      * returned as-is <p>
456      *
457      * Example of usage:
458      * <p><blockquote><pre>
459      *
460      * MimePart part = ...
461      * String rawvalue = null;
462      * String value = null;
463      * try {
464      * if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
465      * value = MimeUtility.decodeText(rawvalue);
466      * } catch (UnsupportedEncodingException e) {
467      * // Don't care
468      * value = rawvalue;
469      * } catch (MessagingException me) { }
470      *
471      * return value;
472      *
473      * </pre></blockquote><p>
474      *
475      * @param etext the possibly encoded value
476      * @exception UnsupportedEncodingException if the charset
477      * conversion failed.
478      */

479     public static String JavaDoc decodeText(String JavaDoc etext)
480         throws UnsupportedEncodingException {
481     /*
482      * We look for sequences separated by "linear-white-space".
483      * (as per RFC 2047, Section 6.1.1)
484      * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
485      */

486     String JavaDoc lwsp = " \t\n\r";
487     StringTokenizer st;
488
489     /*
490      * First, lets do a quick run thru the string and check
491      * whether the sequence "=?" exists at all. If none exists,
492      * we know there are no encoded-words in here and we can just
493      * return the string as-is, without suffering thru the later
494      * decoding logic.
495      * This handles the most common case of unencoded headers
496      * efficiently.
497      */

498     if (etext.indexOf("=?") == -1)
499         return etext;
500
501     // Encoded words found. Start decoding ...
502

503     st = new StringTokenizer(etext, lwsp, true);
504     StringBuffer JavaDoc sb = new StringBuffer JavaDoc(); // decode buffer
505
StringBuffer JavaDoc wsb = new StringBuffer JavaDoc(); // white space buffer
506
boolean prevWasEncoded = false;
507
508     while (st.hasMoreTokens()) {
509         char c;
510         String JavaDoc s = st.nextToken();
511         // If whitespace, append it to the whitespace buffer
512
if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
513         (c == '\r') || (c == '\n'))
514         wsb.append(c);
515         else {
516         // Check if token is an 'encoded-word' ..
517
String JavaDoc word;
518         try {
519             word = decodeWord(s);
520             // Yes, this IS an 'encoded-word'.
521
if (!prevWasEncoded && wsb.length() > 0) {
522             // if the previous word was also encoded, we
523
// should ignore the collected whitespace. Else
524
// we include the whitespace as well.
525
sb.append(wsb);
526             }
527             prevWasEncoded = true;
528         } catch (ParseException JavaDoc pex) {
529             // This is NOT an 'encoded-word'.
530
word = s;
531             // possibly decode inner encoded words
532
if (!decodeStrict)
533             word = decodeInnerWords(word);
534             // include colleced whitespace ..
535
if (wsb.length() > 0)
536             sb.append(wsb);
537             prevWasEncoded = false;
538         }
539         sb.append(word); // append the actual word
540
wsb.setLength(0); // reset wsb for reuse
541
}
542     }
543     return sb.toString();
544     }
545
546     /**
547      * Encode a RFC 822 "word" token into mail-safe form as per
548      * RFC 2047. <p>
549      *
550      * The given Unicode string is examined for non US-ASCII
551      * characters. If the string contains only US-ASCII characters,
552      * it is returned as-is. If the string contains non US-ASCII
553      * characters, it is first character-encoded using the platform's
554      * default charset, then transfer-encoded using either the B or
555      * Q encoding. The resulting bytes are then returned as a Unicode
556      * string containing only ASCII characters. <p>
557      *
558      * This method is meant to be used when creating RFC 822 "phrases".
559      * The InternetAddress class, for example, uses this to encode
560      * it's 'phrase' component.
561      *
562      * @param word Unicode string
563      * @return Array of Unicode strings containing only US-ASCII
564      * characters.
565      * @exception UnsupportedEncodingException if the encoding fails
566      */

567     public static String JavaDoc encodeWord(String JavaDoc word)
568             throws UnsupportedEncodingException {
569     return encodeWord(word, null, null);
570     }
571
572     /**
573      * Encode a RFC 822 "word" token into mail-safe form as per
574      * RFC 2047. <p>
575      *
576      * The given Unicode string is examined for non US-ASCII
577      * characters. If the string contains only US-ASCII characters,
578      * it is returned as-is. If the string contains non US-ASCII
579      * characters, it is first character-encoded using the specified
580      * charset, then transfer-encoded using either the B or Q encoding.
581      * The resulting bytes are then returned as a Unicode string
582      * containing only ASCII characters. <p>
583      *
584      * @param word Unicode string
585      * @param charset the MIME charset
586      * @param encoding the encoding to be used. Currently supported
587      * values are "B" and "Q". If this parameter is null, then
588      * the "Q" encoding is used if most of characters to be
589      * encoded are in the ASCII charset, otherwise "B" encoding
590      * is used.
591      * @return Unicode string containing only US-ASCII characters
592      * @exception UnsupportedEncodingException if the encoding fails
593      */

594     public static String JavaDoc encodeWord(String JavaDoc word, String JavaDoc charset,
595                     String JavaDoc encoding)
596                 throws UnsupportedEncodingException {
597     return encodeWord(word, charset, encoding, true);
598     }
599
600     /*
601      * Encode the given string. The parameter 'encodingWord' should
602      * be true if a RFC 822 "word" token is being encoded and false if a
603      * RFC 822 "text" token is being encoded. This is because the
604      * "Q" encoding defined in RFC 2047 has more restrictions when
605      * encoding "word" tokens. (Sigh)
606      */

607     private static String JavaDoc encodeWord(String JavaDoc string, String JavaDoc charset,
608                      String JavaDoc encoding, boolean encodingWord)
609             throws UnsupportedEncodingException {
610
611     // If 'string' contains only US-ASCII characters, just
612
// return it.
613
int ascii = checkAscii(string);
614     if (ascii == ALL_ASCII)
615         return string;
616
617     // Else, apply the specified charset conversion.
618
String JavaDoc jcharset;
619     if (charset == null) { // use default charset
620
jcharset = getDefaultJavaCharset(); // the java charset
621
charset = getDefaultMIMECharset(); // the MIME equivalent
622
} else // MIME charset -> java charset
623
jcharset = javaCharset(charset);
624
625     // If no transfer-encoding is specified, figure one out.
626
if (encoding == null) {
627         if (ascii != MOSTLY_NONASCII)
628         encoding = "Q";
629         else
630         encoding = "B";
631     }
632
633     boolean b64;
634     if (encoding.equalsIgnoreCase("B"))
635         b64 = true;
636     else if (encoding.equalsIgnoreCase("Q"))
637         b64 = false;
638     else
639         throw new UnsupportedEncodingException(
640             "Unknown transfer encoding: " + encoding);
641
642     StringBuffer JavaDoc outb = new StringBuffer JavaDoc(); // the output buffer
643
doEncode(string, b64, jcharset,
644          // As per RFC 2047, size of an encoded string should not
645
// exceed 75 bytes.
646
// 7 = size of "=?", '?', 'B'/'Q', '?', "?="
647
75 - 7 - charset.length(), // the available space
648
"=?" + charset + "?" + encoding + "?", // prefix
649
true, encodingWord, outb);
650
651     return outb.toString();
652     }
653
654     private static void doEncode(String JavaDoc string, boolean b64,
655         String JavaDoc jcharset, int avail, String JavaDoc prefix,
656         boolean first, boolean encodingWord, StringBuffer JavaDoc buf)
657             throws UnsupportedEncodingException {
658
659     // First find out what the length of the encoded version of
660
// 'string' would be.
661
byte[] bytes = string.getBytes(jcharset);
662     int len;
663     if (b64) // "B" encoding
664
len = BEncoderStream.encodedLength(bytes);
665     else // "Q"
666
len = QEncoderStream.encodedLength(bytes, encodingWord);
667     
668     int size;
669     if ((len > avail) && ((size = string.length()) > 1)) {
670         // If the length is greater than 'avail', split 'string'
671
// into two and recurse.
672
doEncode(string.substring(0, size/2), b64, jcharset,
673              avail, prefix, first, encodingWord, buf);
674         doEncode(string.substring(size/2, size), b64, jcharset,
675              avail, prefix, false, encodingWord, buf);
676     } else {
677         // length <= than 'avail'. Encode the given string
678
ByteArrayOutputStream os = new ByteArrayOutputStream();
679         OutputStream eos; // the encoder
680
if (b64) // "B" encoding
681
eos = new BEncoderStream(os);
682         else // "Q" encoding
683
eos = new QEncoderStream(os, encodingWord);
684         
685         try { // do the encoding
686
eos.write(bytes);
687         eos.close();
688         } catch (IOException ioex) { }
689
690         byte[] encodedBytes = os.toByteArray(); // the encoded stuff
691
// Now write out the encoded (all ASCII) bytes into our
692
// StringBuffer
693
if (!first) // not the first line of this sequence
694
if (foldEncodedWords)
695             buf.append("\r\n "); // start a continuation line
696
else
697             buf.append(" "); // line will be folded later
698

699         buf.append(prefix);
700         for (int i = 0; i < encodedBytes.length; i++)
701         buf.append((char)encodedBytes[i]);
702         buf.append("?="); // terminate the current sequence
703
}
704     }
705
706     /**
707      * The string is parsed using the rules in RFC 2047 for parsing
708      * an "encoded-word". If the parse fails, a ParseException is
709      * thrown. Otherwise, it is transfer-decoded, and then
710      * charset-converted into Unicode. If the charset-conversion
711      * fails, an UnsupportedEncodingException is thrown.<p>
712      *
713      * @param eword the possibly encoded value
714      * @exception ParseException if the string is not an
715      * encoded-word as per RFC 2047.
716      * @exception UnsupportedEncodingException if the charset
717      * conversion failed.
718      */

719     public static String JavaDoc decodeWord(String JavaDoc eword)
720         throws ParseException JavaDoc, UnsupportedEncodingException {
721
722     if (!eword.startsWith("=?")) // not an encoded word
723
throw new ParseException JavaDoc();
724     
725     // get charset
726
int start = 2; int pos;
727     if ((pos = eword.indexOf('?', start)) == -1)
728         throw new ParseException JavaDoc();
729     String JavaDoc charset = javaCharset(eword.substring(start, pos));
730
731     // get encoding
732
start = pos+1;
733     if ((pos = eword.indexOf('?', start)) == -1)
734         throw new ParseException JavaDoc();
735     String JavaDoc encoding = eword.substring(start, pos);
736
737     // get encoded-sequence
738
start = pos+1;
739     if ((pos = eword.indexOf("?=", start)) == -1)
740         throw new ParseException JavaDoc();
741     String JavaDoc word = eword.substring(start, pos);
742
743     try {
744         String JavaDoc decodedWord;
745         if (word.length() > 0) {
746         // Extract the bytes from word
747
ByteArrayInputStream bis =
748             new ByteArrayInputStream(ASCIIUtility.getBytes(word));
749
750         // Get the appropriate decoder
751
InputStream is;
752         if (encoding.equalsIgnoreCase("B"))
753             is = new BASE64DecoderStream(bis);
754         else if (encoding.equalsIgnoreCase("Q"))
755             is = new QDecoderStream(bis);
756         else
757             throw new UnsupportedEncodingException(
758                     "unknown encoding: " + encoding);
759
760         // For b64 & q, size of decoded word <= size of word. So
761
// the decoded bytes must fit into the 'bytes' array. This
762
// is certainly more efficient than writing bytes into a
763
// ByteArrayOutputStream and then pulling out the byte[]
764
// from it.
765
int count = bis.available();
766         byte[] bytes = new byte[count];
767         // count is set to the actual number of decoded bytes
768
count = is.read(bytes, 0, count);
769
770         // Finally, convert the decoded bytes into a String using
771
// the specified charset
772
decodedWord = count <= 0 ? "" :
773                 new String JavaDoc(bytes, 0, count, charset);
774         } else {
775         // no characters to decode, return empty string
776
decodedWord = "";
777         }
778         if (pos + 2 < eword.length()) {
779         // there's still more text in the string
780
String JavaDoc rest = eword.substring(pos + 2);
781         if (!decodeStrict)
782             rest = decodeInnerWords(rest);
783         decodedWord += rest;
784         }
785         return decodedWord;
786     } catch (UnsupportedEncodingException uex) {
787         // explicitly catch and rethrow this exception, otherwise
788
// the below IOException catch will swallow this up!
789
throw uex;
790     } catch (IOException ioex) {
791         // Shouldn't happen.
792
throw new ParseException JavaDoc();
793     } catch (IllegalArgumentException JavaDoc iex) {
794         /* An unknown charset of the form ISO-XXX-XXX, will cause
795          * the JDK to throw an IllegalArgumentException ... Since the
796          * JDK will attempt to create a classname using this string,
797          * but valid classnames must not contain the character '-',
798          * and this results in an IllegalArgumentException, rather than
799          * the expected UnsupportedEncodingException. Yikes
800          */

801         throw new UnsupportedEncodingException();
802     }
803     }
804
805     /**
806      * Look for encoded words within a word. The MIME spec doesn't
807      * allow this, but many broken mailers, especially Japanese mailers,
808      * produce such incorrect encodings.
809      */

810     private static String JavaDoc decodeInnerWords(String JavaDoc word)
811                 throws UnsupportedEncodingException {
812     int start = 0, i;
813     StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
814     while ((i = word.indexOf("=?", start)) >= 0) {
815         buf.append(word.substring(start, i));
816         int end = word.indexOf("?=", i);
817         if (end < 0)
818         break;
819         String JavaDoc s = word.substring(i, end + 2);
820         try {
821         s = decodeWord(s);
822         } catch (ParseException JavaDoc pex) {
823         // ignore it, just use the original string
824
}
825         buf.append(s);
826         start = end + 2;
827     }
828     if (start == 0)
829         return word;
830     if (start < word.length())
831         buf.append(word.substring(start));
832     return buf.toString();
833     }
834
835     /**
836      * A utility method to quote a word, if the word contains any
837      * characters from the specified 'specials' list.<p>
838      *
839      * The <code>HeaderTokenizer</code> class defines two special
840      * sets of delimiters - MIME and RFC 822. <p>
841      *
842      * This method is typically used during the generation of
843      * RFC 822 and MIME header fields.
844      *
845      * @param word word to be quoted
846      * @param specials the set of special characters
847      * @return the possibly quoted word
848      * @see javax.mail.internet.HeaderTokenizer#MIME
849      * @see javax.mail.internet.HeaderTokenizer#RFC822
850      */

851     public static String JavaDoc quote(String JavaDoc word, String JavaDoc specials) {
852     int len = word.length();
853
854     /*
855      * Look for any "bad" characters, Escape and
856      * quote the entire string if necessary.
857      */

858     boolean needQuoting = false;
859     for (int i = 0; i < len; i++) {
860         char c = word.charAt(i);
861         if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
862         // need to escape them and then quote the whole string
863
StringBuffer JavaDoc sb = new StringBuffer JavaDoc(len + 3);
864         sb.append('"');
865         sb.append(word.substring(0, i));
866         int lastc = 0;
867         for (int j = i; j < len; j++) {
868             char cc = word.charAt(j);
869             if ((cc == '"') || (cc == '\\') ||
870             (cc == '\r') || (cc == '\n'))
871             if (cc == '\n' && lastc == '\r')
872                 ; // do nothing, CR was already escaped
873
else
874                 sb.append('\\'); // Escape the character
875
sb.append(cc);
876             lastc = cc;
877         }
878         sb.append('"');
879         return sb.toString();
880         } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
881         // These characters cause the string to be quoted
882
needQuoting = true;
883     }
884
885     if (needQuoting) {
886         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(len + 2);
887         sb.append('"').append(word).append('"');
888         return sb.toString();
889     } else
890         return word;
891     }
892
893     /**
894      * Fold a string at linear whitespace so that each line is no longer
895      * than 76 characters, if possible. If there are more than 76
896      * non-whitespace characters consecutively, the string is folded at
897      * the first whitespace after that sequence. The parameter
898      * <code>used</code> indicates how many characters have been used in
899      * the current line; it is usually the length of the header name. <p>
900      *
901      * Note that line breaks in the string aren't escaped; they probably
902      * should be.
903      *
904      * @param used characters used in line so far
905      * @param s the string to fold
906      * @return the folded string
907 &n