KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > columba > ristretto > coder > EncodedWord


/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Ristretto Mail API.
 *
 * The Initial Developers of the Original Code are
 * Timo Stich and Frederik Dietz.
 * Portions created by the Initial Developers are Copyright (C) 2004
 * All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

36 package org.columba.ristretto.coder;
37
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
46
47 /**
48  * Implementation of EncodedWord en- and decoding methods.
49  * <br>
50  * <b>RFC(s):</b> 2047
51  *
52  * @author Timo Stich <tstich@users.sourceforge.net>
53  */

54 public class EncodedWord {
55
56     /** JDK 1.4+ logging framework logger, used for logging. */
57     private static final Logger JavaDoc LOG = Logger.getLogger("org.columba.ristretto.coder");
58
59
60     /**
61      * QuotedPritntable Encoding. Default.
62      */

63     public static final int QUOTED_PRINTABLE = 0;
64
65     /**
66      *Base64 Encoding. Should be used to encode 16bit charsets
67      */

68     public static final int BASE64 = 1;
69
70
71     // finds a encoded word wich if of the form
72
// =?charset?encoding(b/g)?encoded text part?=
73
private static final Pattern JavaDoc encodedWordPattern =
74         Pattern.compile("=\\?([^?]+)\\?([bBqQ])\\?([^?]+)\\?=");
75
76     
77     // filters whitespaces
78
private static final Pattern JavaDoc spacePattern = Pattern.compile("\\s*");
79
80     // tokenizes a string into words
81
private static final Pattern JavaDoc wordTokenizerPattern =
82         Pattern.compile("\\b([^\\s]+[\\s]*)");
83
84     private static final Pattern JavaDoc whitespacePattern = Pattern.compile(" ");
85
86     /**
87      * Decodes a string that contains EncodedWords.
88      *
89      * @param input a string containing EncodedWords
90      * @return the decoded string
91      */

92     public static StringBuffer JavaDoc decode(CharSequence JavaDoc input) {
93         StringBuffer JavaDoc result = new StringBuffer JavaDoc(input.length());
94         int lastMatchEnd = 0;
95         Matcher JavaDoc matcher = encodedWordPattern.matcher(input);
96         Charset JavaDoc charset;
97         char type;
98         String JavaDoc encodedPart;
99
100         while (matcher.find()) {
101             CharSequence JavaDoc inbetween =
102                 input.subSequence(lastMatchEnd, matcher.start());
103             if (!spacePattern.matcher(inbetween).matches()) {
104                 result.append(inbetween);
105             }
106
107
108             try {
109                 charset = Charset.forName(matcher.group(1));
110             } catch ( UnsupportedCharsetException JavaDoc e ) {
111                 charset = Charset.forName(System.getProperty("file.encoding"));
112             }
113             type = matcher.group(2).toLowerCase().charAt(0);
114             encodedPart = matcher.group(3);
115
116             if (type == 'q') {
117                 encodedPart = encodedPart.replace('_', ' ');
118                 // _ are WS and must be converted before normal decoding
119
result.append(QuotedPrintable.decode(encodedPart, charset));
120             } else {
121                 result.append(charset.decode(Base64.decode(encodedPart)));
122             }
123
124             lastMatchEnd = matcher.end();
125         }
126
127         result.append(input.subSequence(lastMatchEnd, input.length()));
128
129         return result;
130     }
131
132     /**
133      * Takes a text in form of a CharSequence encoded in the given charset (e.g. ISO-8859-1)
134      * and makes it US-ASCII compatible and RFC822 compatible for the use as e.g. subject
135      * with special characters.
136      * <br>
137      * This algorithm tries to achieve several goals when decoding:
138      * <li>never encode a single character but try to encode whole words</li>
139      * <li>if two words must be encoded and there a no more than 10 characters
140      * inbetween, encode everything in one single encoded word</li>
141      * <li>an encoded word must never be longer than 76 characters in total</li>
142      * <li>ensure that no encodedWord is in a line-wrap (RFC822 advices to no have more than 78
143      * characters in a headerline)</li>
144      *
145      * @param input the headerline
146      * @param charset the used charset (e.g. ISO-8859-1)
147      * @param type the encoding to be used
148      * @return input encoded in EncodedWords
149      */

150     public static StringBuffer JavaDoc encode(
151         CharSequence JavaDoc input,
152         Charset JavaDoc charset,
153         int type) {
154         StringBuffer JavaDoc result = new StringBuffer JavaDoc(input.length());
155         LinkedList JavaDoc words = new LinkedList JavaDoc();
156         String JavaDoc encodedWordPrototype;
157         if (type == QUOTED_PRINTABLE) {
158             encodedWordPrototype = "=?" + charset.displayName() + "?q?";
159         } else {
160             encodedWordPrototype = "=?" + charset.displayName() + "?b?";
161         }
162         int maxLength = 75 - (encodedWordPrototype.length() + 2);
163
164         // First find words which need to be encoded
165
Matcher JavaDoc matcher = wordTokenizerPattern.matcher(input);
166         while (matcher.find()) {
167             String JavaDoc word = matcher.group(1);
168             for (int i = 0; i < word.length(); i++) {
169                 if (word.charAt(i) > 127) {
170                     words.add(new int[] { matcher.start(), matcher.end()});
171                     break;
172                 }
173             }
174         }
175
176         // No need to create encodedWords
177
if (words.size() == 0) {
178             return result.append(input);
179         }
180
181         // Second group them together if possible (see goals above)
182
Iterator JavaDoc it = words.iterator();
183         int[] last = (int[]) it.next();
184         while (it.hasNext()) {
185             int[] act = (int[]) it.next();
186             if ((last[1] - last[0] + act[1] - act[0] <= maxLength)
187                 && (act[0] - last[1] < 10)) {
188                 it.remove();
189                 last[1] = act[1];
190             } else {
191                 last = act;
192             }
193         }
194
195         // Create encodedWords
196
it = words.iterator();
197         int lastWordEnd = 0;
198         while (it.hasNext()) {
199             int[] act = (int[]) it.next();
200
201             // create encoded part
202
CharSequence JavaDoc rawWord = input.subSequence(act[0], act[1]);
203             CharSequence JavaDoc encodedPart;
204             if (type == QUOTED_PRINTABLE) {
205                 // Replace <space> with _
206
Matcher JavaDoc wsMatcher = whitespacePattern.matcher(rawWord);
207                 rawWord = wsMatcher.replaceAll("_");
208
209                 encodedPart = QuotedPrintable.encode(rawWord, charset);
210             } else {
211                 encodedPart =
212                     Base64.encode(charset.encode(CharBuffer.wrap(rawWord)));
213             }
214
215             // append encodedWord(s)
216
result.append(input.subSequence(lastWordEnd, act[0]));
217             result.append(encodedWordPrototype);
218             result.append(encodedPart);
219             result.append("?=");
220
221             lastWordEnd = act[1];
222         }
223         result.append(input.subSequence(lastWordEnd, input.length()));
224
225         return result;
226     }
227
228
229 }
230
Popular Tags