package org.columba.ristretto.coder;

import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Encodes and decodes "encoded-words" of the form
 * <code>=?charset?encoding?encoded-text?=</code> (the syntax defined by
 * RFC 2047), where <code>encoding</code> is <code>q</code>
 * (quoted-printable) or <code>b</code> (base64).
 * <p>
 * All methods are static; the class holds no mutable state.
 */
public class EncodedWord {

    /** Logger shared by the coder package. */
    private static final Logger LOG = Logger.getLogger("org.columba.ristretto.coder");

    /**
     * Encoding selector for {@link #encode(CharSequence, Charset, int)}:
     * produce <code>?q?</code> (quoted-printable) encoded-words.
     */
    public static final int QUOTED_PRINTABLE = 0;

    /**
     * Encoding selector for {@link #encode(CharSequence, Charset, int)}:
     * produce <code>?b?</code> (base64) encoded-words.
     */
    public static final int BASE64 = 1;

    /**
     * Matches one encoded-word. Group 1 is the charset token, group 2 the
     * encoding flag (b/B/q/Q), group 3 the encoded text.
     */
    private static final Pattern encodedWordPattern =
        Pattern.compile("=\\?([^?]+)\\?([bBqQ])\\?([^?]+)\\?=");

    /** Matches a (possibly empty) run consisting only of whitespace. */
    private static final Pattern spacePattern = Pattern.compile("\\s*");

    /**
     * Splits text into tokens: a run of non-whitespace characters together
     * with the whitespace that follows it. A token starts at the first
     * letter, digit, underscore or non-ASCII character, so leading ASCII
     * punctuation (an opening quote or parenthesis, say) stays outside the
     * token.
     * <p>
     * The character class is spelled out instead of using <code>\b</code>
     * because the meaning of <code>\b</code> for non-ASCII letters differs
     * between Java releases; with an ASCII-only <code>\b</code>, words that
     * start with a non-ASCII letter would be cut short or missed entirely.
     */
    private static final Pattern wordTokenizerPattern =
        Pattern.compile("([\\p{L}\\p{Nd}_\\P{ASCII}]\\S*\\s*)");

    /** Matches a single space character. */
    private static final Pattern whitespacePattern = Pattern.compile(" ");

    /**
     * Decodes every encoded-word found in <code>input</code> and returns the
     * resulting text. Text outside encoded-words is copied unchanged, except
     * that a whitespace-only gap in front of an encoded-word is dropped.
     * <p>
     * NOTE(review): the whitespace rule also applies to whitespace in front
     * of the first encoded-word, not only between two adjacent ones. This is
     * kept as-is because callers may rely on it.
     *
     * @param input text that may contain encoded-words
     * @return a new buffer holding the decoded text
     */
    public static StringBuffer decode(CharSequence input) {
        StringBuffer result = new StringBuffer(input.length());
        int lastMatchEnd = 0;
        Matcher matcher = encodedWordPattern.matcher(input);

        while (matcher.find()) {
            // Copy the text between the previous encoded-word and this one,
            // unless it consists of whitespace only.
            CharSequence inbetween =
                input.subSequence(lastMatchEnd, matcher.start());
            if (!spacePattern.matcher(inbetween).matches()) {
                result.append(inbetween);
            }

            Charset charset = resolveCharset(matcher.group(1));
            // Locale-independent lower-casing of the single flag character.
            char type = Character.toLowerCase(matcher.group(2).charAt(0));
            String encodedPart = matcher.group(3);

            if (type == 'q') {
                // In the "q" encoding an underscore stands for a space.
                encodedPart = encodedPart.replace('_', ' ');
                result.append(QuotedPrintable.decode(encodedPart, charset));
            } else {
                result.append(charset.decode(Base64.decode(encodedPart)));
            }

            lastMatchEnd = matcher.end();
        }

        // Copy whatever follows the last encoded-word.
        result.append(input.subSequence(lastMatchEnd, input.length()));

        return result;
    }

    /**
     * Resolves the charset token of an encoded-word. A trailing
     * <code>*language</code> suffix (RFC 2231 form, for example
     * <code>US-ASCII*EN</code>) is ignored. If the remaining name is
     * syntactically illegal or not supported by this JVM, the charset named
     * by the <code>file.encoding</code> system property is used instead.
     *
     * @param token the charset token taken from the encoded-word
     * @return the charset to decode with
     */
    private static Charset resolveCharset(String token) {
        int languageStart = token.indexOf('*');
        String charsetName =
            (languageStart >= 0 ? token.substring(0, languageStart) : token).trim();

        try {
            return Charset.forName(charsetName);
        } catch (IllegalCharsetNameException e) {
            LOG.fine("Illegal charset name '" + token + "', using platform default");
        } catch (UnsupportedCharsetException e) {
            LOG.fine("Unsupported charset '" + token + "', using platform default");
        }
        return Charset.forName(System.getProperty("file.encoding"));
    }

    /**
     * Encodes the parts of <code>input</code> that contain characters above
     * 127 as encoded-words and leaves the rest unchanged.
     * <p>
     * The input is split into tokens (a word plus its trailing whitespace).
     * Every token containing a character above 127 is marked for encoding.
     * Marked tokens that lie close together are merged, together with the
     * text between them, into a single encoded-word. If no token needs
     * encoding the input is returned as-is.
     * <p>
     * NOTE(review): for {@link #QUOTED_PRINTABLE} only spaces are mapped to
     * underscores before the text is handed to
     * <code>QuotedPrintable.encode</code>. Whether literal <code>_</code> and
     * <code>?</code> characters get escaped depends on that helper, which is
     * not visible here. Confirm before relying on round-tripping such input.
     *
     * @param input the text to encode
     * @param charset the charset used to convert characters to bytes; its
     *        canonical name is written into each encoded-word
     * @param type {@link #QUOTED_PRINTABLE} selects the <code>q</code>
     *        encoding; any other value selects <code>b</code> (base64)
     * @return a new buffer holding the encoded text
     */
    public static StringBuffer encode(
        CharSequence input,
        Charset charset,
        int type) {
        StringBuffer result = new StringBuffer(input.length());

        // The canonical name, not the possibly localized display name, is
        // what belongs in the header.
        String encodedWordPrototype;
        if (type == QUOTED_PRINTABLE) {
            encodedWordPrototype = "=?" + charset.name() + "?q?";
        } else {
            encodedWordPrototype = "=?" + charset.name() + "?b?";
        }

        // Character budget left once the "=?charset?x?" prefix and the "?="
        // suffix are taken out of 75.
        // NOTE(review): the budget is compared against raw token lengths
        // below, not encoded lengths, so it only limits merging.
        int maxLength = 75 - (encodedWordPrototype.length() + 2);

        // Each element is an int[] { start, end } range into input.
        LinkedList words = new LinkedList();

        // Collect every token that contains a character above 127.
        Matcher matcher = wordTokenizerPattern.matcher(input);
        while (matcher.find()) {
            String word = matcher.group(1);
            for (int i = 0; i < word.length(); i++) {
                if (word.charAt(i) > 127) {
                    words.add(new int[] { matcher.start(), matcher.end() });
                    break;
                }
            }
        }

        // Nothing to encode: return the input unchanged.
        if (words.isEmpty()) {
            return result.append(input);
        }

        // Merge a range into its predecessor when the two together fit the
        // budget and fewer than 10 characters separate them.
        Iterator it = words.iterator();
        int[] last = (int[]) it.next();
        while (it.hasNext()) {
            int[] act = (int[]) it.next();
            if ((last[1] - last[0] + act[1] - act[0] <= maxLength)
                && (act[0] - last[1] < 10)) {
                it.remove();
                last[1] = act[1];
            } else {
                last = act;
            }
        }

        // Emit plain text and encoded-words in input order.
        it = words.iterator();
        int lastWordEnd = 0;
        while (it.hasNext()) {
            int[] act = (int[]) it.next();

            CharSequence rawWord = input.subSequence(act[0], act[1]);
            CharSequence encodedPart;
            if (type == QUOTED_PRINTABLE) {
                // In the "q" encoding a space is written as an underscore.
                Matcher wsMatcher = whitespacePattern.matcher(rawWord);
                rawWord = wsMatcher.replaceAll("_");

                encodedPart = QuotedPrintable.encode(rawWord, charset);
            } else {
                encodedPart =
                    Base64.encode(charset.encode(CharBuffer.wrap(rawWord)));
            }

            // Plain text between the previous range and this one.
            result.append(input.subSequence(lastWordEnd, act[0]));
            result.append(encodedWordPrototype);
            result.append(encodedPart);
            result.append("?=");

            lastWordEnd = act[1];
        }

        // Plain text after the last encoded range.
        result.append(input.subSequence(lastWordEnd, input.length()));

        return result;
    }
}