1 16 package com.blandware.atleap.common.parsers; 17 18 import com.blandware.atleap.common.Constants; 19 import com.blandware.atleap.common.parsers.excel.ExcelPlainTextExtractor; 20 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException; 21 import com.blandware.atleap.common.parsers.exception.UnsupportedMimeTypeException; 22 import com.blandware.atleap.common.parsers.html.HTMLPlainTextExtractor; 23 import com.blandware.atleap.common.parsers.pdf.PDFPlainTextExtractor; 24 import com.blandware.atleap.common.parsers.ppt.PowerPointPlainTextExtractor; 25 import com.blandware.atleap.common.parsers.rtf.RTFPlainTextExtractor; 26 import com.blandware.atleap.common.parsers.txt.TXTPlainTextExtractor; 27 import com.blandware.atleap.common.parsers.word.WordPlainTextExtractor; 28 import com.blandware.atleap.common.parsers.xml.XMLPlainTextExtractor; 29 30 import java.io.*; 31 32 68 public class PlainTextExtractor { 69 70 74 protected String usedEncoding = null; 75 76 79 public PlainTextExtractor() {} 80 81 96 public void extract(InputStream input, String mimeType, Writer output, 97 String encoding) throws UnsupportedMimeTypeException, 98 PlainTextExtractorException { 99 SpecificPlainTextExtractor extractor = null; 100 101 if (mimeType == null) { 102 throw new IllegalArgumentException ("mimeType parameter is null"); 103 } 104 if (mimeType.equals("application/msword")) { 105 extractor = new WordPlainTextExtractor(); 106 } else if (mimeType.equals("application/vnd.ms-excel")) { 107 extractor = new ExcelPlainTextExtractor(); 108 } else if (mimeType.equals("application/vnd.ms-powerpoint")) { 109 extractor = new PowerPointPlainTextExtractor(); 110 } else if (mimeType.equals("application/pdf")) { 111 extractor = new PDFPlainTextExtractor(); 112 } else if (mimeType.equals("application/rtf")) { 113 extractor = new RTFPlainTextExtractor(); 114 } else if (mimeType.equals("text/html")) { 115 extractor = new HTMLPlainTextExtractor(); 116 } else if (mimeType.equals("application/xhtml+xml")) { 117 extractor = new HTMLPlainTextExtractor(); 118 } else if (mimeType.equals("text/xml")) { 119 extractor = new XMLPlainTextExtractor(); 120 } else if (mimeType.equals("text/plain")) { 121 extractor = new TXTPlainTextExtractor(); 122 } else { 123 throw new UnsupportedMimeTypeException("This mimeType is not supported: " 124 + mimeType); 125 } 126 if (extractor != null) { 127 extractor.extract(input, output, encoding); 128 usedEncoding = extractor.getUsedEncoding(); 129 } else { 130 usedEncoding = null; 131 } 132 } 133 134 150 public String extract(InputStream input, String mimeType, String encoding) 151 throws UnsupportedMimeTypeException, PlainTextExtractorException { 152 StringWriter writer = new StringWriter(); 153 extract(input, mimeType, writer, encoding); 154 return writer.toString(); 155 } 156 157 169 public void extract(InputStream input, String mimeType, Writer output) 170 throws UnsupportedMimeTypeException, PlainTextExtractorException { 171 extract(input, mimeType, output, null); 172 } 173 174 186 public String extract(InputStream input, String mimeType) 187 throws UnsupportedMimeTypeException, PlainTextExtractorException { 188 return extract(input, mimeType, (String )null); 189 } 190 191 209 public void extract(String input, String mimeType, Writer output, 210 String encoding) throws UnsupportedMimeTypeException, 211 PlainTextExtractorException { 212 try { 213 extract(stringToInputStream(input, encoding), mimeType, 214 output, encoding); 215 } catch (UnsupportedEncodingException e) { 216 throw new PlainTextExtractorException(e); 217 } 218 } 219 220 238 public String extract(String input, String mimeType, String encoding) 239 throws UnsupportedMimeTypeException, PlainTextExtractorException { 240 try { 241 return extract(stringToInputStream(input, encoding), mimeType, 242 encoding); 243 } catch (UnsupportedEncodingException e) { 244 throw new PlainTextExtractorException(e); 245 } 246 } 247 248 262 public void extract(String input, String mimeType, Writer output) 263 throws UnsupportedMimeTypeException, PlainTextExtractorException { 264 extract(input, mimeType, output, null); 265 } 266 267 281 public String extract(String input, String mimeType) 282 throws UnsupportedMimeTypeException, PlainTextExtractorException { 283 return extract(input, mimeType, (String ) null); 284 } 285 286 299 public String getUsedEncoding() { 300 return usedEncoding; 301 } 302 303 313 protected InputStream stringToInputStream(String input, String encoding) 314 throws UnsupportedEncodingException { 315 if (encoding == null || encoding.trim().length() == 0) { 316 encoding = Constants.DEFAULT_ENCODING; 317 } 318 return new ByteArrayInputStream(input.getBytes(encoding)); 319 } 320 } 321 | Popular Tags |