package com.blandware.atleap.common.parsers.pdf;

import com.blandware.atleap.common.parsers.SpecificPlainTextExtractor;
import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

import java.io.InputStream;
import java.io.Writer;

/**
 * Plain text extractor for PDF documents, implemented on top of PDFBox.
 */
public class PDFPlainTextExtractor implements SpecificPlainTextExtractor {

    /**
     * Creates a new PDF plain text extractor.
     */
    public PDFPlainTextExtractor() {
    }

    /**
     * Loads a PDF document from <code>input</code> and writes its text to
     * <code>output</code>. An encrypted document is decrypted with the empty
     * password before its text is written.
     * <p>
     * Neither <code>input</code> nor <code>output</code> is closed by this
     * method. The loaded document is always closed, whether extraction
     * succeeds or fails.
     *
     * @param input    stream to load the PDF document from
     * @param output   writer that receives the extracted text
     * @param encoding not used by this implementation
     * @throws PlainTextExtractorException wrapping any throwable raised while
     *         loading, decrypting, writing or closing the document
     */
    public void extract(InputStream input, Writer output, String encoding)
            throws PlainTextExtractorException {
        PDDocument document = null;
        try {
            PDFTextStripper stripper = new PDFTextStripper();

            document = PDDocument.load(input);
            if (document.isEncrypted()) {
                document.decrypt("");
            }
            stripper.writeText(document, output);

            // Close on the success path here, rather than in the finally
            // block, so that a failure to close is still reported to the
            // caller. The reference is cleared first so that the finally
            // block does not attempt a second close.
            PDDocument loaded = document;
            document = null;
            loaded.close();
        } catch (Throwable e) {
            throw new PlainTextExtractorException(e);
        } finally {
            // Only reached with a non-null document when extraction failed
            // after the document had been loaded.
            closeQuietly(document);
        }
    }

    /**
     * Always returns <code>null</code>.
     *
     * @return <code>null</code>
     */
    public String getUsedEncoding() {
        return null;
    }

    /**
     * Closes the given document, suppressing any failure so that it cannot
     * mask the error that made the cleanup necessary.
     *
     * @param document document to close; may be <code>null</code>
     */
    private static void closeQuietly(PDDocument document) {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (Throwable ignored) {
            // Intentionally ignored: the original extraction failure is
            // already being propagated and is the more useful error.
        }
    }
}