1 16 package org.outerj.daisy.textextraction.impl; 17 18 import org.pdfbox.pdmodel.PDDocument; 19 import org.pdfbox.pdfparser.PDFParser; 20 import org.pdfbox.util.PDFTextStripper; 21 22 import java.io.CharArrayWriter ; 23 import java.io.InputStream ; 24 25 public class PDFTextExtractor implements MimetypeTextExtractor { 26 public String getText(InputStream is) throws Exception { 27 PDDocument pdfDocument = null; 28 try { 29 PDFParser parser = new PDFParser(is); 30 parser.parse(); 31 32 pdfDocument = parser.getPDDocument(); 33 34 CharArrayWriter writer = new CharArrayWriter (); 35 PDFTextStripper stripper = new PDFTextStripper(); 36 stripper.writeText(pdfDocument, writer); 37 38 return writer.toString(); 39 } finally { 40 if( pdfDocument != null ) 41 pdfDocument.close(); 42 } 43 } 44 } 45 | Popular Tags |