package org.alfresco.repo.content.transform;

import java.io.InputStream;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

/**
 * Content transformer that extracts the text of a PDF document using PDFBox
 * and writes it out as plain text.
 */
public class PdfBoxContentTransformer extends AbstractContentTransformer
{
    /** Receives warnings about failures that occur while releasing resources. */
    private static final Logger logger =
            Logger.getLogger(PdfBoxContentTransformer.class.getName());

    /**
     * Reports how reliably this transformer converts between the given mimetypes.
     *
     * @param sourceMimetype the mimetype of the content to be read
     * @param targetMimetype the mimetype of the content to be written
     * @return <code>1.0</code> when the source is {@link MimetypeMap#MIMETYPE_PDF}
     *         and the target is {@link MimetypeMap#MIMETYPE_TEXT_PLAIN};
     *         <code>0.0</code> for every other combination, including
     *         <code>null</code> arguments
     */
    public double getReliability(String sourceMimetype, String targetMimetype)
    {
        // Comparing from the constant side keeps null arguments from throwing.
        boolean pdfToPlainText =
                MimetypeMap.MIMETYPE_PDF.equals(sourceMimetype)
                && MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype);
        return pdfToPlainText ? 1.0 : 0.0;
    }

    /**
     * Loads the reader's content as a PDF document, strips its text and hands
     * the resulting string to the writer.
     *
     * @param reader  supplies the PDF content as an input stream
     * @param writer  receives the extracted text
     * @param options transformation options; not consulted by this implementation
     * @throws Exception if the content cannot be read, parsed as a PDF, or written
     */
    protected void transformInternal(
            ContentReader reader,
            ContentWriter writer,
            Map<String, Object> options) throws Exception
    {
        PDDocument pdf = null;
        InputStream is = null;
        try
        {
            is = reader.getContentInputStream();
            pdf = PDDocument.load(is);

            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(pdf);

            writer.putContent(text);
        }
        finally
        {
            // Cleanup is best-effort: a failure to close is logged and not
            // rethrown, so it cannot replace an exception raised by the
            // transformation itself. Only Exception is caught so that JVM
            // Errors are not silently swallowed.
            if (pdf != null)
            {
                try
                {
                    pdf.close();
                }
                catch (Exception e)
                {
                    logger.log(Level.WARNING, "Failed to close PDF document", e);
                }
            }
            if (is != null)
            {
                try
                {
                    is.close();
                }
                catch (Exception e)
                {
                    logger.log(Level.WARNING, "Failed to close content input stream", e);
                }
            }
        }
    }
}