package org.alfresco.repo.content.metadata;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Calendar;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.MimetypeMap;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.namespace.QName;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;

/**
 * Metadata extracter for PDF content that reads the document information
 * dictionary through PDFBox.
 * <p>
 * The author, title, subject and creation date of the PDF are copied into the
 * destination property map under {@link ContentModel#PROP_AUTHOR},
 * {@link ContentModel#PROP_TITLE}, {@link ContentModel#PROP_DESCRIPTION} and
 * {@link ContentModel#PROP_CREATED} respectively.
 */
public class PdfBoxMetadataExtracter extends AbstractMetadataExtracter
{
    // NOTE(review): java.util.logging is used because no project logging facade
    // is visible from this file; swap for the project's logger if one exists.
    private static final Logger logger = Logger.getLogger(PdfBoxMetadataExtracter.class.getName());

    /**
     * Registers this extracter for the PDF mimetype.
     * <p>
     * NOTE(review): the meaning of the {@code 1.0} and {@code 1000} arguments is
     * defined by the {@code AbstractMetadataExtracter} constructor, which is not
     * visible here (presumably a reliability score and a time estimate) - confirm.
     */
    public PdfBoxMetadataExtracter()
    {
        super(MimetypeMap.MIMETYPE_PDF, 1.0, 1000);
    }

    /**
     * Loads the PDF supplied by the reader and copies its document information
     * into the destination map.
     * <p>
     * Both the content stream and the PDF document are released on a best-effort
     * basis once extraction finishes or fails; a failure to release either one is
     * logged and never replaces an exception raised by the extraction itself.
     *
     * @param reader      source of the PDF content stream
     * @param destination map that receives the extracted properties
     * @throws Throwable  any failure raised while opening or parsing the PDF
     */
    public void extractInternal(ContentReader reader, Map<QName, Serializable> destination) throws Throwable
    {
        PDDocument pdf = null;
        InputStream is = null;
        try
        {
            is = reader.getContentInputStream();
            pdf = PDDocument.load(is);
            PDDocumentInformation docInfo = pdf.getDocumentInformation();

            // trimPut is inherited; presumably it trims the value and skips
            // empty ones - verify against AbstractMetadataExtracter.
            trimPut(ContentModel.PROP_AUTHOR, docInfo.getAuthor(), destination);
            trimPut(ContentModel.PROP_TITLE, docInfo.getTitle(), destination);
            trimPut(ContentModel.PROP_DESCRIPTION, docInfo.getSubject(), destination);

            // The creation date is optional; only record it when it is present.
            Calendar created = docInfo.getCreationDate();
            if (created != null)
            {
                destination.put(ContentModel.PROP_CREATED, created.getTime());
            }
        }
        finally
        {
            if (is != null)
            {
                try
                {
                    is.close();
                }
                catch (IOException e)
                {
                    // Best effort: the metadata has already been read (or the
                    // primary failure is propagating), so only record the problem.
                    logger.log(Level.FINE, "Failed to close PDF content stream", e);
                }
            }
            if (pdf != null)
            {
                try
                {
                    pdf.close();
                }
                catch (Exception e)
                {
                    // Best effort as above. Exception rather than Throwable so
                    // that JVM Errors are not silently swallowed.
                    logger.log(Level.WARNING, "Failed to close PDF document", e);
                }
            }
        }
    }
}