1 40 41 package org.dspace.content.packager; 42 43 import java.io.IOException ; 44 import java.io.InputStream ; 45 import java.io.OutputStream ; 46 import java.sql.SQLException ; 47 import java.util.Calendar ; 48 49 import org.apache.log4j.Logger; 50 import org.dspace.authorize.AuthorizeException; 51 import org.dspace.content.Bitstream; 52 import org.dspace.content.BitstreamFormat; 53 import org.dspace.content.Bundle; 54 import org.dspace.content.Collection; 55 import org.dspace.content.DCDate; 56 import org.dspace.content.DSpaceObject; 57 import org.dspace.content.Item; 58 import org.dspace.content.WorkspaceItem; 59 import org.dspace.content.crosswalk.CrosswalkException; 60 import org.dspace.content.crosswalk.MetadataValidationException; 61 import org.dspace.core.Constants; 62 import org.dspace.core.Context; 63 import org.dspace.core.LogManager; 64 import org.dspace.core.SelfNamedPlugin; 65 import org.dspace.core.Utils; 66 import org.pdfbox.cos.COSDocument; 67 import org.pdfbox.pdfparser.PDFParser; 68 import org.pdfbox.pdmodel.PDDocument; 69 import org.pdfbox.pdmodel.PDDocumentInformation; 70 71 86 public class PDFPackager 87 extends SelfNamedPlugin 88 implements PackageIngester, PackageDisseminator 89 { 90 91 private static Logger log = Logger.getLogger(PDFPackager.class); 92 93 private final static String BITSTREAM_FORMAT_NAME = "Adobe PDF"; 94 95 private static String aliases[] = { "PDF", "Adobe PDF", "pdf", "application/pdf" }; 96 97 public static String [] getPluginNames() 98 { 99 return aliases; 100 } 101 102 private static void setFormatToMIMEType(Context context, Bitstream bs, String mimeType) 104 throws SQLException 105 { 106 BitstreamFormat bf[] = BitstreamFormat.findNonInternal(context); 107 for (int i = 0; i < bf.length; ++i) 108 { 109 if (bf[i].getMIMEType().equalsIgnoreCase(mimeType)) 110 { 111 bs.setFormat(bf[i]); 112 break; 113 } 114 } 115 } 116 117 136 public WorkspaceItem ingest(Context context, Collection collection, 137 InputStream pkg, PackageParameters params, 138 String license) 139 throws PackageValidationException, CrosswalkException, 140 AuthorizeException, SQLException , IOException 141 { 142 InputStream bis = null; 143 COSDocument cos = null; 144 boolean success = false; 145 Bundle original = null; 146 Bitstream bs = null; 147 WorkspaceItem wi = null; 148 149 170 171 try 172 { 173 wi = WorkspaceItem.create(context, collection, false); 176 Item myitem = wi.getItem(); 177 original = myitem.createBundle("ORIGINAL"); 178 bs = original.createBitstream(pkg); 179 pkg.close(); 180 bs.setName("package.pdf"); 181 setFormatToMIMEType(context, bs, "application/pdf"); 182 bs.update(); 183 log.debug("Created bitstream ID="+String.valueOf(bs.getID())+", parsing..."); 184 185 crosswalkPDF(context, myitem, bs.retrieve()); 186 187 wi.update(); 188 context.commit(); 189 success = true; 190 log.info(LogManager.getHeader(context, "ingest", 191 "Created new Item, db ID="+String.valueOf(myitem.getID())+ 192 ", WorkspaceItem ID="+String.valueOf(wi.getID()))); 193 return wi; 194 } 195 finally 196 { 197 try 198 { 199 if (bis != null) 201 bis.close(); 202 if (cos != null) 203 cos.close(); 204 } 205 catch (IOException ie) 206 { } 207 208 if (!success) 210 { 211 if (original != null && bs != null) 212 original.removeBitstream(bs); 213 if (wi != null) 214 wi.deleteAll(); 215 } 216 context.commit(); 217 } 218 } 219 220 223 public Item replace(Context ctx, Item item, InputStream pckage, PackageParameters params) 224 throws PackageValidationException, CrosswalkException, 225 AuthorizeException, SQLException , IOException , 226 UnsupportedOperationException 227 { 228 throw new UnsupportedOperationException ("The replace operation is not implemented."); 229 } 230 231 236 public void disseminate(Context context, DSpaceObject dso, 237 PackageParameters params, OutputStream out) 238 throws PackageValidationException, CrosswalkException, 239 AuthorizeException, SQLException , IOException 240 { 241 if (dso.getType() != Constants.ITEM) 242 throw new PackageValidationException("This disseminator can only handle objects of type ITEM."); 243 244 Item item = (Item)dso; 245 try 246 { 247 BitstreamFormat pdff = BitstreamFormat.findByShortDescription(context, 248 BITSTREAM_FORMAT_NAME); 249 if (pdff == null) 250 throw new PackageValidationException("Cannot find BitstreamFormat \""+BITSTREAM_FORMAT_NAME+"\""); 251 Bitstream pkgBs = PackageUtils.getBitstreamByFormat(item, pdff, Constants.DEFAULT_BUNDLE_NAME); 252 if (pkgBs == null) 253 throw new PackageValidationException("Cannot find Bitstream with format \""+BITSTREAM_FORMAT_NAME+"\""); 254 Utils.copy(pkgBs.retrieve(), out); 255 } 256 finally {} 257 } 258 259 264 public String getMIMEType(PackageParameters params) 265 { 266 return "application/pdf"; 267 } 268 269 private void crosswalkPDF(Context context, Item item, InputStream metadata) 270 throws CrosswalkException, IOException , SQLException , AuthorizeException 271 { 272 COSDocument cos = null; 273 274 try 275 { 276 PDFParser parser = new PDFParser(metadata); 277 parser.parse(); 278 cos = parser.getDocument(); 279 280 if(cos.getEncryptionDictionary() != null) 282 throw new MetadataValidationException("This packager cannot accept an encrypted PDF document."); 283 284 301 PDDocument pd = new PDDocument(cos); 302 PDDocumentInformation docinfo = pd.getDocumentInformation(); 303 String title = docinfo.getTitle(); 304 305 if (title == null) 307 throw new MetadataValidationException("This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary."); 308 log.debug("PDF Info dict title=\""+title+"\""); 309 item.addDC("title", null, "en", title); 310 String value; 311 Calendar date; 312 if ((value = docinfo.getAuthor()) != null) 313 { 314 item.addDC("contributor", "author", null, value); 315 log.debug("PDF Info dict author=\""+value+"\""); 316 } 317 if ((value = docinfo.getCreator()) != null) 318 item.addDC("description", "provenance", "en", 319 "Application that created the original document: "+value); 320 if ((value = docinfo.getProducer()) != null) 321 item.addDC("description", "provenance", "en", 322 "Original document converted to PDF by: "+value); 323 if ((value = docinfo.getSubject()) != null) 324 item.addDC("description", "abstract", null, value); 325 if ((value = docinfo.getKeywords()) != null) 326 item.addDC("subject", "other", null, value); 327 328 Calendar calValue; 331 if ((calValue = docinfo.getCreationDate()) == null) 332 calValue = docinfo.getModificationDate(); 333 if (calValue != null) 334 item.addDC("date", "created", null, 335 (new DCDate(calValue.getTime())).toString()); 336 item.update(); 337 } 338 finally 339 { 340 if (cos != null) 341 cos.close(); 342 } 343 } 344 } 345 | Popular Tags |