1 40 package org.dspace.app.mets; 41 42 import java.io.File ; 43 import java.io.FileInputStream ; 44 import java.io.FileOutputStream ; 45 import java.io.IOException ; 46 import java.io.InputStream ; 47 import java.io.OutputStream ; 48 import java.net.URLEncoder ; 49 import java.sql.SQLException ; 50 import java.util.Date ; 51 import java.util.Properties ; 52 53 import org.apache.commons.cli.CommandLine; 54 import org.apache.commons.cli.CommandLineParser; 55 import org.apache.commons.cli.HelpFormatter; 56 import org.apache.commons.cli.Options; 57 import org.apache.commons.cli.PosixParser; 58 import org.dspace.authorize.AuthorizeException; 59 import org.dspace.authorize.AuthorizeManager; 60 import org.dspace.content.Bitstream; 61 import org.dspace.content.BitstreamFormat; 62 import org.dspace.content.Bundle; 63 import org.dspace.content.Collection; 64 import org.dspace.content.DCValue; 65 import org.dspace.content.DSpaceObject; 66 import org.dspace.content.Item; 67 import org.dspace.content.ItemIterator; 68 import org.dspace.core.ConfigurationManager; 69 import org.dspace.core.Constants; 70 import org.dspace.core.Context; 71 import org.dspace.core.Utils; 72 import org.dspace.handle.HandleManager; 73 import org.dspace.app.webui.util.UIUtil; 74 75 import edu.harvard.hul.ois.mets.Agent; 76 import edu.harvard.hul.ois.mets.AmdSec; 77 import edu.harvard.hul.ois.mets.BinData; 78 import edu.harvard.hul.ois.mets.Checksumtype; 79 import edu.harvard.hul.ois.mets.Div; 80 import edu.harvard.hul.ois.mets.DmdSec; 81 import edu.harvard.hul.ois.mets.FLocat; 82 import edu.harvard.hul.ois.mets.FileGrp; 83 import edu.harvard.hul.ois.mets.FileSec; 84 import edu.harvard.hul.ois.mets.Loctype; 85 import edu.harvard.hul.ois.mets.MdWrap; 86 import edu.harvard.hul.ois.mets.Mdtype; 87 import edu.harvard.hul.ois.mets.Mets; 88 import edu.harvard.hul.ois.mets.MetsHdr; 89 import edu.harvard.hul.ois.mets.Name; 90 import edu.harvard.hul.ois.mets.RightsMD; 91 import edu.harvard.hul.ois.mets.Role; 92 import edu.harvard.hul.ois.mets.StructMap; 93 import edu.harvard.hul.ois.mets.Type; 94 import edu.harvard.hul.ois.mets.XmlData; 95 import edu.harvard.hul.ois.mets.helper.Base64; 96 import edu.harvard.hul.ois.mets.helper.MetsException; 97 import edu.harvard.hul.ois.mets.helper.MetsValidator; 98 import edu.harvard.hul.ois.mets.helper.MetsWriter; 99 import edu.harvard.hul.ois.mets.helper.PCData; 100 import edu.harvard.hul.ois.mets.helper.PreformedXML; 101 102 108 public class METSExport 109 { 110 private static int licenseFormat = -1; 111 112 private static Properties dcToMODS; 113 114 public static void main(String [] args) throws Exception 115 { 116 Context context = new Context(); 117 118 init(context); 119 120 CommandLineParser parser = new PosixParser(); 122 123 Options options = new Options(); 124 125 options.addOption("c", "collection", true, 126 "Handle of collection to export"); 127 options.addOption("i", "item", true, "Handle of item to export"); 128 options.addOption("a", "all", false, "Export all items in the archive"); 129 options.addOption("d", "destination", true, "Destination directory"); 130 options.addOption("h", "help", false, "Help"); 131 132 CommandLine line = parser.parse(options, args); 133 134 if (line.hasOption('h')) 135 { 136 HelpFormatter myhelp = new HelpFormatter(); 137 myhelp.printHelp("metsexport", options); 138 System.out 139 .println("\nExport a collection: metsexport -c hdl:123.456/789"); 140 System.out 141 .println("Export an item: metsexport -i hdl:123.456/890"); 142 System.out.println("Export everything: metsexport -a"); 143 144 System.exit(0); 145 } 146 147 String dest = ""; 148 149 if (line.hasOption('d')) 150 { 151 dest = line.getOptionValue('d'); 152 153 if (!dest.endsWith(File.separator)) 155 { 156 dest = dest + File.separator; 157 } 158 } 159 160 if (line.hasOption('i')) 161 { 162 String handle = getHandleArg(line.getOptionValue('i')); 163 164 DSpaceObject o = HandleManager.resolveToObject(context, handle); 166 167 if ((o != null) && o instanceof Item) 168 { 169 writeAIP(context, (Item) o, dest); 170 System.exit(0); 171 } 172 else 173 { 174 System.err.println(line.getOptionValue('i') 175 + " is not a valid item Handle"); 176 System.exit(1); 177 } 178 } 179 180 ItemIterator items = null; 181 182 if (line.hasOption('c')) 183 { 184 String handle = getHandleArg(line.getOptionValue('c')); 185 186 DSpaceObject o = HandleManager.resolveToObject(context, handle); 188 189 if ((o != null) && o instanceof Collection) 190 { 191 items = ((Collection) o).getItems(); 192 } 193 else 194 { 195 System.err.println(line.getOptionValue('c') 196 + " is not a valid collection Handle"); 197 System.exit(1); 198 } 199 } 200 201 if (line.hasOption('a')) 202 { 203 items = Item.findAll(context); 204 } 205 206 if (items == null) 207 { 208 System.err.println("Nothing to export specified!"); 209 System.exit(1); 210 } 211 212 while (items.hasNext()) 213 { 214 writeAIP(context, items.next(), dest); 215 } 216 217 context.abort(); 218 } 219 220 226 private static void init(Context context) throws SQLException , IOException 227 { 228 if (licenseFormat != -1) 230 { 231 return; 232 } 233 234 BitstreamFormat bf = BitstreamFormat.findByShortDescription(context, 236 "License"); 237 licenseFormat = bf.getID(); 238 239 String configFile = ConfigurationManager.getProperty("dspace.dir") 241 + File.separator + "config" + File.separator + "dc2mods.cfg"; 242 243 InputStream is = new FileInputStream (configFile); 245 dcToMODS = new Properties (); 246 dcToMODS.load(is); 247 } 248 249 261 public static void writeAIP(Context context, Item item, String dest) 262 throws SQLException , IOException , AuthorizeException, MetsException 263 { 264 System.out.println("Exporting item hdl:" + item.getHandle()); 265 266 java.io.File aipDir = new java.io.File (dest 268 + URLEncoder.encode("hdl:" + item.getHandle(), "UTF-8")); 269 270 if (!aipDir.mkdir()) 271 { 272 throw new IOException ("Couldn't create " + aipDir.toString()); 274 } 275 276 FileOutputStream out = new FileOutputStream (aipDir.toString() 278 + java.io.File.separator + "mets.xml"); 279 writeMETS(context, item, out, false); 280 out.close(); 281 282 Bundle[] bundles = item.getBundles(); 284 285 for (int i = 0; i < bundles.length; i++) 286 { 287 Bitstream[] bitstreams = bundles[i].getBitstreams(); 288 289 for (int b = 0; b < bitstreams.length; b++) 290 { 291 if ((bitstreams[b].getFormat().getID() != licenseFormat) 293 && AuthorizeManager.authorizeActionBoolean(context, 294 bitstreams[b], Constants.READ)) 295 { 296 out = new FileOutputStream (aipDir.toString() 297 + java.io.File.separator 298 + bitstreams[b].getName()); 299 300 InputStream in = bitstreams[b].retrieve(); 301 Utils.bufferedCopy(in, out); 302 out.close(); 303 in.close(); 304 } 305 } 306 } 307 } 308 309 323 public static void writeMETS(Context context, Item item, OutputStream os, boolean fullURL) 324 throws SQLException , IOException , AuthorizeException 325 { 326 try 327 { 328 init(context); 329 330 Mets mets = new Mets(); 332 333 mets.setOBJID("hdl:" + item.getHandle()); 335 mets.setLABEL("DSpace Item"); 336 mets.setSchema("mods", "http://www.loc.gov/mods/v3", 337 "http://www.loc.gov/standards/mods/v3/mods-3-0.xsd"); 338 339 MetsHdr metsHdr = new MetsHdr(); 341 metsHdr.setCREATEDATE(new Date ()); 345 Agent agent = new Agent(); 347 agent.setROLE(Role.CUSTODIAN); 348 agent.setTYPE(Type.ORGANIZATION); 349 350 Name name = new Name(); 351 name.getContent() 352 .add( 353 new PCData(ConfigurationManager 354 .getProperty("dspace.name"))); 355 agent.getContent().add(name); 356 357 metsHdr.getContent().add(agent); 358 359 mets.getContent().add(metsHdr); 360 361 DmdSec dmdSec = new DmdSec(); 362 dmdSec.setID("DMD_hdl_" + item.getHandle()); 363 364 MdWrap mdWrap = new MdWrap(); 365 mdWrap.setMDTYPE(Mdtype.MODS); 366 367 XmlData xmlData = new XmlData(); 368 createMODS(item, xmlData); 369 370 mdWrap.getContent().add(xmlData); 371 dmdSec.getContent().add(mdWrap); 372 mets.getContent().add(dmdSec); 373 374 AmdSec amdSec = new AmdSec(); 376 amdSec.setID("TMD_hdl_" + item.getHandle()); 377 378 InputStream licenseStream = findLicense(context, item); 381 382 if (licenseStream != null) 383 { 384 RightsMD rightsMD = new RightsMD(); 385 MdWrap rightsMDWrap = new MdWrap(); 386 rightsMDWrap.setMIMETYPE("text/plain"); 387 rightsMDWrap.setMDTYPE(Mdtype.OTHER); 388 rightsMDWrap.setOTHERMDTYPE("TEXT"); 389 390 BinData binData = new BinData(); 391 Base64 base64 = new Base64(licenseStream); 392 393 binData.getContent().add(base64); 394 rightsMDWrap.getContent().add(binData); 395 rightsMD.getContent().add(rightsMDWrap); 396 amdSec.getContent().add(rightsMD); 397 } 398 399 mets.getContent().add(amdSec); 401 402 FileSec fileSec = new FileSec(); 404 boolean fileSecEmpty = true; 405 406 Bundle[] bundles = item.getBundles(); 407 408 for (int i = 0; i < bundles.length; i++) 409 { 410 Bitstream[] bitstreams = bundles[i].getBitstreams(); 411 412 if (bitstreams[0].getFormat().getID() == licenseFormat) 415 { 416 continue; 417 } 418 419 FileGrp fileGrp = new FileGrp(); 421 422 if ((bundles[i].getName() != null) 424 && !bundles[i].getName().equals("")) 425 { 426 fileGrp.setUSE(bundles[i].getName()); 427 } 428 429 for (int bits = 0; bits < bitstreams.length; bits++) 430 { 431 String bitstreamPID = ConfigurationManager 433 .getProperty("dspace.url") 434 + "/bitstream/" 435 + item.getHandle() 436 + "/" 437 + bitstreams[bits].getSequenceID() 438 + "/" 439 + UIUtil.encodeBitstreamName(bitstreams[bits].getName(), 440 "UTF-8"); 441 442 edu.harvard.hul.ois.mets.File file = new edu.harvard.hul.ois.mets.File(); 443 444 449 String xmlIDstart = item.getHandle().replaceAll("/", "_") 450 + "_"; 451 452 file.setID(xmlIDstart + bitstreams[bits].getSequenceID()); 453 454 String groupID = "GROUP_" + xmlIDstart 455 + bitstreams[bits].getSequenceID(); 456 457 462 if ((bundles[i].getName() != null) 463 && (bundles[i].getName().equals("THUMBNAIL") || bundles[i] 464 .getName().equals("TEXT"))) 465 { 466 Bitstream original = findOriginalBitstream(item, 470 bitstreams[bits]); 471 472 if (original != null) 473 { 474 groupID = "GROUP_" + xmlIDstart 475 + original.getSequenceID(); 476 } 477 } 478 479 file.setGROUPID(groupID); 480 file.setOWNERID(bitstreamPID); 481 482 file 485 .setMIMETYPE(bitstreams[bits].getFormat() 486 .getMIMEType()); 487 488 file.setSIZE(bitstreams[bits].getSize()); 490 file.setCHECKSUM(bitstreams[bits].getChecksum()); 491 file.setCHECKSUMTYPE(Checksumtype.MD5); 492 493 FLocat flocat = new FLocat(); 497 flocat.setLOCTYPE(Loctype.URL); 498 if (fullURL) 499 { 500 flocat.setXlinkHref(bitstreamPID); 501 } 502 else 503 { 504 flocat.setXlinkHref(bitstreams[bits].getName()); 505 } 506 507 file.getContent().add(flocat); 509 fileGrp.getContent().add(file); 510 } 511 512 fileSec.getContent().add(fileGrp); 514 fileSecEmpty = false; 515 } 516 517 if (!fileSecEmpty) 519 { 520 mets.getContent().add(fileSec); 521 } 522 523 StructMap structMap = new StructMap(); 525 Div div = new Div(); 526 structMap.getContent().add(div); 527 mets.getContent().add(structMap); 528 529 530 mets.validate(new MetsValidator()); 531 532 mets.write(new MetsWriter(os)); 533 } 534 catch (MetsException e) 535 { 536 e.printStackTrace(); 539 throw new IOException (e.getMessage()); 540 } 541 } 542 543 555 private static InputStream findLicense(Context context, Item item) 556 throws SQLException , IOException , AuthorizeException 557 { 558 Bundle[] bundles = item.getBundles(); 559 560 for (int i = 0; i < bundles.length; i++) 561 { 562 Bitstream[] bitstreams = bundles[i].getBitstreams(); 564 565 if (bitstreams[0].getFormat().getID() == licenseFormat) 566 { 567 return bitstreams[0].retrieve(); 569 } 570 } 571 572 return null; 574 } 575 576 587 private static Bitstream findOriginalBitstream(Item item, Bitstream derived) 588 throws SQLException 589 { 590 Bundle[] bundles = item.getBundles(); 591 592 String originalFilename = derived.getName().substring(0, 595 derived.getName().length() - 4); 596 597 for (int i = 0; i < bundles.length; i++) 599 { 600 if ((bundles[i].getName() != null) 601 && bundles[i].getName().equals("ORIGINAL")) 602 { 603 Bitstream[] bitstreams = bundles[i].getBitstreams(); 605 606 for (int bsnum = 0; bsnum < bitstreams.length; bsnum++) 607 { 608 if (bitstreams[bsnum].getName().equals(originalFilename)) 609 { 610 return bitstreams[bsnum]; 611 } 612 } 613 } 614 } 615 616 return null; 618 } 619 620 629 private static void createMODS(Item item, XmlData xmlData) 630 { 631 DCValue[] dc = item.getDC(Item.ANY, Item.ANY, Item.ANY); 632 633 StringBuffer modsXML = new StringBuffer (); 634 635 for (int i = 0; i < dc.length; i++) 636 { 637 String propName = ((dc[i].qualifier == null) ? dc[i].element 639 : (dc[i].element + "." + dc[i].qualifier)); 640 641 String modsMapping = dcToMODS.getProperty(propName); 642 643 if (modsMapping == null) 644 { 645 System.err.println("WARNING: No MODS mapping for " + propName); 646 } 647 else 648 { 649 modsXML.append(modsMapping.replaceAll("%s", Utils 651 .addEntities(dc[i].value))); 652 modsXML.append("\n"); } 654 } 655 656 PreformedXML pXML = new PreformedXML(modsXML.toString()); 657 xmlData.getContent().add(pXML); 658 } 659 660 668 private static String getHandleArg(String original) 669 { 670 if (original.startsWith("hdl:")) 671 { 672 return original.substring(4); 673 } 674 675 if (original.startsWith("http://hdl.handle.net/")) 676 { 677 return original.substring(22); 678 } 679 680 return original; 681 } 682 } 683 | Popular Tags |