1 40 41 package org.dspace.content.packager; 42 43 import java.io.ByteArrayInputStream ; 44 import java.io.File ; 45 import java.io.IOException ; 46 import java.io.InputStream ; 47 import java.sql.SQLException ; 48 import java.util.ArrayList ; 49 import java.util.Enumeration ; 50 import java.util.Iterator ; 51 import java.util.List ; 52 53 import org.apache.commons.codec.binary.Base64; 54 import org.apache.log4j.Logger; 55 import org.dspace.authorize.AuthorizeException; 56 import org.dspace.content.Bitstream; 57 import org.dspace.content.DSpaceObject; 58 import org.dspace.content.Item; 59 import org.dspace.content.crosswalk.CrosswalkException; 60 import org.dspace.content.crosswalk.CrosswalkObjectNotSupported; 61 import org.dspace.content.crosswalk.MetadataValidationException; 62 import org.dspace.content.crosswalk.IngestionCrosswalk; 63 import org.dspace.core.ConfigurationManager; 64 import org.dspace.core.Constants; 65 import org.dspace.core.Context; 66 import org.dspace.core.PluginManager; 67 import org.jdom.Document; 68 import org.jdom.Element; 69 import org.jdom.JDOMException; 70 import org.jdom.Namespace; 71 import org.jdom.input.SAXBuilder; 72 import org.jdom.output.Format; 73 import org.jdom.output.XMLOutputter; 74 import org.jdom.xpath.XPath; 75 76 119 public class METSManifest 120 { 121 126 public interface Mdref 127 { 128 146 public InputStream getInputStream(Element mdRef) 147 throws MetadataValidationException, IOException , SQLException , AuthorizeException; 148 } 149 150 151 private static Logger log = Logger.getLogger(METSManifest.class); 152 153 154 public final static String MANIFEST_FILE = "mets.xml"; 155 156 159 private final static String CONFIG_METADATA_PREFIX = "mets.submission.crosswalk."; 160 161 162 private final static String CONFIG_XSD_PREFIX = "mets.xsd."; 163 164 165 private static Namespace dcNS = Namespace 166 .getNamespace("http://purl.org/dc/elements/1.1/"); 167 168 169 private static Namespace dcTermNS = Namespace 170 .getNamespace("http://purl.org/dc/terms/"); 171 172 173 public static Namespace metsNS = Namespace 174 .getNamespace("mets", "http://www.loc.gov/METS/"); 175 176 177 private static Namespace xlinkNS = Namespace 178 .getNamespace("xlink", "http://www.w3.org/1999/xlink"); 179 180 181 private Element mets = null; 182 183 184 private List mdFiles = null; 185 186 187 private List contentFiles = null; 188 189 190 private SAXBuilder parser = null; 191 192 private static String localSchemas; 195 static 196 { 197 String dspace_dir = ConfigurationManager.getProperty("dspace.dir"); 198 File xsdPath1 = new File (dspace_dir+"/config/schemas/"); 199 File xsdPath2 = new File (dspace_dir+"/config/"); 200 201 Enumeration pe = ConfigurationManager.propertyNames(); 202 StringBuffer result = new StringBuffer (); 203 while (pe.hasMoreElements()) 204 { 205 String key = (String )pe.nextElement(); 211 if (key.startsWith(CONFIG_XSD_PREFIX)) 212 { 213 String spec = ConfigurationManager.getProperty(key); 214 String val[] = spec.trim().split("\\s+"); 215 if (val.length == 2) 216 { 217 File xsd = new File (xsdPath1, val[1]); 218 if (!xsd.exists()) 219 xsd = new File (xsdPath2, val[1]); 220 if (!xsd.exists()) 221 log.warn("Schema file not found for config entry=\""+spec+"\""); 222 else 223 { 224 try 225 { 226 String u = xsd.toURL().toString(); 227 if (result.length() > 0) 228 result.append(" "); 229 result.append(val[0]).append(" ").append(u); 230 } 231 catch (java.net.MalformedURLException e) 232 { 233 log.warn("Skipping badly formed XSD URL: "+e.toString()); 234 } 235 } 236 } 237 else 238 log.warn("Schema config entry has wrong format, entry=\""+spec+"\""); 239 } 240 } 241 localSchemas = result.toString(); 242 log.debug("Got local schemas = \""+localSchemas+"\""); 243 } 244 245 250 private METSManifest(SAXBuilder builder, Element mets) 251 { 252 super(); 253 this.mets = mets; 254 parser = builder; 255 } 256 257 267 public static METSManifest create(InputStream is, boolean validate) 268 throws IOException , 269 MetadataValidationException 270 { 271 SAXBuilder builder = new SAXBuilder(validate); 272 273 if (validate) 275 builder.setFeature("http://apache.org/xml/features/validation/schema", 276 true); 277 278 if (localSchemas.length() > 0) 281 builder.setProperty( 282 "http://apache.org/xml/properties/schema/external-schemaLocation", 283 localSchemas); 284 285 Document metsDocument; 287 288 try 289 { 290 metsDocument = builder.build(is); 291 292 298 } 299 catch (JDOMException je) 300 { 301 throw new MetadataValidationException("Error validating METS in " 302 + is.toString(), je); 303 } 304 305 return new METSManifest(builder, metsDocument.getRootElement()); 306 } 307 308 312 public String getProfile() 313 { 314 return mets.getAttributeValue("PROFILE"); 315 } 316 317 322 public List getContentFiles() 323 throws MetadataValidationException 324 { 325 if (contentFiles != null) 326 return contentFiles; 327 328 Element fileSec = mets.getChild("fileSec", metsNS); 329 if (fileSec == null) 330 throw new MetadataValidationException("Invalid METS Manifest: DSpace requires a fileSec element, but it is missing."); 331 332 contentFiles = new ArrayList (); 333 Iterator fgi = fileSec.getChildren("fileGrp", metsNS).iterator(); 334 while (fgi.hasNext()) 335 { 336 Element fg = (Element)fgi.next(); 337 Iterator fi = fg.getChildren("file", metsNS).iterator(); 338 while (fi.hasNext()) 339 { 340 Element f = (Element)fi.next(); 341 contentFiles.add(f); 342 } 343 } 344 return contentFiles; 345 } 346 347 353 public List getMdFiles() 354 throws MetadataValidationException 355 { 356 if (mdFiles == null) 357 { 358 try 359 { 360 XPath xpath = XPath.newInstance("descendant::mets:mdRef"); 363 xpath.addNamespace(metsNS); 364 mdFiles = xpath.selectNodes(mets); 365 } 366 catch (JDOMException je) 367 { 368 throw new MetadataValidationException("Failed while searching for mdRef elements in manifest: ", je); 369 } 370 } 371 return mdFiles; 372 } 373 374 386 public Element getOriginalFile(Element file) 387 { 388 String groupID = file.getAttributeValue("GROUPID"); 389 if (groupID == null || groupID.equals("")) 390 return null; 391 392 try 393 { 394 XPath xpath = XPath.newInstance( 395 "mets:fileSec/mets:fileGrp[@USE=\"CONTENT\"]/mets:file[@GROUPID=\""+groupID+"\"]"); 396 xpath.addNamespace(metsNS); 397 List oFiles = xpath.selectNodes(mets); 398 if (oFiles.size() > 0) 399 { 400 log.debug("Got ORIGINAL file for derived="+file.toString()); 401 return (Element)oFiles.get(0); 402 } 403 else 404 return null; 405 } 406 catch (JDOMException je) 407 { 408 log.warn("Got exception on XPATH looking for Original file, "+je.toString()); 409 return null; 410 } 411 } 412 413 private static String normalizeBundleName(String in) 416 { 417 if (in.equals("CONTENT")) 418 return Constants.CONTENT_BUNDLE_NAME; 419 else if (in.equals("MANIFESTMD")) 420 return Constants.METADATA_BUNDLE_NAME; 421 return in; 422 } 423 424 429 public static String getBundleName(Element file) 430 throws MetadataValidationException 431 { 432 Element fg = file.getParentElement(); 433 String fgUse = fg.getAttributeValue("USE"); 434 if (fgUse == null) 435 throw new MetadataValidationException("Invalid METS Manifest: every fileGrp element must have a USE attribute."); 436 return normalizeBundleName(fgUse); 437 } 438 439 448 public static String getFileName(Element file) 449 throws MetadataValidationException 450 { 451 Element ref; 452 if (file.getName().equals("file")) 453 { 454 ref = file.getChild("FLocat", metsNS); 455 if (ref == null) 456 { 457 if (file.getChild("FContent", metsNS) == null) 459 throw new MetadataValidationException("Invalid METS Manifest: Every file element must have FLocat child."); 460 else 461 throw new MetadataValidationException("Invalid METS Manifest: file element has forbidden FContent child, only FLocat is allowed."); 462 } 463 } 464 else if (file.getName().equals("mdRef")) 465 ref = file; 466 else 467 throw new MetadataValidationException("getFileName() called with recognized element type: "+file.toString()); 468 String loctype = ref.getAttributeValue("LOCTYPE"); 469 if (loctype != null && loctype.equals("URL")) 470 { 471 String result = ref.getAttributeValue("href", xlinkNS); 472 if (result == null) 473 throw new MetadataValidationException("Invalid METS Manifest: FLocat/mdRef is missing the required xlink:href attribute."); 474 return result; 475 } 476 throw new MetadataValidationException("Invalid METS Manifest: FLocat/mdRef does not have LOCTYPE=\"URL\" attribute."); 477 } 478 479 486 public Element getPrimaryBitstream() 487 throws MetadataValidationException 488 { 489 Element firstDiv = getFirstDiv(); 490 Element fptr = firstDiv.getChild("fptr", metsNS); 491 if (fptr == null) 492 return null; 493 String id = fptr.getAttributeValue("FILEID"); 494 if (id == null) 495 throw new MetadataValidationException("fptr for Primary Bitstream is missing the required FILEID attribute."); 496 Element result = getElementByXPath("descendant::mets:file[@ID=\""+id+"\"]", false); 497 if (result == null) 498 throw new MetadataValidationException("Cannot find file element for Primary Bitstream: looking for ID="+id); 499 return result; 500 } 501 502 505 public String getMdType(Element mdSec) 506 throws MetadataValidationException 507 { 508 Element md = mdSec.getChild("mdRef", metsNS); 509 if (md == null) 510 md = mdSec.getChild("mdWrap", metsNS); 511 if (md == null) 512 throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element has neither mdRef nor mdWrap child."); 513 String result = md.getAttributeValue("MDTYPE"); 514 if (result != null && result.equals("OTHER")) 515 result = md.getAttributeValue("OTHERMDTYPE"); 516 if (result == null) 517 throw new MetadataValidationException("Invalid METS Manifest: "+md.getName()+" has no MDTYPE or OTHERMDTYPE attribute."); 518 return result; 519 } 520 521 525 public String getMdContentMimeType(Element mdSec) 526 throws MetadataValidationException 527 { 528 Element mdWrap = mdSec.getChild("mdWrap", metsNS); 529 if (mdWrap != null) 530 { 531 String mimeType = mdWrap.getAttributeValue("MIMETYPE"); 532 if (mimeType == null && mdWrap.getChild("xmlData", metsNS) != null) 533 mimeType = "text/xml"; 534 return mimeType; 535 } 536 Element mdRef = mdSec.getChild("mdRef", metsNS); 537 if (mdRef != null) 538 return mdRef.getAttributeValue("MIMETYPE"); 539 return null; 540 } 541 542 549 public List getMdContentAsXml(Element mdSec, Mdref callback) 550 throws MetadataValidationException, IOException , SQLException , AuthorizeException 551 { 552 try 553 { 554 Element mdRef = null; 555 Element mdWrap = mdSec.getChild("mdWrap", metsNS); 556 if (mdWrap != null) 557 { 558 Element xmlData = mdWrap.getChild("xmlData", metsNS); 559 if (xmlData == null) 560 { 561 Element bin = mdWrap.getChild("binData", metsNS); 562 if (bin == null) 563 throw new MetadataValidationException("Invalid METS Manifest: mdWrap element with neither xmlData nor binData child."); 564 565 else 567 { 568 String mimeType = mdWrap.getAttributeValue("MIMETYPE"); 569 if (mimeType != null && mimeType.equalsIgnoreCase("text/xml")) 570 { 571 byte value[] = Base64.decodeBase64(bin.getText().getBytes()); 572 Document mdd = parser.build(new ByteArrayInputStream (value)); 573 List result = new ArrayList (1); 574 result.add(mdd.getRootElement()); 575 return result; 576 } 577 else 578 { 579 log.warn("Ignoring binData section because MIMETYPE is not XML, but: "+mimeType); 580 return new ArrayList (0); 581 } 582 } 583 } 584 else 585 { 586 return xmlData.getChildren(); 587 } 588 } 589 else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null) 590 { 591 String mimeType = mdRef.getAttributeValue("MIMETYPE"); 592 if (mimeType != null && mimeType.equalsIgnoreCase("text/xml")) 593 { 594 Document mdd = parser.build(callback.getInputStream(mdRef)); 595 List result = new ArrayList (1); 596 result.add(mdd.getRootElement()); 597 return result; 598 } 599 else 600 { 601 log.warn("Ignoring mdRef section because MIMETYPE is not XML, but: "+mimeType); 602 return new ArrayList (0); 603 } 604 } 605 else 606 throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child."); 607 } 608 catch (JDOMException je) 609 { 610 throw new MetadataValidationException("Error parsing or validating metadata section in mdRef or binData within "+mdSec.toString(), je); 611 } 612 613 } 614 615 622 public InputStream getMdContentAsStream(Element mdSec, Mdref callback) 623 throws MetadataValidationException, IOException , SQLException , AuthorizeException 624 { 625 Element mdRef = null; 626 Element mdWrap = mdSec.getChild("mdWrap", metsNS); 627 if (mdWrap != null) 628 { 629 Element xmlData = mdWrap.getChild("xmlData", metsNS); 630 if (xmlData == null) 631 { 632 Element bin = mdWrap.getChild("binData", metsNS); 633 if (bin == null) 634 throw new MetadataValidationException("Invalid METS Manifest: mdWrap element with neither xmlData nor binData child."); 635 636 else 637 { 638 byte value[] = Base64.decodeBase64(bin.getText().getBytes()); 639 return new ByteArrayInputStream (value); 640 } 641 } 642 else 643 { 644 XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); 645 return new ByteArrayInputStream ( 646 outputPretty.outputString(xmlData.getChildren()).getBytes()); 647 } 648 } 649 else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null) 650 { 651 return callback.getInputStream(mdRef); 652 } 653 else 654 throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child."); 655 } 656 657 658 private void crosswalkMdContent(Element mdSec, Mdref callback, 663 IngestionCrosswalk xwalk, Context context, DSpaceObject dso) 664 throws CrosswalkException, IOException , SQLException , AuthorizeException 665 { 666 List xml = getMdContentAsXml(mdSec,callback); 667 668 try 670 { 671 xwalk.ingest(context, dso, xml); 672 } 673 catch (CrosswalkObjectNotSupported e) 674 { 675 log.warn("Skipping metadata for inappropriate type of object: Object="+dso.toString()+", error="+e.toString()); 676 } 677 } 678 679 private Element getFirstDiv() 683 throws MetadataValidationException 684 { 685 Element sm = mets.getChild("structMap", metsNS); 686 if (sm == null) 687 throw new MetadataValidationException("METS document is missing the required structMap element."); 688 689 Element result = sm.getChild("div", metsNS); 690 if (result == null) 691 throw new MetadataValidationException("METS document is missing the required first div element in first structMap."); 692 693 log.debug("Got firstDiv result="+result.toString()); 694 return (Element)result; 695 } 696 697 private Element getElementByXPath(String path, boolean nullOk) 700 throws MetadataValidationException 701 { 702 try 703 { 704 XPath xpath = XPath.newInstance(path); 705 xpath.addNamespace(metsNS); 706 xpath.addNamespace(xlinkNS); 707 Object result = xpath.selectSingleNode(mets); 708 if (result == null && nullOk) 709 return null; 710 else if (result instanceof Element) 711 return (Element)result; 712 else 713 throw new MetadataValidationException("METSManifest: Failed to resolve XPath, path=\""+path+"\""); 714 } 715 catch (JDOMException je) 716 { 717 throw new MetadataValidationException("METSManifest: Failed to resolve XPath, path=\""+path+"\"", je); 718 } 719 } 720 721 private IngestionCrosswalk getCrosswalk(String type) 728 { 729 String xwalkName = ConfigurationManager.getProperty(CONFIG_METADATA_PREFIX + type); 730 if (xwalkName == null) 731 xwalkName = type; 732 return (IngestionCrosswalk) 733 PluginManager.getNamedPlugin(IngestionCrosswalk.class, xwalkName); 734 } 735 736 743 public Element[] getItemDmds() 744 throws MetadataValidationException 745 { 746 Element firstDiv = getFirstDiv(); 748 String dmds = firstDiv.getAttributeValue("DMDID"); 749 if (dmds == null) 750 throw new MetadataValidationException("Invalid METS: Missing reference to Item descriptive metadata, first div on first structmap must have a DMDID attribute."); 751 String dmdID[] = dmds.split("\\s+"); 752 Element result[] = new Element[dmdID.length]; 753 754 for (int i = 0; i < dmdID.length; ++i) 755 result[i] = getElementByXPath("mets:dmdSec[@ID=\""+dmdID[i]+"\"]", false); 756 return result; 757 } 758 759 764 public Element[] getItemRightsMD() 765 throws MetadataValidationException 766 { 767 Element firstDiv = getFirstDiv(); 769 String amds = firstDiv.getAttributeValue("ADMID"); 770 if (amds == null) 771 { 772 log.debug("getItemRightsMD: No ADMID references found."); 773 return new Element[0]; 774 } 775 String amdID[] = amds.split("\\s+"); 776 List resultList = new ArrayList (); 777 for (int i = 0; i < amdID.length; ++i) 778 { 779 List rmds = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false). 780 getChildren("rightsMD", metsNS); 781 if (rmds.size() > 0) 782 resultList.addAll(rmds); 783 } 784 return (Element[])resultList.toArray(new Element[resultList.size()]); 785 } 786 787 790 public void crosswalkItem(Context context, Item item, Element dmd, Mdref callback) 791 throws MetadataValidationException, 792 CrosswalkException, IOException , SQLException , AuthorizeException 793 { 794 String type = getMdType(dmd); 795 IngestionCrosswalk xwalk = getCrosswalk(type); 796 797 if (xwalk == null) 798 throw new MetadataValidationException("Cannot process METS Manifest: "+ 799 "No crosswalk found for MDTYPE="+type); 800 crosswalkMdContent(dmd, callback, xwalk, context, item); 801 } 802 803 811 public void crosswalkBitstream(Context context, Bitstream bitstream, 812 String fileId, Mdref callback) 813 throws MetadataValidationException, 814 CrosswalkException, IOException , SQLException , AuthorizeException 815 { 816 Element file = getElementByXPath("descendant::mets:file[@ID=\""+fileId+"\"]", false); 817 if (file == null) 818 throw new MetadataValidationException("Failed in Bitstream crosswalk, Could not find file element with ID="+fileId); 819 820 String amds = file.getAttributeValue("ADMID"); 823 if (amds == null) 824 { 825 log.warn("Got no bitstream ADMID, file@ID="+fileId); 826 return; 827 } 828 String amdID[] = amds.split("\\s+"); 829 for (int i = 0; i < amdID.length; ++i) 830 { 831 List techMDs = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false). 832 getChildren("techMD", metsNS); 833 Iterator ti = techMDs.iterator(); 834 while (ti.hasNext()) 835 { 836 Element techMD = (Element)ti.next(); 837 if (techMD != null) 838 { 839 String type = getMdType(techMD); 840 IngestionCrosswalk xwalk = getCrosswalk(type); 841 log.debug("Got bitstream techMD of type="+type+", for file ID="+fileId); 842 843 if (xwalk == null) 844 throw new MetadataValidationException("Cannot process METS Manifest: "+ 845 "No crosswalk found for techMD MDTYPE="+type); 846 crosswalkMdContent(techMD, callback, xwalk, context, bitstream); 847 } 848 } 849 } 850 } 851 852 857 public String getHandle() 858 throws MetadataValidationException 859 { 860 863 String handle = mets.getAttributeValue("OBJID"); 864 865 if (handle != null && handle.startsWith("hdl:")) 866 { 867 return handle.substring(4); 868 } 869 else 870 { 871 throw new MetadataValidationException("Item has no valid Handle (OBJID)"); 872 } 873 } 874 } 875 | Popular Tags |