KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > content > packager > METSManifest


1 /*
2  * METSManifest.java
3  *
4  * Version: $Revision: 1.1 $
5  *
6  * Date: $Date: 2006/03/17 00:04:38 $
7  *
8  * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
9  * Institute of Technology. All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are
13  * met:
14  *
15  * - Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * - Neither the name of the Hewlett-Packard Company nor the name of the
23  * Massachusetts Institute of Technology nor the names of their
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38  * DAMAGE.
39  */

40
41 package org.dspace.content.packager;
42
43 import java.io.ByteArrayInputStream JavaDoc;
44 import java.io.File JavaDoc;
45 import java.io.IOException JavaDoc;
46 import java.io.InputStream JavaDoc;
47 import java.sql.SQLException JavaDoc;
48 import java.util.ArrayList JavaDoc;
49 import java.util.Enumeration JavaDoc;
50 import java.util.Iterator JavaDoc;
51 import java.util.List JavaDoc;
52
53 import org.apache.commons.codec.binary.Base64;
54 import org.apache.log4j.Logger;
55 import org.dspace.authorize.AuthorizeException;
56 import org.dspace.content.Bitstream;
57 import org.dspace.content.DSpaceObject;
58 import org.dspace.content.Item;
59 import org.dspace.content.crosswalk.CrosswalkException;
60 import org.dspace.content.crosswalk.CrosswalkObjectNotSupported;
61 import org.dspace.content.crosswalk.MetadataValidationException;
62 import org.dspace.content.crosswalk.IngestionCrosswalk;
63 import org.dspace.core.ConfigurationManager;
64 import org.dspace.core.Constants;
65 import org.dspace.core.Context;
66 import org.dspace.core.PluginManager;
67 import org.jdom.Document;
68 import org.jdom.Element;
69 import org.jdom.JDOMException;
70 import org.jdom.Namespace;
71 import org.jdom.input.SAXBuilder;
72 import org.jdom.output.Format;
73 import org.jdom.output.XMLOutputter;
74 import org.jdom.xpath.XPath;
75
76 /**
77  * <P>
78  * Manage the METS manifest document for METS importer classes,
79  * such as the package importer <code>org.dspace.content.packager.MetsSubmission</code>
80  * and the federated importer <code>org.dspace.app.mets.FederatedMETSImport</code>
81  * </P>
82  * <P>
83  * It can parse the METS document, build an internal model, and give the importers
84  * access to that model. It also crosswalks
85  * all of the descriptive and administrative metadata in the METS
86  * manifest into the target DSpace Item, under control of the importer.
87  * </P>
88  *
89  * <P>
90  * It reads the following DSpace Configuration entries:
91  * </P>
92  * <UL>
93  * <LI>Local XML schema (XSD) declarations, in the general format:
94  * <br><code>mets.xsd.<em>identifier</em> = <em>namespace</em> <em>xsd-URL</em></code>
95  * <br> eg. <code>mets.xsd.dc = http://purl.org/dc/elements/1.1/ dc.xsd</code>
96  * <br>Add a separate config entry for each schema.
97  * </LI>
98  * <LI>Crosswalk plugin mappings:
99  * These tell it the name of the crosswalk plugin to invoke for metadata sections
100  * with a particular value of <code>MDTYPE</code> (or <code>OTHERMDTYPE</code>).
101  * By default, the crosswalk mechanism will look for a plugin with the
102  * same name as the metadata type (e.g. <code>"MODS"</code>,
103  * <code>"DC"</code>). This example line invokes the <code>QDC</code>
104  * plugin when <code>MDTYPE="DC"</code>
105  * <br><code>mets.submission.crosswalk.DC = QDC </code>
106  * <br> general format is:
107  * <br><code>mets.submission.crosswalk.<em>mdType</em> = <em>pluginName</em> </code>
108  * </LI>
109  * </UL>
110  *
111  *
112  * @author Robert Tansley
113  * @author WeiHua Huang
114  * @author Rita Lee
115  * @author Larry Stone
116  * @see org.dspace.content.packager.MetsSubmission
117  * @see org.dspace.app.mets.FederatedMETSImport
118  */

119 public class METSManifest
120 {
121     /**
122      * Callback interface to retrieve data streams in mdRef elements.
123      * "Package" or file reader returns an input stream for the
124      * given relative path, e.g. to dereference <code>mdRef</code> elements.
125      */

126     public interface Mdref
127     {
128         /**
129          * Make the contents of an external resource mentioned in
130          * an <code>mdRef</code> element available as an <code>InputStream</code>.
131          * The implementation must use the information in the
132          * <code>mdRef</code> element, and the state in the object that
133          * implements this interface, to find the actual metadata content.
134          * <p>
135          * For example, an implementation that ingests a directory of
136          * files on the local filesystem would get a relative pathname
137          * out of the <code>mdRef</code> and open that file.
138          *
139          * @param mdRef JDOM element of mdRef in the METS manifest.
140          * @return stream containing the metadata mentioned in mdRef.
141          * @throw MetadataValidationException if the mdRef is unacceptable or missing required information.
142          * @throw IOException if it is returned by services called by this method.
143          * @throw SQLException if it is returned by services called by this method.
144          * @throw AuthorizeException if it is returned by services called by this method.
145          */

146         public InputStream JavaDoc getInputStream(Element mdRef)
147             throws MetadataValidationException, IOException JavaDoc, SQLException JavaDoc, AuthorizeException;
148     }
149
150     /** log4j category */
151     private static Logger log = Logger.getLogger(METSManifest.class);
152
153     /** Canonical filename of METS manifest within a package or as a bitstream. */
154     public final static String JavaDoc MANIFEST_FILE = "mets.xml";
155
156     /** Prefix of DSpace configuration lines that map METS metadata type to
157      * crosswalk plugin names.
158      */

159     private final static String JavaDoc CONFIG_METADATA_PREFIX = "mets.submission.crosswalk.";
160
161     /** prefix of config lines identifying local XML Schema (XSD) files */
162     private final static String JavaDoc CONFIG_XSD_PREFIX = "mets.xsd.";
163
164     /** Dublin core element namespace */
165     private static Namespace dcNS = Namespace
166             .getNamespace("http://purl.org/dc/elements/1.1/");
167
168     /** Dublin core term namespace (for qualified DC) */
169     private static Namespace dcTermNS = Namespace
170             .getNamespace("http://purl.org/dc/terms/");
171
172     /** METS namespace -- includes "mets" prefix for use in XPaths */
173     public static Namespace metsNS = Namespace
174             .getNamespace("mets", "http://www.loc.gov/METS/");
175
176     /** XLink namespace -- includes "xlink" prefix prefix for use in XPaths */
177     private static Namespace xlinkNS = Namespace
178             .getNamespace("xlink", "http://www.w3.org/1999/xlink");
179
180     /** root element of the current METS manifest. */
181     private Element mets = null;
182
183     /** all mdRef elements in the manifest */
184     private List JavaDoc mdFiles = null;
185
186     /** <file> elements in "original" filegroup (bundle) */
187     private List JavaDoc contentFiles = null;
188
189     /** builder to use for mdRef streams, inherited from create() */
190     private SAXBuilder parser = null;
191
192     // Create list of local schemas at load time, since it depends only
193
// on the DSpace configuration.
194
private static String JavaDoc localSchemas;
195     static
196     {
197         String JavaDoc dspace_dir = ConfigurationManager.getProperty("dspace.dir");
198         File JavaDoc xsdPath1 = new File JavaDoc(dspace_dir+"/config/schemas/");
199         File JavaDoc xsdPath2 = new File JavaDoc(dspace_dir+"/config/");
200
201         Enumeration JavaDoc pe = ConfigurationManager.propertyNames();
202         StringBuffer JavaDoc result = new StringBuffer JavaDoc();
203         while (pe.hasMoreElements())
204         {
205             // config lines have the format:
206
// mets.xsd.{identifier} = {namespace} {xsd-URL}
207
// e.g.
208
// mets.xsd.dc = http://purl.org/dc/elements/1.1/ dc.xsd
209
// (filename is relative to {dspace_dir}/config/schemas/)
210
String JavaDoc key = (String JavaDoc)pe.nextElement();
211             if (key.startsWith(CONFIG_XSD_PREFIX))
212             {
213                 String JavaDoc spec = ConfigurationManager.getProperty(key);
214                 String JavaDoc val[] = spec.trim().split("\\s+");
215                 if (val.length == 2)
216                 {
217                     File JavaDoc xsd = new File JavaDoc(xsdPath1, val[1]);
218                     if (!xsd.exists())
219                          xsd = new File JavaDoc(xsdPath2, val[1]);
220                     if (!xsd.exists())
221                         log.warn("Schema file not found for config entry=\""+spec+"\"");
222                     else
223                     {
224                         try
225                         {
226                             String JavaDoc u = xsd.toURL().toString();
227                             if (result.length() > 0)
228                                 result.append(" ");
229                             result.append(val[0]).append(" ").append(u);
230                         }
231                         catch (java.net.MalformedURLException JavaDoc e)
232                         {
233                             log.warn("Skipping badly formed XSD URL: "+e.toString());
234                         }
235                     }
236                 }
237                 else
238                     log.warn("Schema config entry has wrong format, entry=\""+spec+"\"");
239             }
240         }
241         localSchemas = result.toString();
242         log.debug("Got local schemas = \""+localSchemas+"\"");
243     }
244
/**
 * Private constructor; instances are obtained through create().
 * @param builder XML parser, kept for parsing mdRef'd files and binData
 * @param mets root element of the parsed METS document
 */
private METSManifest(SAXBuilder builder, Element mets)
{
    this.parser = builder;
    this.mets = mets;
}
256
257     /**
258      * Create a new manifest object from a serialized METS XML document.
259      * Parse document read from the input stream, optionally validating.
260      * @param is input stream containing serialized XML
261      * @param validate if true, enable XML validation using schemas
262      * in document. Also validates any sub-documents.
263      * @throws MetadataValidationException if there is any error parsing
264      * or validating the METS.
265      * @return new METSManifest object.
266      */

267     public static METSManifest create(InputStream JavaDoc is, boolean validate)
268             throws IOException JavaDoc,
269             MetadataValidationException
270     {
271         SAXBuilder builder = new SAXBuilder(validate);
272
273         // Set validation feature
274
if (validate)
275             builder.setFeature("http://apache.org/xml/features/validation/schema",
276                     true);
277
278         // Tell the parser where local copies of schemas are, to speed up
279
// validation. Local XSDs are identified in the configuration file.
280
if (localSchemas.length() > 0)
281             builder.setProperty(
282                     "http://apache.org/xml/properties/schema/external-schemaLocation",
283                     localSchemas);
284
285         // Parse the METS file
286
Document metsDocument;
287
288         try
289         {
290             metsDocument = builder.build(is);
291
292             // XXX for temporary debugging
293
/*
294             XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
295             log.debug("Got METS DOCUMENT:");
296             log.debug(outputPretty.outputString(metsDocument));
297               */

298         }
299         catch (JDOMException je)
300         {
301             throw new MetadataValidationException("Error validating METS in "
302                     + is.toString(), je);
303         }
304
305         return new METSManifest(builder, metsDocument.getRootElement());
306     }
307
308     /**
309      * Gets name of the profile to which this METS document conforms.
310      * @return value the PROFILE attribute of mets element, or null if none.
311      */

312     public String JavaDoc getProfile()
313     {
314         return mets.getAttributeValue("PROFILE");
315     }
316
317     /**
318      * Gets all <code>file</code> elements which make up
319      * the item's content.
320      * @return a List of <code>Element</code>s.
321      */

322     public List JavaDoc getContentFiles()
323         throws MetadataValidationException
324     {
325         if (contentFiles != null)
326             return contentFiles;
327
328         Element fileSec = mets.getChild("fileSec", metsNS);
329         if (fileSec == null)
330             throw new MetadataValidationException("Invalid METS Manifest: DSpace requires a fileSec element, but it is missing.");
331
332         contentFiles = new ArrayList JavaDoc();
333         Iterator JavaDoc fgi = fileSec.getChildren("fileGrp", metsNS).iterator();
334         while (fgi.hasNext())
335         {
336             Element fg = (Element)fgi.next();
337             Iterator JavaDoc fi = fg.getChildren("file", metsNS).iterator();
338             while (fi.hasNext())
339             {
340                 Element f = (Element)fi.next();
341                 contentFiles.add(f);
342             }
343         }
344         return contentFiles;
345     }
346
347     /**
348      * Gets list of all <code>mdRef</code> elements in the METS
349      * document. Used by ingester to e.g. check that all
350      * required files are present.
351      * @return a List of <code>Element</code>s.
352      */

353     public List JavaDoc getMdFiles()
354         throws MetadataValidationException
355     {
356         if (mdFiles == null)
357         {
358             try
359             {
360                 // Use a special namespace with known prefix
361
// so we get the right prefix.
362
XPath xpath = XPath.newInstance("descendant::mets:mdRef");
363                 xpath.addNamespace(metsNS);
364                 mdFiles = xpath.selectNodes(mets);
365             }
366             catch (JDOMException je)
367             {
368                 throw new MetadataValidationException("Failed while searching for mdRef elements in manifest: ", je);
369             }
370         }
371         return mdFiles;
372     }
373
374     /**
375      * Get the "original" file element for a derived file.
376      * Finds the original from which this was derived by matching the GROUPID
377      * attribute that binds it to its original. For instance, the file for
378      * a thumbnail image would have the same GROUPID as its full-size version.
379      * <p>
380      * NOTE: This pattern of relating derived files through the GROUPID
381      * attribute is peculiar to the DSpace METS SIP profile, and may not be
382      * generally useful with other sorts of METS documents.
383      * @param file METS file element of derived file
384      * @return file Element of original or null if none found.
385      */

386     public Element getOriginalFile(Element file)
387     {
388         String JavaDoc groupID = file.getAttributeValue("GROUPID");
389         if (groupID == null || groupID.equals(""))
390             return null;
391
392         try
393         {
394             XPath xpath = XPath.newInstance(
395 "mets:fileSec/mets:fileGrp[@USE=\"CONTENT\"]/mets:file[@GROUPID=\""+groupID+"\"]");
396             xpath.addNamespace(metsNS);
397             List JavaDoc oFiles = xpath.selectNodes(mets);
398             if (oFiles.size() > 0)
399             {
400                 log.debug("Got ORIGINAL file for derived="+file.toString());
401                 return (Element)oFiles.get(0);
402             }
403             else
404                 return null;
405         }
406         catch (JDOMException je)
407         {
408             log.warn("Got exception on XPATH looking for Original file, "+je.toString());
409             return null;
410         }
411     }
412
413     // translate bundle name from METS to DSpace; METS may be "CONTENT"
414
// or "ORIGINAL" for the DSPace "ORIGINAL", rest are left alone.
415
private static String JavaDoc normalizeBundleName(String JavaDoc in)
416     {
417         if (in.equals("CONTENT"))
418             return Constants.CONTENT_BUNDLE_NAME;
419         else if (in.equals("MANIFESTMD"))
420             return Constants.METADATA_BUNDLE_NAME;
421         return in;
422     }
423
424     /**
425      * Get the DSpace bundle name corresponding to the <code>USE</code> attribute of the file group enclosing this <code>file</code> element.
426      * @return DSpace bundle name
427      * @throws MetadataValidationException when there is no USE attribute on the enclosing fileGrp.
428      */

429     public static String JavaDoc getBundleName(Element file)
430         throws MetadataValidationException
431     {
432         Element fg = file.getParentElement();
433         String JavaDoc fgUse = fg.getAttributeValue("USE");
434         if (fgUse == null)
435             throw new MetadataValidationException("Invalid METS Manifest: every fileGrp element must have a USE attribute.");
436         return normalizeBundleName(fgUse);
437     }
438
439     /**
440      * Get the "local" file name of this <code>file</code> or <code>mdRef</code> element.
441      * By "local" we mean the reference to the actual resource containing
442      * the data for this file, e.g. a relative path within a Zip or tar archive
443      * if the METS is serving as a manifest for that sort of package.
444      * @return "local" file name (i.e. relative to package or content
445      * directory) corresponding to this <code>file</code> or <code>mdRef</code> element.
446      * @throws MetadataValidationException when there is not enough information to find a resource identifier.
447      */

448     public static String JavaDoc getFileName(Element file)
449         throws MetadataValidationException
450     {
451         Element ref;
452         if (file.getName().equals("file"))
453         {
454             ref = file.getChild("FLocat", metsNS);
455             if (ref == null)
456             {
457                 // check for forbidden FContent child first:
458
if (file.getChild("FContent", metsNS) == null)
459                     throw new MetadataValidationException("Invalid METS Manifest: Every file element must have FLocat child.");
460                 else
461                     throw new MetadataValidationException("Invalid METS Manifest: file element has forbidden FContent child, only FLocat is allowed.");
462             }
463         }
464         else if (file.getName().equals("mdRef"))
465             ref = file;
466         else
467             throw new MetadataValidationException("getFileName() called with recognized element type: "+file.toString());
468         String JavaDoc loctype = ref.getAttributeValue("LOCTYPE");
469         if (loctype != null && loctype.equals("URL"))
470         {
471             String JavaDoc result = ref.getAttributeValue("href", xlinkNS);
472             if (result == null)
473                 throw new MetadataValidationException("Invalid METS Manifest: FLocat/mdRef is missing the required xlink:href attribute.");
474             return result;
475         }
476         throw new MetadataValidationException("Invalid METS Manifest: FLocat/mdRef does not have LOCTYPE=\"URL\" attribute.");
477     }
478
479     /**
480      * Returns file element corresponding to primary bitstream.
481      * There is <i>ONLY</i> a primary bitstream if the first <code>div</code> under
482      * first </code>structMap</code> has an </code>fptr</code>.
483      *
484      * @return file element of Item's primary bitstream, or null if there is none.
485      */

486     public Element getPrimaryBitstream()
487         throws MetadataValidationException
488     {
489         Element firstDiv = getFirstDiv();
490         Element fptr = firstDiv.getChild("fptr", metsNS);
491         if (fptr == null)
492             return null;
493         String JavaDoc id = fptr.getAttributeValue("FILEID");
494         if (id == null)
495             throw new MetadataValidationException("fptr for Primary Bitstream is missing the required FILEID attribute.");
496         Element result = getElementByXPath("descendant::mets:file[@ID=\""+id+"\"]", false);
497         if (result == null)
498             throw new MetadataValidationException("Cannot find file element for Primary Bitstream: looking for ID="+id);
499         return result;
500     }
501
502     /** Get the metadata type from within a *mdSec element.
503      * @return metadata type name.
504      */

505     public String JavaDoc getMdType(Element mdSec)
506         throws MetadataValidationException
507     {
508         Element md = mdSec.getChild("mdRef", metsNS);
509         if (md == null)
510             md = mdSec.getChild("mdWrap", metsNS);
511         if (md == null)
512             throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element has neither mdRef nor mdWrap child.");
513         String JavaDoc result = md.getAttributeValue("MDTYPE");
514         if (result != null && result.equals("OTHER"))
515             result = md.getAttributeValue("OTHERMDTYPE");
516         if (result == null)
517             throw new MetadataValidationException("Invalid METS Manifest: "+md.getName()+" has no MDTYPE or OTHERMDTYPE attribute.");
518         return result;
519     }
520
521     /**
522      * Returns MIME type of metadata content, if available.
523      * @return MIMEtype word, or null if none is available.
524      */

525     public String JavaDoc getMdContentMimeType(Element mdSec)
526         throws MetadataValidationException
527     {
528         Element mdWrap = mdSec.getChild("mdWrap", metsNS);
529         if (mdWrap != null)
530         {
531             String JavaDoc mimeType = mdWrap.getAttributeValue("MIMETYPE");
532             if (mimeType == null && mdWrap.getChild("xmlData", metsNS) != null)
533             mimeType = "text/xml";
534             return mimeType;
535         }
536         Element mdRef = mdSec.getChild("mdRef", metsNS);
537         if (mdRef != null)
538             return mdRef.getAttributeValue("MIMETYPE");
539         return null;
540     }
541
542     /**
543      * Return contents of *md element as List of XML Element objects.
544      * Gets content, dereferecing mdRef if necessary, or decoding and parsing
545      * a binData that contains XML.
546      * @return contents of metadata section, or empty list if no XML content is available.
547      * @throws MetadataValidationException if METS is invalid, or there is an error parsing the XML.
548      */

549     public List JavaDoc getMdContentAsXml(Element mdSec, Mdref callback)
550         throws MetadataValidationException, IOException JavaDoc, SQLException JavaDoc, AuthorizeException
551     {
552         try
553         {
554             Element mdRef = null;
555             Element mdWrap = mdSec.getChild("mdWrap", metsNS);
556             if (mdWrap != null)
557             {
558                 Element xmlData = mdWrap.getChild("xmlData", metsNS);
559                 if (xmlData == null)
560                 {
561                     Element bin = mdWrap.getChild("binData", metsNS);
562                     if (bin == null)
563                         throw new MetadataValidationException("Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");
564
565                     // if binData is actually XML, return it; otherwise ignore.
566
else
567                     {
568                         String JavaDoc mimeType = mdWrap.getAttributeValue("MIMETYPE");
569                         if (mimeType != null && mimeType.equalsIgnoreCase("text/xml"))
570                         {
571                             byte value[] = Base64.decodeBase64(bin.getText().getBytes());
572                             Document mdd = parser.build(new ByteArrayInputStream JavaDoc(value));
573                             List JavaDoc result = new ArrayList JavaDoc(1);
574                             result.add(mdd.getRootElement());
575                             return result;
576                         }
577                         else
578                         {
579                             log.warn("Ignoring binData section because MIMETYPE is not XML, but: "+mimeType);
580                             return new ArrayList JavaDoc(0);
581                         }
582                    }
583                 }
584                 else
585                 {
586                     return xmlData.getChildren();
587                 }
588             }
589             else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null)
590             {
591                 String JavaDoc mimeType = mdRef.getAttributeValue("MIMETYPE");
592                 if (mimeType != null && mimeType.equalsIgnoreCase("text/xml"))
593                 {
594                     Document mdd = parser.build(callback.getInputStream(mdRef));
595                     List JavaDoc result = new ArrayList JavaDoc(1);
596                     result.add(mdd.getRootElement());
597                     return result;
598                 }
599                 else
600                 {
601                     log.warn("Ignoring mdRef section because MIMETYPE is not XML, but: "+mimeType);
602                     return new ArrayList JavaDoc(0);
603                 }
604             }
605             else
606                 throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
607         }
608         catch (JDOMException je)
609         {
610             throw new MetadataValidationException("Error parsing or validating metadata section in mdRef or binData within "+mdSec.toString(), je);
611         }
612
613     }
614
615     /**
616      * Return contents of *md element as stream.
617      * Gets content, dereferecing mdRef if necessary, or decoding
618      * a binData element if necessary.
619      * @return Stream containing contents of metadata section. Never returns null.
620      * @throws MetadataValidationException if METS format does not contain any metadata.
621      */

622     public InputStream JavaDoc getMdContentAsStream(Element mdSec, Mdref callback)
623         throws MetadataValidationException, IOException JavaDoc, SQLException JavaDoc, AuthorizeException
624     {
625         Element mdRef = null;
626         Element mdWrap = mdSec.getChild("mdWrap", metsNS);
627         if (mdWrap != null)
628         {
629             Element xmlData = mdWrap.getChild("xmlData", metsNS);
630             if (xmlData == null)
631             {
632                 Element bin = mdWrap.getChild("binData", metsNS);
633                 if (bin == null)
634                     throw new MetadataValidationException("Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");
635
636                 else
637                 {
638                     byte value[] = Base64.decodeBase64(bin.getText().getBytes());
639                     return new ByteArrayInputStream JavaDoc(value);
640                 }
641             }
642             else
643             {
644                 XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
645                 return new ByteArrayInputStream JavaDoc(
646                         outputPretty.outputString(xmlData.getChildren()).getBytes());
647             }
648         }
649         else if ((mdRef = mdSec.getChild("mdRef", metsNS)) != null)
650         {
651             return callback.getInputStream(mdRef);
652         }
653         else
654             throw new MetadataValidationException("Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
655     }
656
657
658     // special call to crosswalk the guts of a metadata *Sec (dmdSec, amdSec)
659
// because mdRef and mdWrap have to be handled differently.
660
// It's a lot like getMdContentAsXml but cannot use that because xwalk
661
// should be called with root element OR list depending on what was given.
662
private void crosswalkMdContent(Element mdSec, Mdref callback,
663                 IngestionCrosswalk xwalk, Context context, DSpaceObject dso)
664         throws CrosswalkException, IOException JavaDoc, SQLException JavaDoc, AuthorizeException
665     {
666         List JavaDoc xml = getMdContentAsXml(mdSec,callback);
667
668         // if we get inappropriate metadata, e.g. PREMIS for Item, let it go.
669
try
670         {
671             xwalk.ingest(context, dso, xml);
672         }
673         catch (CrosswalkObjectNotSupported e)
674         {
675             log.warn("Skipping metadata for inappropriate type of object: Object="+dso.toString()+", error="+e.toString());
676         }
677     }
678
679     // return first <div> of first <structMap>;
680
// in DSpace profile, this is where item-wide dmd and other metadata
681
// lives as IDrefs.
682
private Element getFirstDiv()
683         throws MetadataValidationException
684     {
685         Element sm = mets.getChild("structMap", metsNS);
686         if (sm == null)
687             throw new MetadataValidationException("METS document is missing the required structMap element.");
688
689         Element result = sm.getChild("div", metsNS);
690         if (result == null)
691             throw new MetadataValidationException("METS document is missing the required first div element in first structMap.");
692
693         log.debug("Got firstDiv result="+result.toString());
694         return (Element)result;
695     }
696
697     // return a single Element node found by one-off path.
698
// use only when path varies each time you call it.
699
private Element getElementByXPath(String JavaDoc path, boolean nullOk)
700         throws MetadataValidationException
701     {
702         try
703         {
704             XPath xpath = XPath.newInstance(path);
705             xpath.addNamespace(metsNS);
706             xpath.addNamespace(xlinkNS);
707             Object JavaDoc result = xpath.selectSingleNode(mets);
708             if (result == null && nullOk)
709                 return null;
710             else if (result instanceof Element)
711                 return (Element)result;
712             else
713                 throw new MetadataValidationException("METSManifest: Failed to resolve XPath, path=\""+path+"\"");
714         }
715         catch (JDOMException je)
716         {
717             throw new MetadataValidationException("METSManifest: Failed to resolve XPath, path=\""+path+"\"", je);
718         }
719     }
720
721     // Find crosswalk for the indicated metadata type (e.g. "DC", "MODS")
722
// The crosswalk plugin name MAY be indirected in config file,
723
// through an entry like
724
// mets.submission.crosswalk.{mdType} = {pluginName}
725
// e.g.
726
// mets.submission.crosswalk.DC = mysite-QDC
727
private IngestionCrosswalk getCrosswalk(String JavaDoc type)
728     {
729         String JavaDoc xwalkName = ConfigurationManager.getProperty(CONFIG_METADATA_PREFIX + type);
730         if (xwalkName == null)
731             xwalkName = type;
732         return (IngestionCrosswalk)
733           PluginManager.getNamedPlugin(IngestionCrosswalk.class, xwalkName);
734     }
735
736     /**
737      * Gets all dmdSec elements containing metadata for the DSpace Item.
738      *
739      * @return array of Elements, each a dmdSec. May be empty but NOT null.
740      * @throws MetadataValidationException if the METS is missing a reference to item-wide
741      * DMDs in the correct place.
742      */

743     public Element[] getItemDmds()
744         throws MetadataValidationException
745     {
746         // div@DMDID is actually IDREFS, a space-separated list of IDs:
747
Element firstDiv = getFirstDiv();
748         String JavaDoc dmds = firstDiv.getAttributeValue("DMDID");
749         if (dmds == null)
750             throw new MetadataValidationException("Invalid METS: Missing reference to Item descriptive metadata, first div on first structmap must have a DMDID attribute.");
751         String JavaDoc dmdID[] = dmds.split("\\s+");
752         Element result[] = new Element[dmdID.length];
753
754         for (int i = 0; i < dmdID.length; ++i)
755             result[i] = getElementByXPath("mets:dmdSec[@ID=\""+dmdID[i]+"\"]", false);
756         return result;
757     }
758
759     /**
760      * Return rights metadata section(s) relevant to item as a whole.
761      * @return array of rightsMd elements, possibly empty but never null.
762      * @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing.
763      */

764     public Element[] getItemRightsMD()
765         throws MetadataValidationException
766     {
767         // div@ADMID is actually IDREFS, a space-separated list of IDs:
768
Element firstDiv = getFirstDiv();
769         String JavaDoc amds = firstDiv.getAttributeValue("ADMID");
770         if (amds == null)
771         {
772             log.debug("getItemRightsMD: No ADMID references found.");
773             return new Element[0];
774         }
775         String JavaDoc amdID[] = amds.split("\\s+");
776         List JavaDoc resultList = new ArrayList JavaDoc();
777         for (int i = 0; i < amdID.length; ++i)
778         {
779             List JavaDoc rmds = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false).
780                             getChildren("rightsMD", metsNS);
781             if (rmds.size() > 0)
782                 resultList.addAll(rmds);
783         }
784         return (Element[])resultList.toArray(new Element[resultList.size()]);
785     }
786
787     /**
788      * Invokes appropriate crosswalks on Item-wide descriptive metadata.
789      */

790     public void crosswalkItem(Context context, Item item, Element dmd, Mdref callback)
791         throws MetadataValidationException,
792                CrosswalkException, IOException JavaDoc, SQLException JavaDoc, AuthorizeException
793     {
794         String JavaDoc type = getMdType(dmd);
795         IngestionCrosswalk xwalk = getCrosswalk(type);
796
797         if (xwalk == null)
798             throw new MetadataValidationException("Cannot process METS Manifest: "+
799                 "No crosswalk found for MDTYPE="+type);
800         crosswalkMdContent(dmd, callback, xwalk, context, item);
801     }
802
803     /**
804      * Crosswalk the metadata associated with a particular <code>file</code>
805      * element into the bitstream it corresponds to.
806      * @param context a dspace context.
807      * @param bs bitstream target of the crosswalk
808      * @param fileId value of ID attribute in the file element responsible
809      * for the contents of that bitstream.
810      */

811     public void crosswalkBitstream(Context context, Bitstream bitstream,
812                                    String JavaDoc fileId, Mdref callback)
813         throws MetadataValidationException,
814                CrosswalkException, IOException JavaDoc, SQLException JavaDoc, AuthorizeException
815     {
816         Element file = getElementByXPath("descendant::mets:file[@ID=\""+fileId+"\"]", false);
817         if (file == null)
818             throw new MetadataValidationException("Failed in Bitstream crosswalk, Could not find file element with ID="+fileId);
819
820         // In DSpace METS SIP spec, admin metadata is only "highly
821
// recommended", not "required", so it is OK if there is no ADMID.
822
String JavaDoc amds = file.getAttributeValue("ADMID");
823         if (amds == null)
824         {
825             log.warn("Got no bitstream ADMID, file@ID="+fileId);
826             return;
827         }
828         String JavaDoc amdID[] = amds.split("\\s+");
829         for (int i = 0; i < amdID.length; ++i)
830         {
831             List JavaDoc techMDs = getElementByXPath("mets:amdSec[@ID=\""+amdID[i]+"\"]", false).
832                                  getChildren("techMD", metsNS);
833             Iterator JavaDoc ti = techMDs.iterator();
834             while (ti.hasNext())
835             {
836                 Element techMD = (Element)ti.next();
837                 if (techMD != null)
838                 {
839                     String JavaDoc type = getMdType(techMD);
840                     IngestionCrosswalk xwalk = getCrosswalk(type);
841                     log.debug("Got bitstream techMD of type="+type+", for file ID="+fileId);
842                      
843                     if (xwalk == null)
844                         throw new MetadataValidationException("Cannot process METS Manifest: "+
845                             "No crosswalk found for techMD MDTYPE="+type);
846                     crosswalkMdContent(techMD, callback, xwalk, context, bitstream);
847                 }
848             }
849         }
850     }
851
852     /**
853      * Find Handle (if any) identifier labelling this manifest.
854      * @return handle (never null)
855      * @throws MetadataValidationException if no handle available.
856      */

857     public String JavaDoc getHandle()
858         throws MetadataValidationException
859     {
860         // TODO: XXX Make configurable? Handle optionally passed in?
861
// FIXME: Not sure if OBJID is really the right place
862

863         String JavaDoc handle = mets.getAttributeValue("OBJID");
864
865         if (handle != null && handle.startsWith("hdl:"))
866         {
867             return handle.substring(4);
868         }
869         else
870         {
871             throw new MetadataValidationException("Item has no valid Handle (OBJID)");
872         }
873     }
874 }
875
Popular Tags