KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > app > mets > METSExport


1 /*
2  * METSExport.java
3  *
4  * Version: $Revision: 1.12 $
5  *
6  * Date: $Date: 2005/10/18 19:46:13 $
7  *
8  * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
9  * Institute of Technology. All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are
13  * met:
14  *
15  * - Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * - Neither the name of the Hewlett-Packard Company nor the name of the
23  * Massachusetts Institute of Technology nor the names of their
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38  * DAMAGE.
39  */

40 package org.dspace.app.mets;
41
42 import java.io.File JavaDoc;
43 import java.io.FileInputStream JavaDoc;
44 import java.io.FileOutputStream JavaDoc;
45 import java.io.IOException JavaDoc;
46 import java.io.InputStream JavaDoc;
47 import java.io.OutputStream JavaDoc;
48 import java.net.URLEncoder JavaDoc;
49 import java.sql.SQLException JavaDoc;
50 import java.util.Date JavaDoc;
51 import java.util.Properties JavaDoc;
52
53 import org.apache.commons.cli.CommandLine;
54 import org.apache.commons.cli.CommandLineParser;
55 import org.apache.commons.cli.HelpFormatter;
56 import org.apache.commons.cli.Options;
57 import org.apache.commons.cli.PosixParser;
58 import org.dspace.authorize.AuthorizeException;
59 import org.dspace.authorize.AuthorizeManager;
60 import org.dspace.content.Bitstream;
61 import org.dspace.content.BitstreamFormat;
62 import org.dspace.content.Bundle;
63 import org.dspace.content.Collection;
64 import org.dspace.content.DCValue;
65 import org.dspace.content.DSpaceObject;
66 import org.dspace.content.Item;
67 import org.dspace.content.ItemIterator;
68 import org.dspace.core.ConfigurationManager;
69 import org.dspace.core.Constants;
70 import org.dspace.core.Context;
71 import org.dspace.core.Utils;
72 import org.dspace.handle.HandleManager;
73 import org.dspace.app.webui.util.UIUtil;
74
75 import edu.harvard.hul.ois.mets.Agent;
76 import edu.harvard.hul.ois.mets.AmdSec;
77 import edu.harvard.hul.ois.mets.BinData;
78 import edu.harvard.hul.ois.mets.Checksumtype;
79 import edu.harvard.hul.ois.mets.Div;
80 import edu.harvard.hul.ois.mets.DmdSec;
81 import edu.harvard.hul.ois.mets.FLocat;
82 import edu.harvard.hul.ois.mets.FileGrp;
83 import edu.harvard.hul.ois.mets.FileSec;
84 import edu.harvard.hul.ois.mets.Loctype;
85 import edu.harvard.hul.ois.mets.MdWrap;
86 import edu.harvard.hul.ois.mets.Mdtype;
87 import edu.harvard.hul.ois.mets.Mets;
88 import edu.harvard.hul.ois.mets.MetsHdr;
89 import edu.harvard.hul.ois.mets.Name;
90 import edu.harvard.hul.ois.mets.RightsMD;
91 import edu.harvard.hul.ois.mets.Role;
92 import edu.harvard.hul.ois.mets.StructMap;
93 import edu.harvard.hul.ois.mets.Type;
94 import edu.harvard.hul.ois.mets.XmlData;
95 import edu.harvard.hul.ois.mets.helper.Base64;
96 import edu.harvard.hul.ois.mets.helper.MetsException;
97 import edu.harvard.hul.ois.mets.helper.MetsValidator;
98 import edu.harvard.hul.ois.mets.helper.MetsWriter;
99 import edu.harvard.hul.ois.mets.helper.PCData;
100 import edu.harvard.hul.ois.mets.helper.PreformedXML;
101
102 /**
103  * Tool for exporting DSpace AIPs with the metadata serialised in METS format
104  *
105  * @author Robert Tansley
106  * @version $Revision: 1.12 $
107  */

108 public class METSExport
109 {
110     private static int licenseFormat = -1;
111
112     private static Properties JavaDoc dcToMODS;
113
114     public static void main(String JavaDoc[] args) throws Exception JavaDoc
115     {
116         Context context = new Context();
117
118         init(context);
119
120         // create an options object and populate it
121
CommandLineParser parser = new PosixParser();
122
123         Options options = new Options();
124
125         options.addOption("c", "collection", true,
126                 "Handle of collection to export");
127         options.addOption("i", "item", true, "Handle of item to export");
128         options.addOption("a", "all", false, "Export all items in the archive");
129         options.addOption("d", "destination", true, "Destination directory");
130         options.addOption("h", "help", false, "Help");
131
132         CommandLine line = parser.parse(options, args);
133
134         if (line.hasOption('h'))
135         {
136             HelpFormatter myhelp = new HelpFormatter();
137             myhelp.printHelp("metsexport", options);
138             System.out
139                     .println("\nExport a collection: metsexport -c hdl:123.456/789");
140             System.out
141                     .println("Export an item: metsexport -i hdl:123.456/890");
142             System.out.println("Export everything: metsexport -a");
143
144             System.exit(0);
145         }
146
147         String JavaDoc dest = "";
148
149         if (line.hasOption('d'))
150         {
151             dest = line.getOptionValue('d');
152
153             // Make sure it ends with a file separator
154
if (!dest.endsWith(File.separator))
155             {
156                 dest = dest + File.separator;
157             }
158         }
159
160         if (line.hasOption('i'))
161         {
162             String JavaDoc handle = getHandleArg(line.getOptionValue('i'));
163
164             // Exporting a single item
165
DSpaceObject o = HandleManager.resolveToObject(context, handle);
166
167             if ((o != null) && o instanceof Item)
168             {
169                 writeAIP(context, (Item) o, dest);
170                 System.exit(0);
171             }
172             else
173             {
174                 System.err.println(line.getOptionValue('i')
175                         + " is not a valid item Handle");
176                 System.exit(1);
177             }
178         }
179
180         ItemIterator items = null;
181
182         if (line.hasOption('c'))
183         {
184             String JavaDoc handle = getHandleArg(line.getOptionValue('c'));
185
186             // Exporting a collection's worth of items
187
DSpaceObject o = HandleManager.resolveToObject(context, handle);
188
189             if ((o != null) && o instanceof Collection)
190             {
191                 items = ((Collection) o).getItems();
192             }
193             else
194             {
195                 System.err.println(line.getOptionValue('c')
196                         + " is not a valid collection Handle");
197                 System.exit(1);
198             }
199         }
200
201         if (line.hasOption('a'))
202         {
203             items = Item.findAll(context);
204         }
205
206         if (items == null)
207         {
208             System.err.println("Nothing to export specified!");
209             System.exit(1);
210         }
211
212         while (items.hasNext())
213         {
214             writeAIP(context, items.next(), dest);
215         }
216
217         context.abort();
218     }
219
220     /**
221      * Initialise various variables, read in config etc.
222      *
223      * @param context
224      * DSpace context
225      */

226     private static void init(Context context) throws SQLException JavaDoc, IOException JavaDoc
227     {
228         // Don't init again if initialised already
229
if (licenseFormat != -1)
230         {
231             return;
232         }
233
234         // Find the License format
235
BitstreamFormat bf = BitstreamFormat.findByShortDescription(context,
236                 "License");
237         licenseFormat = bf.getID();
238
239         // get path to DC->MODS map info file
240
String JavaDoc configFile = ConfigurationManager.getProperty("dspace.dir")
241                 + File.separator + "config" + File.separator + "dc2mods.cfg";
242
243         // Read it in
244
InputStream JavaDoc is = new FileInputStream JavaDoc(configFile);
245         dcToMODS = new Properties JavaDoc();
246         dcToMODS.load(is);
247     }
248
249     /**
250      * Write out the AIP for the given item to the given directory. A new
251      * directory will be created with the Handle (URL-encoded) as the directory
252      * name, and inside, a mets.xml file written, together with the bitstreams.
253      *
254      * @param context
255      * DSpace context to use
256      * @param item
257      * Item to write
258      * @param dest
259      * destination directory
260      */

261     public static void writeAIP(Context context, Item item, String JavaDoc dest)
262             throws SQLException JavaDoc, IOException JavaDoc, AuthorizeException, MetsException
263     {
264         System.out.println("Exporting item hdl:" + item.getHandle());
265
266         // Create aip directory
267
java.io.File JavaDoc aipDir = new java.io.File JavaDoc(dest
268                 + URLEncoder.encode("hdl:" + item.getHandle(), "UTF-8"));
269
270         if (!aipDir.mkdir())
271         {
272             // Couldn't make the directory for some reason
273
throw new IOException JavaDoc("Couldn't create " + aipDir.toString());
274         }
275
276         // Write the METS file
277
FileOutputStream JavaDoc out = new FileOutputStream JavaDoc(aipDir.toString()
278                 + java.io.File.separator + "mets.xml");
279         writeMETS(context, item, out, false);
280         out.close();
281
282         // Write bitstreams
283
Bundle[] bundles = item.getBundles();
284
285         for (int i = 0; i < bundles.length; i++)
286         {
287             Bitstream[] bitstreams = bundles[i].getBitstreams();
288
289             for (int b = 0; b < bitstreams.length; b++)
290             {
291                 // Skip license bitstream and unauthorized resources
292
if ((bitstreams[b].getFormat().getID() != licenseFormat)
293                         && AuthorizeManager.authorizeActionBoolean(context,
294                                 bitstreams[b], Constants.READ))
295                 {
296                     out = new FileOutputStream JavaDoc(aipDir.toString()
297                             + java.io.File.separator
298                             + bitstreams[b].getName());
299
300                     InputStream JavaDoc in = bitstreams[b].retrieve();
301                     Utils.bufferedCopy(in, out);
302                     out.close();
303                     in.close();
304                 }
305             }
306         }
307     }
308
309     /**
310      * Write METS metadata corresponding to the metadata for an item
311      *
312      * @param context
313      * DSpace context
314      * @param item
315      * DSpace item to create METS object for
316      * @param os
317      * A stream to write METS package to (UTF-8 encoding will be used)
318      * @param fullURL
319      * if <code>true</code>, the &lt;FLocat&gt; values for each
320      * bitstream will be the full URL for that bitstream. Otherwise,
321      * only the filename itself will be used.
322      */

323     public static void writeMETS(Context context, Item item, OutputStream JavaDoc os, boolean fullURL)
324             throws SQLException JavaDoc, IOException JavaDoc, AuthorizeException
325     {
326         try
327         {
328             init(context);
329
330             // Create the METS file
331
Mets mets = new Mets();
332
333             // Top-level stuff
334
mets.setOBJID("hdl:" + item.getHandle());
335             mets.setLABEL("DSpace Item");
336             mets.setSchema("mods", "http://www.loc.gov/mods/v3",
337                     "http://www.loc.gov/standards/mods/v3/mods-3-0.xsd");
338
339             // MetsHdr
340
MetsHdr metsHdr = new MetsHdr();
341             metsHdr.setCREATEDATE(new Date JavaDoc()); // FIXME: CREATEDATE is now:
342
// maybe should be item create
343
// date?
344

345             // Agent
346
Agent agent = new Agent();
347             agent.setROLE(Role.CUSTODIAN);
348             agent.setTYPE(Type.ORGANIZATION);
349
350             Name name = new Name();
351             name.getContent()
352                     .add(
353                             new PCData(ConfigurationManager
354                                     .getProperty("dspace.name")));
355             agent.getContent().add(name);
356
357             metsHdr.getContent().add(agent);
358
359             mets.getContent().add(metsHdr);
360
361             DmdSec dmdSec = new DmdSec();
362             dmdSec.setID("DMD_hdl_" + item.getHandle());
363
364             MdWrap mdWrap = new MdWrap();
365             mdWrap.setMDTYPE(Mdtype.MODS);
366
367             XmlData xmlData = new XmlData();
368             createMODS(item, xmlData);
369
370             mdWrap.getContent().add(xmlData);
371             dmdSec.getContent().add(mdWrap);
372             mets.getContent().add(dmdSec);
373
374             // amdSec
375
AmdSec amdSec = new AmdSec();
376             amdSec.setID("TMD_hdl_" + item.getHandle());
377
378             // FIXME: techMD here
379
// License as <rightsMD><mdWrap><binData>base64encoded</binData>...
380
InputStream JavaDoc licenseStream = findLicense(context, item);
381
382             if (licenseStream != null)
383             {
384                 RightsMD rightsMD = new RightsMD();
385                 MdWrap rightsMDWrap = new MdWrap();
386                 rightsMDWrap.setMIMETYPE("text/plain");
387                 rightsMDWrap.setMDTYPE(Mdtype.OTHER);
388                 rightsMDWrap.setOTHERMDTYPE("TEXT");
389
390                 BinData binData = new BinData();
391                 Base64 base64 = new Base64(licenseStream);
392
393                 binData.getContent().add(base64);
394                 rightsMDWrap.getContent().add(binData);
395                 rightsMD.getContent().add(rightsMDWrap);
396                 amdSec.getContent().add(rightsMD);
397             }
398
399             // FIXME: History data???? Nooooo!!!!
400
mets.getContent().add(amdSec);
401
402             // fileSec
403
FileSec fileSec = new FileSec();
404             boolean fileSecEmpty = true;
405
406             Bundle[] bundles = item.getBundles();
407
408             for (int i = 0; i < bundles.length; i++)
409             {
410                 Bitstream[] bitstreams = bundles[i].getBitstreams();
411
412                 // First: we skip the license bundle, since it's included
413
// elsewhere
414
if (bitstreams[0].getFormat().getID() == licenseFormat)
415                 {
416                     continue;
417                 }
418
419                 // Create a fileGrp
420
FileGrp fileGrp = new FileGrp();
421
422                 // Bundle name for USE attribute
423
if ((bundles[i].getName() != null)
424                         && !bundles[i].getName().equals(""))
425                 {
426                     fileGrp.setUSE(bundles[i].getName());
427                 }
428
429                 for (int bits = 0; bits < bitstreams.length; bits++)
430                 {
431                     // What's the persistent(-ish) ID?
432
String JavaDoc bitstreamPID = ConfigurationManager
433                             .getProperty("dspace.url")
434                             + "/bitstream/"
435                             + item.getHandle()
436                             + "/"
437                             + bitstreams[bits].getSequenceID()
438                             + "/"
439                             + UIUtil.encodeBitstreamName(bitstreams[bits].getName(),
440                                     "UTF-8");
441
442                     edu.harvard.hul.ois.mets.File file = new edu.harvard.hul.ois.mets.File();
443
444                     /*
445                      * ID: we use the unique part of the persistent ID, i.e. the
446                      * Handle + sequence number, but with _'s instead of /'s so
447                      * it's a legal xsd:ID.
448                      */

449                     String JavaDoc xmlIDstart = item.getHandle().replaceAll("/", "_")
450                             + "_";
451
452                     file.setID(xmlIDstart + bitstreams[bits].getSequenceID());
453
454                     String JavaDoc groupID = "GROUP_" + xmlIDstart
455                             + bitstreams[bits].getSequenceID();
456
457                     /*
458                      * If we're in THUMBNAIL or TEXT bundles, the bitstream is
459                      * extracted text or a thumbnail, so we use the name to work
460                      * out which bitstream to be in the same group as
461                      */

462                     if ((bundles[i].getName() != null)
463                             && (bundles[i].getName().equals("THUMBNAIL") || bundles[i]
464                                     .getName().equals("TEXT")))
465                     {
466                         // Try and find the original bitstream, and chuck the
467
// derived
468
// bitstream in the same group
469
Bitstream original = findOriginalBitstream(item,
470                                 bitstreams[bits]);
471
472                         if (original != null)
473                         {
474                             groupID = "GROUP_" + xmlIDstart
475                                     + original.getSequenceID();
476                         }
477                     }
478
479                     file.setGROUPID(groupID);
480                     file.setOWNERID(bitstreamPID);
481
482                     // FIXME: ADMID should point to appropriate TechMD section
483
// above
484
file
485                             .setMIMETYPE(bitstreams[bits].getFormat()
486                                     .getMIMEType());
487
488                     // FIXME: CREATED: no date
489
file.setSIZE(bitstreams[bits].getSize());
490                     file.setCHECKSUM(bitstreams[bits].getChecksum());
491                     file.setCHECKSUMTYPE(Checksumtype.MD5);
492
493                     // FLocat: filename is as in records, or full URL
494
// FIXME: Duplicate filenames and characters illegal to
495
// local OS may cause problems
496
FLocat flocat = new FLocat();
497                     flocat.setLOCTYPE(Loctype.URL);
498                     if (fullURL)
499                     {
500                         flocat.setXlinkHref(bitstreamPID);
501                     }
502                     else
503                     {
504                         flocat.setXlinkHref(bitstreams[bits].getName());
505                     }
506
507                     // Add FLocat to File, and File to FileGrp
508
file.getContent().add(flocat);
509                     fileGrp.getContent().add(file);
510                 }
511
512                 // Add fileGrp to fileSec
513
fileSec.getContent().add(fileGrp);
514                 fileSecEmpty = false;
515             }
516
517             // Add fileSec to document
518
if (!fileSecEmpty)
519             {
520                 mets.getContent().add(fileSec);
521             }
522             
523             // FIXME: Add Structmap here, but it is empty and we won't use it now.
524
StructMap structMap = new StructMap();
525             Div div = new Div();
526             structMap.getContent().add(div);
527             mets.getContent().add(structMap);
528
529             
530             mets.validate(new MetsValidator());
531
532             mets.write(new MetsWriter(os));
533         }
534         catch (MetsException e)
535         {
536             // We don't pass up a MetsException, so callers don't need to
537
// know the details of the METS toolkit
538
e.printStackTrace();
539             throw new IOException JavaDoc(e.getMessage());
540         }
541     }
542
543     /**
544      * Utility to find the license bitstream from an item
545      *
546      * @param context
547      * DSpace context
548      * @param item
549      * the item
550      * @return the license as a string
551      *
552      * @throws IOException
553      * if the license bitstream can't be read
554      */

555     private static InputStream JavaDoc findLicense(Context context, Item item)
556             throws SQLException JavaDoc, IOException JavaDoc, AuthorizeException
557     {
558         Bundle[] bundles = item.getBundles();
559
560         for (int i = 0; i < bundles.length; i++)
561         {
562             // Assume license will be in its own bundle
563
Bitstream[] bitstreams = bundles[i].getBitstreams();
564
565             if (bitstreams[0].getFormat().getID() == licenseFormat)
566             {
567                 // Read the license into a string
568
return bitstreams[0].retrieve();
569             }
570         }
571
572         // Oops! No license!
573
return null;
574     }
575
576     /**
577      * For a bitstream that's a thumbnail or extracted text, find the
578      * corresponding bitstream in the ORIGINAL bundle
579      *
580      * @param item
581      * the item we're dealing with
582      * @param derived
583      * the derived bitstream
584      *
585      * @return the corresponding original bitstream (or null)
586      */

587     private static Bitstream findOriginalBitstream(Item item, Bitstream derived)
588                     throws SQLException JavaDoc
589     {
590         Bundle[] bundles = item.getBundles();
591
592         // Filename of original will be filename of the derived bitstream
593
// minus the extension (last 4 chars - .jpg or .txt)
594
String JavaDoc originalFilename = derived.getName().substring(0,
595                 derived.getName().length() - 4);
596
597         // First find "original" bundle
598
for (int i = 0; i < bundles.length; i++)
599         {
600             if ((bundles[i].getName() != null)
601                     && bundles[i].getName().equals("ORIGINAL"))
602             {
603                 // Now find the corresponding bitstream
604
Bitstream[] bitstreams = bundles[i].getBitstreams();
605
606                 for (int bsnum = 0; bsnum < bitstreams.length; bsnum++)
607                 {
608                     if (bitstreams[bsnum].getName().equals(originalFilename))
609                     {
610                         return bitstreams[bsnum];
611                     }
612                 }
613             }
614         }
615
616         // Didn't find it
617
return null;
618     }
619
620     /**
621      * Create MODS metadata from the DC in the item, and add to the given
622      * XmlData METS object.
623      *
624      * @param item
625      * the item
626      * @param xmlData
627      * xmlData to add MODS to.
628      */

629     private static void createMODS(Item item, XmlData xmlData)
630     {
631         DCValue[] dc = item.getDC(Item.ANY, Item.ANY, Item.ANY);
632
633         StringBuffer JavaDoc modsXML = new StringBuffer JavaDoc();
634
635         for (int i = 0; i < dc.length; i++)
636         {
637             // Get the property name - element[.qualifier]
638
String JavaDoc propName = ((dc[i].qualifier == null) ? dc[i].element
639                     : (dc[i].element + "." + dc[i].qualifier));
640
641             String JavaDoc modsMapping = dcToMODS.getProperty(propName);
642
643             if (modsMapping == null)
644             {
645                 System.err.println("WARNING: No MODS mapping for " + propName);
646             }
647             else
648             {
649                 // Replace '%s' with DC value (with entities encoded)
650
modsXML.append(modsMapping.replaceAll("%s", Utils
651                         .addEntities(dc[i].value)));
652                 modsXML.append("\n"); // For readability
653
}
654         }
655
656         PreformedXML pXML = new PreformedXML(modsXML.toString());
657         xmlData.getContent().add(pXML);
658     }
659
660     /**
661      * Get the handle from the command line in the form 123.456/789. Doesn't
662      * matter if incoming handle has 'hdl:' or 'http://hdl....' before it.
663      *
664      * @param original
665      * Handle as passed in by user
666      * @return Handle as can be looked up in our table
667      */

668     private static String JavaDoc getHandleArg(String JavaDoc original)
669     {
670         if (original.startsWith("hdl:"))
671         {
672             return original.substring(4);
673         }
674
675         if (original.startsWith("http://hdl.handle.net/"))
676         {
677             return original.substring(22);
678         }
679
680         return original;
681     }
682 }
683
Popular Tags