// KickJava - Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > org > dspace > content > crosswalk > MODSDisseminationCrosswalk


/*
 * MODSDisseminationCrosswalk.java
 *
 * Version: $Revision: 1.2 $
 *
 * Date: $Date: 2006/03/27 02:57:09 $
 *
 * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
 * Institute of Technology. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the Hewlett-Packard Company nor the name of the
 * Massachusetts Institute of Technology nor the names of their
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

package org.dspace.content.crosswalk;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import org.apache.log4j.Logger;

import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DCDate;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.SelfNamedPlugin;

import org.jdom.*;
import org.jdom.input.JDOMParseException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

77 /**
78  * Configurable MODS Crosswalk
79  * <p>
80  * This class supports multiple dissemination crosswalks from DSpace
81  * internal data to the MODS XML format
82  * (see <a HREF="http://www.loc.gov/standards/mods/">http://www.loc.gov/standards/mods/</a>.)
83  * <p>
84  * It registers multiple Plugin names, which it reads from
85  * the DSpace configuration as follows:
86  *
87  * <h3>Configuration</h3>
88  * Every key starting with <code>"crosswalk.mods.properties."</code> describes a
89  * MODS crosswalk. Everything after the last period is the <em>plugin name</em>,
90  * and the value is the pathname (relative to <code><em>dspace.dir</em>/config</code>)
91  * of the crosswalk configuration file.
92  * <p>
93  * You can have two names point to the same crosswalk,
94  * just add two configuration entries with the same value, e.g.
95  * <pre>
96  * crosswalk.mods.properties.MODS = crosswalks/mods.properties
97  * crosswalk.mods.properties.default = crosswalks/mods.properties
98  * </pre>
99  * The first line creates a plugin with the name <code>"MODS"</code>
100  * which is configured from the file <em>dspace-dir</em><code>/config/crosswalks/mods.properties</code>.
101  * <p>
102  * Since there is significant overhead in reading the properties file to
103  * configure the crosswalk, and a crosswalk instance may be used any number
104  * of times, we recommend caching one instance of the crosswalk for each
105  * name and simply reusing those instances. The PluginManager does this
106  * by default.
107  *
108  * @author Larry Stone
109  * @version $Revision: 1.2 $
110  */

111 public class MODSDisseminationCrosswalk extends SelfNamedPlugin
112     implements DisseminationCrosswalk
113 {
114     /** log4j category */
115     private static Logger log = Logger.getLogger(MODSDisseminationCrosswalk.class);
116
117     private final static String JavaDoc CONFIG_PREFIX = "crosswalk.mods.properties.";
118
119     /**
120      * Fill in the plugin alias table from DSpace configuration entries
121      * for configuration files for flavors of MODS crosswalk:
122      */

123     private static String JavaDoc aliases[] = null;
124     static
125     {
126         List JavaDoc aliasList = new ArrayList JavaDoc();
127         Enumeration JavaDoc pe = ConfigurationManager.propertyNames();
128         while (pe.hasMoreElements())
129         {
130             String JavaDoc key = (String JavaDoc)pe.nextElement();
131             if (key.startsWith(CONFIG_PREFIX))
132                 aliasList.add(key.substring(CONFIG_PREFIX.length()));
133         }
134         aliases = (String JavaDoc[])aliasList.toArray(new String JavaDoc[aliasList.size()]);
135     }
136
137     public static String JavaDoc[] getPluginNames()
138     {
139         return aliases;
140     }
141
142     /**
143      * MODS namespace.
144      */

145     public static final Namespace MODS_NS =
146         Namespace.getNamespace("mods", "http://www.loc.gov/mods/v3");
147
148     private static final Namespace XLINK_NS =
149         Namespace.getNamespace("xlink", "http://www.w3.org/1999/xlink");
150
151     private static final Namespace namespaces[] = { MODS_NS, XLINK_NS };
152
153     /** URL of MODS XML Schema */
154     public static final String JavaDoc MODS_XSD =
155         "http://www.loc.gov/standards/mods/v3/mods-3-1.xsd";
156
157     private static final String JavaDoc schemaLocation =
158         MODS_NS.getURI()+" "+MODS_XSD;
159
160     private static XMLOutputter outputUgly = new XMLOutputter();
161     private static XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
162     private static SAXBuilder builder = new SAXBuilder();
163
164     private HashMap JavaDoc modsMap = null;
165
166     /**
167      * Container for crosswalk mapping: expressed as "triple" of:
168      * 1. QDC field name (really field.qualifier).
169      * 2. XML subtree to add to MODS record.
170      * 3. XPath expression showing places to plug in the value.
171      */

172     static class modsTriple
173     {
174         public String JavaDoc qdc = null;
175         public Element xml = null;
176         public XPath xpath = null;
177
178         /**
179          * Initialize from text versions of QDC, XML and XPath.
180          * The DC stays a string; parse the XML with appropriate
181          * namespaces; "compile" the XPath.
182          */

183         public static modsTriple create(String JavaDoc qdc, String JavaDoc xml, String JavaDoc xpath)
184         {
185             modsTriple result = new modsTriple();
186
187             final String JavaDoc prolog = "<mods xmlns:"+MODS_NS.getPrefix()+"=\""+MODS_NS.getURI()+"\" "+
188                             "xmlns:"+XLINK_NS.getPrefix()+"=\""+XLINK_NS.getURI()+"\">";
189             final String JavaDoc postlog = "</mods>";
190             try
191             {
192                 result.qdc = qdc;
193                 result.xpath = XPath.newInstance(xpath);
194                 result.xpath.addNamespace(MODS_NS.getPrefix(), MODS_NS.getURI());
195                 result.xpath.addNamespace(XLINK_NS);
196                 Document d = builder.build(new StringReader JavaDoc(prolog+xml+postlog));
197                 result.xml = (Element)d.getRootElement().getContent(0);
198             }
199             catch (JDOMException je)
200             {
201                 log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): got "+je.toString());
202                 return null;
203             }
204             catch (IOException JavaDoc je)
205             {
206                 log.error("Error initializing modsTriple(\""+qdc+"\",\""+xml+"\",\""+xpath+"\"): got "+je.toString());
207                 return null;
208             }
209             return result;
210         }
211     }
212
213     /**
214      * Initialize Crosswalk table from a properties file
215      * which itself is the value of the DSpace configuration property
216      * "crosswalk.mods.properties.X", where "X" is the alias name of this instance.
217      * Each instance may be configured with a separate mapping table.
218      *
219      * The MODS crosswalk configuration properties follow the format:
220      *
221      * {field-name} = {XML-fragment} | {XPath}
222      *
223      * 1. qualified DC field name is of the form
224      * {MDschema}.{element}.{qualifier}
225      *
226      * e.g. dc.contributor.author
227      *
228      * 2. XML fragment is prototype of metadata element, with empty or "%s"
229      * placeholders for value(s). NOTE: Leave the %s's in becaue
230      * it's much easier then to see if something is broken.
231      *
232      * 3. XPath expression listing point(s) in the above XML where
233      * the value is to be inserted. Context is the element itself.
234      *
235      * Example properties line:
236      *
237      * dc.description.abstract = <mods:abstract>%s</mods:abstract> | text()
238      *
239      */

240     private void initMap()
241         throws CrosswalkInternalException
242     {
243         if (modsMap != null)
244             return;
245         String JavaDoc myAlias = getPluginInstanceName();
246         if (myAlias == null)
247         {
248             log.error("Must use PluginManager to instantiate MODSDisseminationCrosswalk so the class knows its name.");
249             return;
250         }
251         String JavaDoc cmPropName = CONFIG_PREFIX+myAlias;
252         String JavaDoc propsFilename = ConfigurationManager.getProperty(cmPropName);
253         if (propsFilename == null)
254         {
255             String JavaDoc msg = "MODS crosswalk missing "+
256                 "configuration file for crosswalk named \""+myAlias+"\"";
257             log.error(msg);
258             throw new CrosswalkInternalException(msg);
259         }
260         else
261         {
262             String JavaDoc parent = ConfigurationManager.getProperty("dspace.dir") +
263                 File.separator + "config" + File.separator;
264             File JavaDoc propsFile = new File JavaDoc(parent, propsFilename);
265             Properties JavaDoc modsConfig = new Properties JavaDoc();
266             try
267             {
268                 modsConfig.load(new FileInputStream JavaDoc(propsFile));
269             }
270             catch (IOException JavaDoc e)
271             {
272                 log.error("Error opening or reading MODS properties file: "+propsFile.toString()+": "+e.toString());
273                 throw new CrosswalkInternalException("MODS crosswalk cannot "+
274                     "open config file: "+e.toString());
275             }
276             modsMap = new HashMap JavaDoc();
277             Enumeration JavaDoc pe = modsConfig.propertyNames();
278             while (pe.hasMoreElements())
279             {
280                 String JavaDoc qdc = (String JavaDoc)pe.nextElement();
281                 String JavaDoc val = modsConfig.getProperty(qdc);
282                 String JavaDoc pair[] = val.split("\\s+\\|\\s+", 2);
283                 if (pair.length < 2)
284                     log.warn("Illegal MODS mapping in "+propsFile.toString()+", line = "+
285                             qdc + " = " + val);
286                 else
287                 {
288                     modsTriple trip = modsTriple.create(qdc, pair[0], pair[1]);
289                     if (trip != null)
290                         modsMap.put(qdc, trip);
291                 }
292             }
293         }
294     }
295
296     public Namespace[] getNamespaces()
297     {
298         return namespaces;
299     }
300
301     public String JavaDoc getSchemaLocation()
302     {
303         return schemaLocation;
304     }
305
306     /**
307      * Returns object's metadata in MODS format, as List of XML structure nodes.
308      */

309     public List JavaDoc disseminateList(DSpaceObject dso)
310         throws CrosswalkException,
311                IOException JavaDoc, SQLException JavaDoc, AuthorizeException
312     {
313         return disseminateListInternal(dso, true);
314     }
315
316     public Element disseminateElement(DSpaceObject dso)
317         throws CrosswalkException,
318                IOException JavaDoc, SQLException JavaDoc, AuthorizeException
319     {
320         Element root = new Element("mods", MODS_NS);
321         root.setAttribute("schemaLocation", schemaLocation, XSI_NS);
322         root.addContent(disseminateListInternal(dso,false));
323         return root;
324     }
325
326     private List JavaDoc disseminateListInternal(DSpaceObject dso, boolean addSchema)
327         throws CrosswalkException,
328                IOException JavaDoc, SQLException JavaDoc, AuthorizeException
329     {
330         if (dso.getType() != Constants.ITEM)
331             throw new CrosswalkObjectNotSupported("MODSDisseminationCrosswalk can only crosswalk an Item.");
332         Item item = (Item)dso;
333         initMap();
334
335         DCValue[] dc = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
336         List JavaDoc result = new ArrayList JavaDoc(dc.length);
337         for (int i = 0; i < dc.length; i++)
338         {
339             // Compose qualified DC name - schema.element[.qualifier]
340
// e.g. "dc.title", "dc.subject.lcc", "lom.Classification.Keyword"
341
String JavaDoc qdc = dc[i].schema+"."+
342                          ((dc[i].qualifier == null) ? dc[i].element
343                             : (dc[i].element + "." + dc[i].qualifier));
344
345             modsTriple trip = (modsTriple)modsMap.get(qdc);
346             if (trip == null)
347                 log.warn("WARNING: "+getPluginInstanceName()+": No MODS mapping for \"" + qdc+"\"");
348             else
349             {
350                 try
351                 {
352                     Element me = (Element)trip.xml.clone();
353                     if (addSchema)
354                         me.setAttribute("schemaLocation", schemaLocation, XSI_NS);
355                     Iterator JavaDoc ni = trip.xpath.selectNodes(me).iterator();
356                     if (!ni.hasNext())
357                         log.warn("XPath \""+trip.xpath.getXPath()+
358                           "\" found no elements in \""+
359                           outputUgly.outputString(me)+
360                           "\", qdc="+qdc);
361                     while (ni.hasNext())
362                     {
363                         Object JavaDoc what = ni.next();
364                         if (what instanceof Element)
365                             ((Element)what).setText(dc[i].value);
366                         else if (what instanceof Attribute)
367                             ((Attribute)what).setValue(dc[i].value);
368                         else if (what instanceof Text)
369                             ((Text)what).setText(dc[i].value);
370                         else
371                             log.warn("Got unknown object from XPath, class="+what.getClass().getName());
372                     }
373                     result.add(me);
374                 }
375                 catch (JDOMException je)
376                 {
377                     log.error("Error following XPath in modsTriple: context="+
378                         outputUgly.outputString(trip.xml)+
379                         ", xpath="+trip.xpath.getXPath()+", exception="+
380                         je.toString());
381                 }
382             }
383         }
384         return result;
385     }
386
387     public boolean canDisseminate(DSpaceObject dso)
388     {
389         return true;
390     }
391
392     public boolean preferList()
393     {
394         return false;
395     }
396 }
397
// Popular Tags