KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > content > crosswalk > QDCCrosswalk


1 /*
2  * QDCCrosswalk.java
3  *
4  * Version: $Revision: 1.4 $
5  *
6  * Date: $Date: 2006/06/07 23:26:27 $
7  *
8  * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
9  * Institute of Technology. All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are
13  * met:
14  *
15  * - Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * - Neither the name of the Hewlett-Packard Company nor the name of the
23  * Massachusetts Institute of Technology nor the names of their
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38  * DAMAGE.
39  */

40
41 package org.dspace.content.crosswalk;
42
43 import java.io.IOException JavaDoc;
44 import java.sql.SQLException JavaDoc;
45 import java.util.Iterator JavaDoc;
46 import java.util.List JavaDoc;
47 import java.util.ArrayList JavaDoc;
48 import java.util.HashMap JavaDoc;
49 import java.util.Properties JavaDoc;
50 import java.util.Enumeration JavaDoc;
51 import java.io.StringReader JavaDoc;
52 import java.io.File JavaDoc;
53 import java.io.FileInputStream JavaDoc;
54
55 import java.sql.SQLException JavaDoc;
56
57 import org.apache.log4j.Logger;
58
59 import org.dspace.core.Context;
60 import org.dspace.core.Constants;
61 import org.dspace.content.Item;
62 import org.dspace.content.DCValue;
63 import org.dspace.content.DSpaceObject;
64 import org.dspace.content.MetadataSchema;
65 import org.dspace.authorize.AuthorizeException;
66 import org.dspace.core.ConfigurationManager;
67 import org.dspace.core.SelfNamedPlugin;
68
69 import org.jdom.*;
70 import org.jdom.output.XMLOutputter;
71 import org.jdom.output.Format;
72 import org.jdom.input.SAXBuilder;
73 import org.jdom.input.JDOMParseException;
74
75 /**
76  * Configurable QDC Crosswalk
77  * <p>
78  * This class supports multiple dissemination crosswalks from DSpace
79  * internal data to the Qualified Dublin Core XML format
80  * (see <a HREF="http://dublincore.org/">http://dublincore.org/</a>
81  * <p>
82  * It registers multiple Plugin names, which it reads from
83  * the DSpace configuration as follows:
84  *
85  * <h3>Configuration</h3>
86  * Every key starting with <code>"crosswalk.qdc.properties."</code> describes a
87  * QDC crosswalk. Everything after the last period is the <em>plugin instance</em>,
88  * and the value is the pathname (relative to <code><em>dspace.dir</em>/config</code>)
89  * of the crosswalk configuration file.
90  * <p>
91  * You can have two aliases point to the same crosswalk,
92  * just add two configuration entries with the same value, e.g.
93  * <pre>
94  * crosswalk.qdc.properties.QDC = xwalk/qdc.properties
95  * crosswalk.qdc.properties.default = xwalk/qdc.properties
96  * </pre>
97  * The first line creates a plugin with the name <code>"QDC"</code>
98  * which is configured from the file <em>dspace-dir</em><code>/xwalk/qdc.properties</code>.
99  * <p>
100  * Since there is significant overhead in reading the properties file to
101  * configure the crosswalk, and a crosswalk instance may be used any number
102  * of times, we recommend caching one instance of the crosswalk for each
103  * alias and simply reusing those instances. The PluginManager does
104  * this by default.
105  * <p>
106  * Each named crosswalk has two other types of configuration lines:
107  * <p>
108  * XML Namespaces: all XML namespace prefixes used in the XML fragments below
109  * <em>must</em> be defined in the configuration as follows. Add a line of
110  * the form: <pre>
111  * crosswalk.qdc.namespace.{NAME}.{prefix} = {namespace-URI}</pre>
112  * e.g. for the namespaces <code>dc</code> and <code>dcterms</code>
113  * in the plugin named <code>QDC</code>, add these lines:
114  * <pre>crosswalk.qdc.namespace.QDC.dc = http://purl.org/dc/elements/1.1/
115  * crosswalk.qdc.namespace.QDC.dcterms = http://purl.org/dc/terms/</pre>
116  *
117  * <p>
118  * Finally, you need to declare an XML Schema URI for the plugin, with
119  * a line of the form <pre>
120  * crosswalk.qdc.schema.{NAME} = {schema-URI}</pre>
121  * for example,
122  * <pre>crosswalk.qdc.schemaLocation.QDC = \
123  * http://purl.org/dc/terms/ \
124  * http://dublincore.org/schemas/xmls/qdc/2003/04/02/qualifieddc.xsd</pre>
125  *
126  * @author Larry Stone
127  * @version $Revision: 1.4 $
128  */

129 public class QDCCrosswalk extends SelfNamedPlugin
130     implements DisseminationCrosswalk, IngestionCrosswalk
131 {
132     /** log4j category */
133     private static Logger log = Logger.getLogger(QDCCrosswalk.class);
134
135     // map of qdc to JDOM Element
136
private HashMap JavaDoc qdc2element = new HashMap JavaDoc();
137
138     // map of JDOM Element to qdc DCValue
139
private HashMap JavaDoc element2qdc = new HashMap JavaDoc();
140
141     // the XML namespaces from config file for this name.
142
private Namespace namespaces[] = null;
143
144     private static final Namespace DCTERMS_NS =
145         Namespace.getNamespace("dcterms", "http://purl.org/dc/terms/");
146
147     // sentinal: done init?
148
private boolean inited = false;
149
150     // my plugin name
151
private String JavaDoc myName = null;
152
153     // prefix of all DSpace Configuration entries.
154
private static final String JavaDoc CONFIG_PREFIX = "crosswalk.qdc";
155
156     // XML schemaLocation fragment for this crosswalk, from config.
157
private String JavaDoc schemaLocation = null;
158
159     private static final Namespace XLINK_NS =
160         Namespace.getNamespace("xlink", "http://www.w3.org/TR/xlink");
161
162     private static XMLOutputter outputUgly = new XMLOutputter();
163     private static XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
164     private static SAXBuilder builder = new SAXBuilder();
165
166     /**
167      * Fill in the plugin-name table from DSpace configuration entries
168      * for configuration files for flavors of QDC crosswalk:
169      */

170     private static String JavaDoc aliases[] = null;
171     static
172     {
173         List JavaDoc aliasList = new ArrayList JavaDoc();
174         Enumeration JavaDoc pe = ConfigurationManager.propertyNames();
175         String JavaDoc propname = CONFIG_PREFIX + ".properties.";
176         while (pe.hasMoreElements())
177         {
178             String JavaDoc key = (String JavaDoc)pe.nextElement();
179             if (key.startsWith(propname))
180                 aliasList.add(key.substring(propname.length()));
181         }
182         aliases = (String JavaDoc[])aliasList.toArray(new String JavaDoc[aliasList.size()]);
183     }
184
185     public static String JavaDoc[] getPluginNames()
186     {
187         return aliases;
188     }
189
190     // utility: return "fully qualified" name of XML element, for a
191
// hashtable key to use on ingesting elements.
192
// Format is {prefix:}name where prefix is optional.
193
private String JavaDoc makeQualifiedTagName(Element element)
194     {
195         String JavaDoc prefix = "";
196         Namespace ns = element.getNamespace();
197         if (ns != null)
198             prefix = ns.getPrefix() + ":";
199         
200         String JavaDoc tagName;
201         String JavaDoc nsQualifier = element.getAttributeValue("type", DisseminationCrosswalk.XSI_NS);
202         
203         if (nsQualifier == null || nsQualifier.length() < 1)
204         {
205             String JavaDoc qualifier = element.getAttributeValue("type");
206             if (qualifier == null || qualifier.length() < 1)
207             {
208                 tagName = prefix+element.getName();
209             }
210             else
211             {
212                 tagName = prefix+element.getName()+qualifier;
213             }
214         }
215         else
216         {
217             tagName = prefix+element.getName()+nsQualifier;
218         }
219         
220         return tagName;
221     }
222
223     /**
224      * Initialize Crosswalk table from a properties file
225      * which itself is the value of the DSpace configuration property
226      * "crosswalk.qdc.properties.X", where "X" is the alias name of this instance.
227      * Each instance may be configured with a separate mapping table.
228      *
229      * The QDC crosswalk configuration properties follow the format:
230      *
231      * {qdc-element} = {XML-fragment}
232      *
233      * 1. qualified DC field name is of the form (qualifier is optional)
234      * {MDschema}.{element}.{qualifier}
235      *
236      * e.g. dc.contributor.author
237      * dc.title
238      *
239      * 2. XML fragment is prototype of metadata element, with empty
240      * placeholders for value).
241      *
242      * Example properties line:
243      *
244      * dc.coverage.temporal = <dcterms:temporal />
245      */

246     private void init()
247         throws CrosswalkException, IOException JavaDoc
248     {
249         if (inited)
250             return;
251         inited = true;
252
253         myName = getPluginInstanceName();
254         if (myName == null)
255             throw new CrosswalkInternalException("Cannot determine plugin name, "+
256                        "You must use PluginManager to instantiate QDCCrosswalk so the instance knows its name.");
257
258         // grovel DSpace configuration for namespaces
259
List JavaDoc nsList = new ArrayList JavaDoc();
260         Enumeration JavaDoc pe = ConfigurationManager.propertyNames();
261         String JavaDoc propname = CONFIG_PREFIX + ".namespace."+ myName +".";
262         while (pe.hasMoreElements())
263         {
264             String JavaDoc key = (String JavaDoc)pe.nextElement();
265             if (key.startsWith(propname))
266                 nsList.add(Namespace.getNamespace(key.substring(propname.length()),
267                              ConfigurationManager.getProperty(key)));
268         }
269         nsList.add(Namespace.XML_NAMESPACE);
270         namespaces = (Namespace[])nsList.toArray(new Namespace[nsList.size()]);
271
272         // get XML schemaLocation fragment from config
273
schemaLocation = ConfigurationManager.getProperty(CONFIG_PREFIX + ".schemaLocation."+ myName);
274
275         // read properties
276
String JavaDoc cmPropName = CONFIG_PREFIX+".properties."+myName;
277         String JavaDoc propsFilename = ConfigurationManager.getProperty(cmPropName);
278         if (propsFilename == null)
279             throw new CrosswalkInternalException("Configuration error: "+
280                 "No properties file configured for QDC crosswalk named \""+myName+"\"");
281
282         String JavaDoc parent = ConfigurationManager.getProperty("dspace.dir") +
283             File.separator + "config" + File.separator;
284         File JavaDoc propsFile = new File JavaDoc(parent, propsFilename);
285         Properties JavaDoc qdcProps = new Properties JavaDoc();
286         qdcProps.load(new FileInputStream JavaDoc(propsFile));
287
288         // grovel properties to initialize qdc->element and element->qdc maps.
289
// evaluate the XML fragment with a wrapper including namespaces.
290
String JavaDoc postlog = "</wrapper>";
291         StringBuffer JavaDoc prologb = new StringBuffer JavaDoc("<wrapper");
292         for (int i = 0; i < namespaces.length; ++i)
293         {
294             prologb.append(" xmlns:");
295             prologb.append(namespaces[i].getPrefix());
296             prologb.append("=\"");
297             prologb.append(namespaces[i].getURI());
298             prologb.append("\"");
299         }
300         prologb.append(">");
301         String JavaDoc prolog = prologb.toString();
302         pe = qdcProps.propertyNames();
303         while (pe.hasMoreElements())
304         {
305             String JavaDoc qdc = (String JavaDoc)pe.nextElement();
306             String JavaDoc val = qdcProps.getProperty(qdc);
307             try
308             {
309                 Document d = builder.build(new StringReader JavaDoc(prolog+val+postlog));
310                 Element element = (Element)d.getRootElement().getContent(0);
311                 qdc2element.put(qdc, element);
312                 element2qdc.put(makeQualifiedTagName(element), qdc);
313                 log.debug("Building Maps: qdc=\""+qdc+"\", element=\""+element.toString()+"\"");
314             }
315             catch (org.jdom.JDOMException je)
316             {
317                 throw new CrosswalkInternalException("Failed parsing XML fragment in properties file: \""+prolog+val+postlog+"\": "+je.toString());
318             }
319         }
320     }
321
322     public Namespace[] getNamespaces()
323     {
324         try
325         {
326             init();
327         }
328         catch (Exception JavaDoc e)
329         {
330         }
331         return namespaces;
332     }
333
334     public String JavaDoc getSchemaLocation()
335     {
336         try
337         {
338             init();
339         }
340         catch (Exception JavaDoc e)
341         {
342         }
343         return schemaLocation;
344     }
345
346     /**
347      * Returns object's metadata in MODS format, as XML structure node.
348      */

349     public List JavaDoc disseminateList(DSpaceObject dso)
350         throws CrosswalkException,
351                IOException JavaDoc, SQLException JavaDoc, AuthorizeException
352     {
353         return disseminateListInternal(dso, true);
354     }
355
356     private List JavaDoc disseminateListInternal(DSpaceObject dso, boolean addSchema)
357         throws CrosswalkException,
358                IOException JavaDoc, SQLException JavaDoc, AuthorizeException
359     {
360         if (dso.getType() != Constants.ITEM)
361             throw new CrosswalkObjectNotSupported("QDCCrosswalk can only crosswalk an Item.");
362         Item item = (Item)dso;
363         init();
364
365         DCValue[] dc = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
366         List JavaDoc result = new ArrayList JavaDoc(dc.length);
367         for (int i = 0; i < dc.length; i++)
368         {
369             // Compose qualified DC name - schema.element[.qualifier]
370
// e.g. "dc.title", "dc.subject.lcc", "lom.Classification.Keyword"
371
String JavaDoc qdc = dc[i].schema+"."+
372                          ((dc[i].qualifier == null) ? dc[i].element
373                             : (dc[i].element + "." + dc[i].qualifier));
374
375             Element elt = (Element)qdc2element.get(qdc);
376
377             // only complain about missing elements in the DC schema:
378
if (elt == null)
379             {
380                 if (dc[i].schema.equals(MetadataSchema.DC_SCHEMA))
381                     log.warn("WARNING: "+myName+": No QDC mapping for \"" + qdc+"\"");
382             }
383             else
384             {
385                 Element qe = (Element)elt.clone();
386                 qe.setText(dc[i].value);
387                 if (addSchema && schemaLocation != null)
388                     qe.setAttribute("schemaLocation", schemaLocation, XSI_NS);
389                 if (dc[i].language != null)
390                     qe.setAttribute("lang", dc[i].language, Namespace.XML_NAMESPACE);
391                 result.add(qe);
392             }
393         }
394         return result;
395     }
396
397     public Element disseminateElement(DSpaceObject dso)
398         throws CrosswalkException,
399                IOException JavaDoc, SQLException JavaDoc, AuthorizeException
400     {
401         init();
402         Element root = new Element("qualifieddc", DCTERMS_NS);
403         if (schemaLocation != null)
404             root.setAttribute("schemaLocation", schemaLocation, XSI_NS);
405         root.addContent(disseminateListInternal(dso, false));
406         return root;
407     }
408
409     public boolean canDisseminate(DSpaceObject dso)
410     {
411         return true;
412     }
413
414     public void ingest(Context context, DSpaceObject dso, Element root)
415         throws CrosswalkException, IOException JavaDoc, SQLException JavaDoc, AuthorizeException
416     {
417         init();
418
419         // NOTE: don't bother comparing namespace on root element
420
// because DCMI doesn't specify one, and every app uses its
421
// own.. just give up in the face of this madness and accept
422
// anything with the right name.
423
if (!(root.getName().equals("qualifieddc")))
424             throw new MetadataValidationException("Wrong root element for Qualified DC: "+root.toString());
425         ingest(context, dso, root.getChildren());
426     }
427
428     public void ingest(Context context, DSpaceObject dso, List JavaDoc ml)
429         throws CrosswalkException, IOException JavaDoc, SQLException JavaDoc, AuthorizeException
430     {
431         init();
432
433         // for now, forget about any targets but item.
434
if (dso.getType() != Constants.ITEM)
435             throw new CrosswalkInternalException("Wrong target object type, QDCCrosswalk can only crosswalk to an Item.");
436
437         Item item = (Item)dso;
438
439         Iterator JavaDoc mi = ml.iterator();
440         while (mi.hasNext())
441         {
442             Element me = (Element)mi.next();
443             String JavaDoc key = makeQualifiedTagName(me);
444
445             // if the root element gets passed here, recurse:
446
if (me.getName().equals("qualifieddc"))
447                 ingest(context, dso, me.getChildren());
448
449             else if (element2qdc.containsKey(key))
450             {
451                 String JavaDoc qdc[] = ((String JavaDoc)element2qdc.get(key)).split("\\.");
452
453                 // get language - prefer xml:lang, accept lang.
454
String JavaDoc lang = me.getAttributeValue("lang", Namespace.XML_NAMESPACE);
455                 if (lang == null)
456                     lang = me.getAttributeValue("lang");
457
458                 if (qdc.length == 3)
459                     item.addMetadata(qdc[0], qdc[1], qdc[2], lang, me.getText());
460                 else if (qdc.length == 2)
461                     item.addMetadata(qdc[0], qdc[1], null, lang, me.getText());
462                 else
463                     throw new CrosswalkInternalException("Unrecognized format in QDC element identifier for key=\""+key+"\", qdc=\""+(String JavaDoc)element2qdc.get(key)+"\"");
464             }
465             else
466                 log.warn("WARNING: "+myName+": No mapping for Element=\"" + key+"\" to qdc.");
467         }
468     }
469
470     public boolean preferList()
471     {
472         return true;
473     }
474 }
475
Popular Tags