KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > catcode > odf > ODFMetaFileAnalyzer


/*
    ODFMetaFileAnalyzer analyzes the meta file of an OpenDocument file
    and creates an OpenDocumentMetadata object from it.

    Copyright (C) 2005 J. David Eisenberg

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    Author: J. David Eisenberg
    Contact: catcode@catcode.com

*/

25 package com.catcode.odf;
26
import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

import com.catcode.odf.OpenDocumentMetadata;
47
48 /**
49  * Analyze an OpenDocument meta file and create an
50  * <code>OpenDocumentMetadata</code> object.
51  *
52  * This code depends upon there being no duplicate
53  * element names between the Dublin Core and OpenDocument
54  * meta namespaces. If there are, this code will break.
55  *
56  * This code also depends upon there being no nested elements
57  * in the <code>&lt;office:meta&gt;</code> element.
58  *
59  * @author J. David Eisenberg
60  * @version 0.2, 2005-11-09
61  */

62 public class ODFMetaFileAnalyzer
63 {
64     protected String JavaDoc officeNamespace;
65     protected String JavaDoc dcNamespace;
66     protected String JavaDoc metaNamespace;
67
68     private static final String JavaDoc OPENDOCUMENT_URI =
69         "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
70     private static final String JavaDoc DC_URI =
71         "http://purl.org/dc/elements/1.1/";
72     private static final String JavaDoc META_URI =
73         "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
74     private static final String JavaDoc STATISTICS=
75         "document-statistic";
76     private static final String JavaDoc USER_DEFINED=
77         "user-defined";
78
79     private static Class JavaDoc metaDataClass = OpenDocumentMetadata.class;
80     private static Class JavaDoc[] stringParameter = {String JavaDoc.class};
81     private static Class JavaDoc[] intParameter = {int.class};
82
83
84     /**
85      * Analyze the metadata in an <code>InputStream</code>.
86      *
87      * <p>Algorithm:</p>
88      * <ol>
89      * <li>Parse the input stream into a <code>Document</code></li>
90      * <li>From the root element, determine the namespace prefixes for
91      * that correspond to <code>office:</code>, <code>meta:</code>, and
92      * <code>dc:</code>.</li>
93      * <li>For each child element of the <code>&lt;office:meta&gt;</code>
94      * element, process it with the
95      * {@link #processElement(Element,OpenDocumentMetadata)
96      * processElement()} method, except for
97      * <code>&lt;meta:document-statistic&gt;</code>, which is handled with
98      * the {@link #processStatistic(Element,OpenDocumentMetadata)
99      * processStatistic()}, and
100      * <code>&lt;meta:user-defined&gt;</code>, which is handled with the
101      * {@link #processUserDefined(Element,OpenDocumentMetadata)
102      * processUserDefined()} method.
103      * </li>
104      * </ol>
105      *
106      * @param metaStream an <code>InputStream</code> that contains OpenDocument
107      * meta-information.
108      * @return an <code>OpenDocumentMetadata</code> structure that
109      * represents the file's meta information.
110      */

111     public OpenDocumentMetadata analyzeMetaData( InputStream JavaDoc metaStream )
112     {
113         DocumentBuilder JavaDoc builder;
114         Document JavaDoc doc;
115         Node JavaDoc metaElement;
116         OpenDocumentMetadata metaDataResult;
117
118         try
119         {
120             metaDataResult = new OpenDocumentMetadata();
121             builder =
122                 DocumentBuilderFactory.newInstance().newDocumentBuilder();
123             doc = builder.parse( metaStream );
124             findNamespaces( doc.getDocumentElement() );
125             metaElement = doc.getElementsByTagName(
126                 officeNamespace + "meta").item(0);
127             if (metaElement != null)
128             {
129                 metaElement = metaElement.getFirstChild();
130                 while (metaElement != null)
131                 {
132                     if (metaElement.getNodeType() == Node.ELEMENT_NODE)
133                     {
134                         String JavaDoc name = metaElement.getNodeName();
135                         if (name.equals(metaNamespace + STATISTICS))
136                         {
137                             processStatistic( (Element JavaDoc) metaElement,
138                                 metaDataResult );
139                         }
140                         else if (name.equals( metaNamespace + USER_DEFINED))
141                         {
142                             processUserDefined( (Element JavaDoc) metaElement,
143                                 metaDataResult );
144                         }
145                         else
146                         {
147                             processElement( (Element JavaDoc) metaElement,
148                                 metaDataResult );
149                         }
150                     }
151                     metaElement = metaElement.getNextSibling();
152                 }
153             }
154         }
155         catch (Exception JavaDoc e)
156         {
157             metaDataResult = null;
158         }
159         
160         return metaDataResult;
161     }
162     
163     /**
164      * Analyze the metadata in a <code>InputStream</code> which
165      * is a .zip file.
166      *
167      * @param inputStream a <code>InputStream</code> that contains
168      * .zip with OpenDocument meta-information.
169      * @return an <code>OpenDocumentMetadata</code> structure that
170      * represents the file's meta information.
171      */

172     public OpenDocumentMetadata analyzeZip( InputStream JavaDoc inputStream )
173     {
174         ZipInputStream JavaDoc zipStream = new ZipInputStream JavaDoc( inputStream );
175         OpenDocumentMetadata metaDataResult = null;
176
177         try
178         {
179             while (zipStream.available() == 1)
180             {
181                 // read possible metaEntry
182
ZipEntry JavaDoc metaEntry = zipStream.getNextEntry();
183                 if (metaEntry != null && "meta.xml".equals(metaEntry.getName()))
184                 {
185                     // if real metaEntry we use content to do real analysis
186
metaDataResult = analyzeMetaData( zipStream );
187                     // analyze is made and we can break the loop
188
break;
189                 }
190             }
191         }
192         catch (IOException JavaDoc ioe)
193         {
194             // IO error
195
}
196         finally
197         {
198             try {
199                 // and finally we close stream
200
zipStream.close();
201             }
202             catch (IOException JavaDoc ioe) {
203                 // intentionally left blank
204
}
205         }
206         return metaDataResult;
207     }
208
209     /**
210      * Analyze the metadata in a <code>File</code> which is a .zip file.
211      *
212      * @param inputFile a <code>File</code> that contains OpenDocument
213      * meta-information.
214      * @return an <code>OpenDocumentMetadata</code> structure that
215      * represents the file's meta information.
216      */

217     public OpenDocumentMetadata analyzeZip( File JavaDoc inputFile )
218     {
219         ZipFile JavaDoc zipFile;
220         ZipEntry JavaDoc metaEntry;
221         InputStream JavaDoc metaStream = null;
222         OpenDocumentMetadata metaDataResult = null;
223
224         try
225         {
226             zipFile = new ZipFile JavaDoc( inputFile );
227             metaEntry = zipFile.getEntry("meta.xml");
228             if (metaEntry != null)
229             {
230                 metaStream = zipFile.getInputStream(metaEntry);
231             }
232         }
233         catch (IOException JavaDoc e)
234         {
235             metaStream = null;
236         }
237         if (metaStream != null)
238         {
239             metaDataResult = analyzeMetaData( metaStream );
240         }
241         return metaDataResult;
242     }
243
244     /**
245      * Put the content of this element into the metadata object.
246      *
247      * <p>The algorithm depends on reflection; element names must
248      * correspond to fields in OpenDocumentMetadata.</p>
249      * <ol>
250      * <li>Convert the element name to a "set" method name.</li>
251      * <li>Get the element content (first child, which should be text).</li>
252      * <li>Invoke the set method with the content as its parameter</li>
253      * </ol>
254      *
255      * @param element the <code>&lt;meta:...&gt;</code> or
256      * <code>&lt;dc:...&gt;</code> element.
257      * @param metaDataResult the metadata object to modify.
258      *
259      */

260     protected void processElement( Element JavaDoc element, OpenDocumentMetadata
261         metaDataResult )
262     {
263         String JavaDoc elementContent;
264         String JavaDoc[] theParameter = new String JavaDoc[1];
265         Node JavaDoc textChild;
266         String JavaDoc methodName = makeSetMethodName( getBaseName(element) );
267         try
268         {
269             Method JavaDoc setMethod = metaDataClass.getDeclaredMethod(
270                 methodName, stringParameter);
271             if (setMethod != null)
272             {
273                 textChild = element.getFirstChild();
274                 theParameter[0] = (textChild != null)
275                     ? textChild.getNodeValue().trim() : "";
276                 setMethod.invoke( metaDataResult, theParameter );
277             }
278         }
279         catch (InvocationTargetException JavaDoc e)
280         {
281             // no target - this should be fatal, but ignore it
282
}
283         catch (IllegalAccessException JavaDoc e)
284         {
285             // can't invoke the method, so do nothing
286
}
287         catch (NoSuchMethodException JavaDoc e)
288         {
289             // no method written to handle this element, so do nothing
290
}
291     }
292
293     /**
294      * Put the content of the statistic element's
295      * attributes into the metadata object.
296      *
297      * <p>The algorithm depends on reflection; attribute names must
298      * correspond to fields in OpenDocumentMetadata.</p>
299      * <ol>
300      * <li>Convert each attribute name to a "set" method name.</li>
301      * <li>Get the attribute value.</li>
302      * <li>Invoke the set method with the content as its parameter</li>
303      * </ol>
304      * @param element the <code>&lt;meta:document-statistic&gt;</code> element.
305      * @param metaDataResult the metadata object to be set.
306      *
307      */

308     protected void processStatistic( Element JavaDoc element, OpenDocumentMetadata
309         metaDataResult )
310     {
311         String JavaDoc attrValue;
312         String JavaDoc attrName;
313         String JavaDoc methodName;
314         Integer JavaDoc[] theParameter = new Integer JavaDoc[1];
315         NamedNodeMap JavaDoc attr;
316         attr = element.getAttributes();
317         for (int i=0; i < attr.getLength(); i++)
318         {
319             try
320             {
321                 attrName = getBaseName( attr.item(i) );
322                 methodName = makeSetMethodName( attrName );
323                 Method JavaDoc setMethod = metaDataClass.getDeclaredMethod(
324                     methodName, intParameter);
325                 if (setMethod != null)
326                 {
327                     attrValue = attr.item(i).getNodeValue();
328                     theParameter[0] = (attrValue != null)
329                         ? Integer.valueOf( attrValue ) : new Integer JavaDoc(0);
330                     setMethod.invoke( metaDataResult, theParameter );
331                 }
332             }
333             catch (InvocationTargetException JavaDoc e)
334             {
335                 // no target - this should be fatal, but ignore it
336
}
337             catch (IllegalAccessException JavaDoc e)
338             {
339                 // can't invoke the method, so do nothing
340
}
341             catch (NoSuchMethodException JavaDoc e)
342             {
343                 // no method written to handle this element, so do nothing
344
}
345         }
346         
347     }
348
349     /**
350      * Put the content of this element into the user-defined section
351      * of the metadata object.
352      *
353      * <p>This method presumes that the content of the element is its first
354      * child, which is a text node.</p>
355      *
356      * @param element the <code>&lt;meta:user-defined&gt;</code>
357      * element containing the information.
358      * @param metaDataResult the metadata object to modify.
359      *
360      */

361     protected void processUserDefined( Element JavaDoc element, OpenDocumentMetadata
362         metaDataResult )
363     {
364         String JavaDoc dataType;
365         String JavaDoc content;
366         String JavaDoc key;
367         
368         if (element.hasChildNodes())
369         {
370             content = element.getFirstChild().getNodeValue();
371             dataType = element.getAttribute( metaNamespace + "value-type" );
372             dataType = (dataType.equals("")) ? "string" : dataType;
373
374             key = element.getAttribute( metaNamespace + "name" );
375             if (key != "")
376             {
377                 if (dataType == "string" || dataType == "date")
378                 {
379                     metaDataResult.setUserDefined( key, content );
380                 }
381                 else if (dataType == "float")
382                 {
383                     metaDataResult.setUserDefined( key,
384                         Double.valueOf( content ) );
385                 }
386                 else if (dataType == "boolean")
387                 {
388                     metaDataResult.setUserDefined( key,
389                         Boolean.valueOf( content ) );
390                 }
391                 else if (dataType == "time")
392                 {
393                     metaDataResult.setUserDefined( key,
394                         Duration.parseDuration( content ) );
395                 }
396             }
397         }
398     }
399
400     /**
401      * Analyzes an Open Document meta file, presumed to be in .zip format.
402      * This is a wrapper for the non-static method.
403      *
404      * @param inputFile the <code>File</code> to analyze.
405      * @return an <code>OpenDocumentMetadata</code> object.
406      */

407     public static OpenDocumentMetadata analyzeFile( File JavaDoc inputFile )
408     {
409         ODFMetaFileAnalyzer mfa = new ODFMetaFileAnalyzer();
410         return mfa.analyzeZip( inputFile );
411     }
412
413     /**
414      * Finds the namespace prefixes associated with OpenDocument,
415      * Dublin Core, and OpenDocument meta elements.
416      *
417      * <p>This function presumes that all the namespaces are in the
418      * root element. If they aren't, this breaks.</p>
419      *
420      * @param rootElement the root element of the document.
421      */

422     protected void findNamespaces( Element JavaDoc rootElement )
423     {
424         NamedNodeMap JavaDoc attributes;
425         Node JavaDoc node;
426         String JavaDoc value;
427
428         attributes = rootElement.getAttributes();
429         for (int i=0; i < attributes.getLength(); i++)
430         {
431             node = attributes.item(i);
432             value = node.getNodeValue();
433
434             if (value.equals( DC_URI ))
435             {
436                 dcNamespace = extractNamespace( node.getNodeName() );
437             }
438             else if (value.equals( META_URI ))
439             {
440                 metaNamespace = extractNamespace( node.getNodeName() );
441             }
442             else if (value.equals( OPENDOCUMENT_URI ))
443             {
444                 officeNamespace = extractNamespace( node.getNodeName() );
445             }
446         }
447     }
448
449     /**
450      * Extract a namespace from a namespace attribute.
451      * @param namespaceAttrName an attribute name in the form
452      * <code>xmlns:aaaa</code>.
453      * @return the namespace, including the colon separator.
454      */

455     protected String JavaDoc extractNamespace( String JavaDoc namespaceAttrName )
456     {
457         String JavaDoc result;
458         int pos = namespaceAttrName.indexOf(":");
459
460         result = (pos > 0)
461                 ? namespaceAttrName.substring( pos + 1 ) + ":"
462                 : "";
463         return result;
464     }
465     
466     /**
467      * Get the "local" name in a non-namespace-aware parser.
468      * @param node the <code>Node</code> whose local name we want.
469      * @return the portion of the name after the <code>:</code>.
470      */

471     protected String JavaDoc getBaseName( Node JavaDoc node )
472     {
473         String JavaDoc result = node.getNodeName();
474         int pos = result.indexOf(":");
475         if (pos >= 0)
476         {
477             result = result.substring( pos + 1 );
478         }
479         return result;
480     }
481
482     /**
483      * Create a set method name corresponding to a meta-element.
484      *
485      * Takes the given name and changes all <code>-<i>letter</i></code>
486      * sequences to (capitalized) <code><i>Letter</i></code>, prepended
487      * by <code>set</code>. Thus, an element name of
488      * <code>initial-creator</code>
489      * converts to the method name <code>setInitialCreator</code>.
490      *
491      * @param elementName the name of the element to munge.
492      * @return the name of a set method
493      */

494     protected String JavaDoc makeSetMethodName( String JavaDoc elementName )
495     {
496         String JavaDoc[] part;
497         String JavaDoc result;
498         int i;
499         part = elementName.split("-");
500         result = "set";
501         for (i=0; i<part.length; i++)
502         {
503             result += part[i].substring(0,1).toUpperCase() +
504                 part[i].substring(1);
505         }
506         return result;
507     }
508     
509 }
510
Popular Tags