KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > mmbase > util > xml > DocumentReader


/*

This software is OSI Certified Open Source Software.
OSI Certified is a certification mark of the Open Source Initiative.

The license (Mozilla version 1.0) can be read at the MMBase site.
See http://www.MMBase.org/license

*/

package org.mmbase.util.xml;

import java.util.*;
import java.util.regex.Pattern;

import org.xml.sax.ErrorHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;

import org.w3c.dom.*;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.mmbase.util.XMLEntityResolver;
import org.mmbase.util.XMLErrorHandler;

import org.mmbase.util.logging.Logging;
import org.mmbase.util.logging.Logger;
30
31 /**
32  * The DocumentReader class provides methods for loading a xml document in memory.
33  * It serves as the base class for DocumentWriter (which adds ways to write a document), and
34  * XMLBasicReader, which adds path-like methods with which to retrieve elements.
35  *
36  * This can also be a class for general static dom utilities.
37  *
38  *
39  * @author Case Roule
40  * @author Rico Jansen
41  * @author Pierre van Rooden
42  * @author Michiel Meeuwissen
43  * @version $Id: DocumentReader.java,v 1.29 2006/06/19 05:53:58 michiel Exp $
44  * @since MMBase-1.7
45  */

46 public class DocumentReader {
47     private static Logger log = Logging.getLoggerInstance(DocumentReader.class);
48
49     /** for the document builder of javax.xml. */
50     private static Map documentBuilders = Collections.synchronizedMap(new HashMap());
51
52     protected static final String JavaDoc FILENOTFOUND = "FILENOTFOUND://";
53
54     /** Public ID of the Error DTD version 1.0 */
55     public static final String JavaDoc PUBLIC_ID_ERROR_1_0 = "-//MMBase//DTD error 1.0//EN";
56     /** DTD resource filename of the Error DTD version 1.0 */
57     public static final String JavaDoc DTD_ERROR_1_0 = "error_1_0.dtd";
58
59     /** Public ID of the most recent Error DTD */
60     public static final String JavaDoc PUBLIC_ID_ERROR = PUBLIC_ID_ERROR_1_0;
61     /** DTD respource filename of the most recent Error DTD */
62     public static final String JavaDoc DTD_ERROR = DTD_ERROR_1_0;
63
64     /**
65      * Register the Public Ids for DTDs used by XMLBasicReader
66      * This method is called by XMLEntityResolver.
67      */

68     public static void registerPublicIDs() {
69         XMLEntityResolver.registerPublicID(PUBLIC_ID_ERROR_1_0, DTD_ERROR_1_0, DocumentReader.class);
70     }
71
72     protected Document document;
73
74     private String JavaDoc systemId;
75
76     static UtilReader.PropertiesMap utilProperties = null;
77     /**
78      * Returns the default setting for validation for DocumentReaders.
79      * @return true if validation is on
80      */

81     protected static final boolean validate() {
82         Object JavaDoc validate = utilProperties == null ? null : utilProperties.get("validate");
83         return validate == null || validate.equals("true");
84     }
85
86     /**
87      * Whether to validate given a request for that. So, the request is followed, unless it is configured to 'never' validate.
88      * @since MMBase-1.8
89      */

90     protected static final boolean validate(boolean requested) {
91         Object JavaDoc validate = utilProperties == null ? null : utilProperties.get("validate");
92         if (validate != null && validate.equals("never")) return false;
93         return requested;
94     }
95
96
97     /**
98      * Creates an empty document reader.
99      */

100     protected DocumentReader() {
101     }
102
103     /**
104      * Constructs the document by reading it from a source.
105      * @param source the input source from which to read the document
106      */

107     public DocumentReader(InputSource JavaDoc source) {
108         this(source, validate(), null);
109     }
110
111     /**
112      * Constructs the document by reading it from a source.
113      * @param source the input source from which to read the document
114      * @param validating whether to validate the document
115      */

116     public DocumentReader(InputSource JavaDoc source, boolean validating) {
117         this(source, validating, null);
118     }
119
120     /**
121      * Constructs the document by reading it from a source.
122      * You can pass a resolve class to this constructor, allowing you to indicate the package in which the dtd
123      * of the document read is to be found. The dtd sould be in the resources package under the package of the class passed.
124      * @param source the input source from which to read the document
125      * @param resolveBase the base class whose package is used to resolve dtds, set to null if unknown
126      */

127     public DocumentReader(InputSource JavaDoc source, Class JavaDoc resolveBase) {
128         this(source, DocumentReader.validate(), resolveBase);
129     }
130
131     /**
132      * Constructs the document by reading it from a source.
133      * You can pass a resolve class to this constructor, allowing you to indicate the package in which the dtd
134      * of the document read is to be found. The dtd sould be in the resources package under the package of the class passed.
135      * @param source the input source from which to read the document
136      * @param validating whether to validate the document
137      * @param resolveBase the base class whose package is used to resolve dtds, set to null if unknown
138      */

139     public DocumentReader(InputSource JavaDoc source, boolean validating, Class JavaDoc resolveBase) {
140         if (source == null) {
141             throw new IllegalArgumentException JavaDoc("InputSource cannot be null");
142         }
143         try {
144             systemId = source.getSystemId();
145             XMLEntityResolver resolver = null;
146             if (resolveBase != null) resolver = new XMLEntityResolver(validating, resolveBase);
147             DocumentBuilder JavaDoc dbuilder = getDocumentBuilder(validating, null/* no error handler */, resolver);
148             if(dbuilder == null) throw new RuntimeException JavaDoc("failure retrieving document builder");
149             if (log.isDebugEnabled()) log.debug("Reading " + source.getSystemId());
150             document = dbuilder.parse(source);
151         } catch(org.xml.sax.SAXException JavaDoc se) {
152             throw new RuntimeException JavaDoc("failure reading document: " + source.getSystemId() + "\n" + Logging.stackTrace(se));
153         } catch(java.io.IOException JavaDoc ioe) {
154             throw new RuntimeException JavaDoc("failure reading document: " + source.getSystemId() + "\n" + ioe, ioe);
155         }
156     }
157
158     /**
159      * @since MMBase-1.8
160      */

161     public DocumentReader(Document doc) {
162         document = doc;
163     }
164
165
166     private static boolean warnedJAXP12 = false;
167     /**
168      * Creates a DocumentBuilder using SAX.
169      * @param validating if true, the documentbuilder will validate documents read
170      * @param xsd Whether to use XSD for validating
171      * @param handler a ErrorHandler class to use for catching parsing errors, pass null to use a default handler
172      * @param resolver a EntityResolver class used for resolving the document's dtd, pass null to use a default resolver
173      * @return a DocumentBuilder instance, or null if none could be created
174      */

175     private static DocumentBuilder JavaDoc createDocumentBuilder(boolean validating, boolean xsd, ErrorHandler JavaDoc handler, EntityResolver JavaDoc resolver) {
176         DocumentBuilder JavaDoc db;
177         if (handler == null) handler = new XMLErrorHandler();
178         if (resolver == null) resolver = new XMLEntityResolver(validating);
179         try {
180             // get a new documentbuilder...
181
DocumentBuilderFactory JavaDoc dfactory = DocumentBuilderFactory.newInstance();
182             // get document builder AFTER setting the validation
183
dfactory.setValidating(validating);
184             if (validating && xsd) {
185                 try {
186                     dfactory.setAttribute("http://java.sun.com/xml/jaxp/properties/schemaLanguage",
187                                           "http://www.w3.org/2001/XMLSchema");
188                 } catch (IllegalArgumentException JavaDoc iae) {
189                     if (! warnedJAXP12) {
190                         log.warn("The XML parser does not support JAXP 1.2, XSD validation will not work.", iae);
191                         warnedJAXP12 = true;
192                     }
193                 }
194             }
195             dfactory.setNamespaceAware(true);
196
197             db = dfactory.newDocumentBuilder();
198
199             db.setErrorHandler(handler);
200
201             // set the entity resolver... which tell us where to find the dtd's
202
db.setEntityResolver(resolver);
203
204         } catch(ParserConfigurationException JavaDoc pce) {
205             log.error("a DocumentBuilder cannot be created which satisfies the configuration requested");
206             log.error(Logging.stackTrace(pce));
207             return null;
208         }
209         return db;
210     }
211
212     /**
213      * Creates a DocumentBuilder with default settings for handler, resolver, or validation,
214      * obtaining it from the cache if available.
215      * @return a DocumentBuilder instance, or null if none could be created
216      */

217     public static DocumentBuilder JavaDoc getDocumentBuilder() {
218         return getDocumentBuilder(validate(), null, null);
219     }
220
221
222     /**
223      * Obtain a DocumentBuilder
224      */

225     public static DocumentBuilder JavaDoc getDocumentBuilder(boolean validating) {
226         return DocumentReader.getDocumentBuilder(validating, null, null);
227     }
228
229     /**
230      * See {@link #getDocumentBuilder(boolean, ErrorHandler, EntityResolver)}
231      */

232     public static DocumentBuilder JavaDoc getDocumentBuilder(boolean validating, ErrorHandler JavaDoc handler, EntityResolver JavaDoc resolver) {
233         return getDocumentBuilder(validating, false, handler, resolver);
234     }
235
236     /**
237      * Creates a DocumentBuilder.
238      * DocumentBuilders that use the default error handler or entity resolver are cached (one for validating,
239      * one for non-validating document buidlers).
240      * @param validating if true, the documentbuilder will validate documents read
241      * @param xsd if true, validating will be done by an XML schema definiton.
242      * @param handler a ErrorHandler class to use for catching parsing errors, pass null to use the default handler
243      * @param resolver a EntityResolver class used for resolving the document's dtd, pass null to use the default resolver
244      * @return a DocumentBuilder instance, or null if none could be created
245      * @since MMBase-1.8.
246      */

247     public static DocumentBuilder JavaDoc getDocumentBuilder(boolean validating, boolean xsd, ErrorHandler JavaDoc handler, EntityResolver JavaDoc resolver) {
248         validating = validate(validating);
249         if (handler == null && resolver == null) {
250             String JavaDoc key = "" + validating + xsd;
251             DocumentBuilder JavaDoc db = (DocumentBuilder JavaDoc) documentBuilders.get(key);
252             if (db == null) {
253                 db = createDocumentBuilder(validating, xsd, null, null);
254                 documentBuilders.put(key, db);
255             }
256             return db;
257         } else {
258             return createDocumentBuilder(validating, xsd, handler, resolver);
259         }
260     }
261
262     /**
263      * Return the text value of a node.
264      * It includes the contents of all child textnodes and CDATA sections, but ignores
265      * everything else (such as comments)
266      * The code trims excessive whitespace unless it is included in a CDATA section.
267      *
268      * @param n the Node whose value to determine
269      * @return a String representing the node's textual value
270      */

271     public static String JavaDoc getNodeTextValue(Node n) {
272         NodeList nl = n.getChildNodes();
273         StringBuffer JavaDoc res = new StringBuffer JavaDoc();
274         for (int i = 0; i < nl.getLength(); i++) {
275             Node textnode = nl.item(i);
276             if (textnode.getNodeType() == Node.TEXT_NODE) {
277                 res.append(textnode.getNodeValue().trim());
278             } else if (textnode.getNodeType() == Node.CDATA_SECTION_NODE) {
279                 res.append(textnode.getNodeValue());
280             }
281         }
282         return res.toString();
283     }
284
285     /**
286      * @since MMBase-1.8.1
287      */

288     public static void setNodeTextValue(Node n, String JavaDoc value) {
289         NodeList textNodes = n.getChildNodes();
290         for (int j = 0; j < textNodes.getLength(); j++) {
291             n.removeChild(textNodes.item(j));
292         }
293         Text text = n.getOwnerDocument().createTextNode(value);
294         n.appendChild(text);
295     }
296
297     /**
298      * Returns whether an element has a certain attribute, either an unqualified attribute or an attribute that fits in the
299      * passed namespace
300      */

301     static public boolean hasAttribute(Element element, String JavaDoc nameSpace, String JavaDoc localName) {
302         return element.hasAttributeNS(nameSpace,localName) || element.hasAttribute(localName);
303     }
304
305     /**
306      * Returns the value of a certain attribute, either an unqualified attribute or an attribute that fits in the
307      * passed namespace
308      */

309     static public String JavaDoc getAttribute(Element element, String JavaDoc nameSpace, String JavaDoc localName) {
310         if (element.hasAttributeNS(nameSpace, localName)) {
311             return element.getAttributeNS(nameSpace, localName);
312         } else {
313             return element.getAttribute(localName);
314         }
315     }
316
317     /**
318      * Utility method to make a document of an element.
319      * @since MMBase-1.8
320      */

321     static public Document toDocument(Element element) {
322         DocumentBuilder JavaDoc documentBuilder = getDocumentBuilder(false, null, null);
323         DOMImplementation impl = documentBuilder.getDOMImplementation();
324         Document document = impl.createDocument(element.getNamespaceURI(), element.getLocalName(), null);
325         Element dest = document.getDocumentElement();
326         Element copy = (Element) document.importNode(element, false);
327         NamedNodeMap attributes = copy.getAttributes();
328         for (int i = 0; i < attributes.getLength(); i++) {
329             Attr attribute = (Attr) (attributes.item(i).cloneNode(true));
330             dest.setAttributeNode(attribute);
331
332         }
333         NodeList childs = element.getChildNodes();
334         for (int i = 0; i < childs.getLength() ; i++) {
335             Node child = document.importNode(childs.item(i), true);
336             dest.appendChild(child);
337         }
338         document.normalize();
339         return document;
340     }
341
342
343     /**
344      * Appends a child to a parent at the right position. The right position is defined by a comma
345      * separated list of regular expressions. If the the child matches the last element of the
346      * path, then the child is appended after similer childs, if not, then it will be appended
347      * before them.
348      *
349      * @param parent The parent element, to which a new child will be added
350      * @param newChild this new child
351      * @param path The beforementioned comma separated list of regexps. See also {@link java.util.regex.Pattern};
352      * @since MMBase-1.8
353      */

354     static public void appendChild(Element parent, Element newChild, String JavaDoc path) {
355         String JavaDoc[] p = path.split(",");
356         int i = 0;
357         Node refChild = null;
358         NodeList childs = parent.getChildNodes();
359         int j = 0;
360         Pattern JavaDoc pattern = null;
361         if (p.length > 0) pattern = Pattern.compile("\\A" + p[i] + "\\z");
362         boolean matching = false;
363         while (j < childs.getLength() && i < p.length) {
364             if (childs.item(j) instanceof Element) {
365                 Element child = (Element) childs.item(j);
366                 if (pattern.matcher(child.getTagName()).matches()) {
367                     j++;
368                     refChild = childs.item(j);
369                     matching = true;
370                 } else {
371                     if (! matching) { // append at the beginning, because actual child list does not start llike path
372
refChild = childs.item(j);
373                         break;
374                     }
375                     i++;
376                     pattern = i < p.length ? Pattern.compile("\\A" + p[i] + "\\z") : null;
377                 }
378             } else {
379                 j++;
380             }
381         }
382         parent.insertBefore(newChild, refChild);
383     }
384
385     /**
386      * Returns the systemID of the InputSource used to read the document.
387      * This is generally the document's file path.
388      * @return the systemID as a String
389      *
390      * @since MMBase-1.8
391      */

392     public String JavaDoc getSystemId() {
393         return systemId;
394     }
395
396     /**
397      * @since MMBase-1.8
398      */

399     public void setSystemId(String JavaDoc url) {
400         systemId = url;
401     }
402
403     /**
404      * @param e Element
405      * @return Tag name of the element
406      */

407     public String JavaDoc getElementName(Element e) {
408         return e.getLocalName();
409     }
410
411     /**
412      * @param path Path to the element
413      * @param attr Attribute name
414      * @return Value of attribute
415      */

416     public String JavaDoc getElementAttributeValue(String JavaDoc path, String JavaDoc attr) {
417         return getElementAttributeValue(getElementByPath(path),attr);
418     }
419
420
421     /**
422      * @param e Element
423      * @param attr Attribute name
424      * @return Value of attribute
425      */

426     public String JavaDoc getElementAttributeValue(Element e, String JavaDoc attr) {
427         if (e == null) {
428             return "";
429         } else {
430             return e.getAttribute(attr);
431         }
432     }
433
434     /**
435      * Determine the root element of the contained document
436      * @return root element
437      * @deprecated
438      */

439     public Element getRootElement() {
440         if (document == null) {
441             log.error("Document is not defined, cannot get root element");
442         }
443         return document.getDocumentElement();
444     }
445
446     /**
447      * @param path Dot-separated list of tags describing path from root element to requested element.
448      * NB the path starts with the name of the root element.
449      * @return Leaf element of the path
450      */

451     public Element getElementByPath(String JavaDoc path) {
452         if (document == null) {
453             log.error("Document is not defined, cannot get " + path);
454         }
455         return getElementByPath(document.getDocumentElement(),path);
456     }
457
458     /**
459      * @param e Element from which the "relative" path is starting.
460      * NB the path starts with the name of the root element.
461      * @param path Dot-separated list of tags describing path from root element to requested element
462      * @return Leaf element of the path
463      */

464     public Element getElementByPath(Element e, String JavaDoc path) {
465         StringTokenizer st = new StringTokenizer(path,".");
466         if (!st.hasMoreTokens()) {
467             // faulty path
468
log.error("No tokens in path");
469             return null;
470         } else {
471             String JavaDoc root = st.nextToken();
472             if (e.getLocalName().equals("error")) {
473                 // path should start with document root element
474
log.error("Error occurred : (" + getElementValue(e) + ")");
475                 return null;
476             } else if (!e.getLocalName().equals(root)) {
477                 // path should start with document root element
478
log.error("path ["+path+"] with root ("+root+") doesn't start with root element ("+e.getLocalName()+"): incorrect xml file" +
479                           "("+getSystemId()+")");
480                 return null;
481             }
482             OUTER:
483             while (st.hasMoreTokens()) {
484                 String JavaDoc tag = st.nextToken();
485                 NodeList nl = e.getChildNodes();
486                 for(int i = 0; i < nl.getLength(); i++) {
487                     if (! (nl.item(i) instanceof Element)) continue;
488                     e = (Element)nl.item(i);
489                     if (e.getLocalName().equals(tag)) continue OUTER;
490                 }
491                 // Handle error!
492
return null;
493             }
494             return e;
495         }
496     }
497
498
499     /**
500      * @param path Path to the element
501      * @return Text value of element
502      */

503     public String JavaDoc getElementValue(String JavaDoc path) {
504         return getElementValue(getElementByPath(path));
505     }
506
507     /**
508      * @param e Element
509      * @return Text value of element
510      */

511     public String JavaDoc getElementValue(Element e) {
512         if (e == null) {
513             return "";
514         } else {
515             return getNodeTextValue(e);
516         }
517     }
518
519     /**
520      * @param path Path to the element
521      * @return Iterator of child elements
522      */

523     public Iterator getChildElements(String JavaDoc path) {
524         return getChildElements(getElementByPath(path));
525     }
526
527     /**
528      * @param e Element
529      * @return Iterator of child elements
530      */

531     public Iterator getChildElements(Element e) {
532         return getChildElements(e,"*");
533     }
534
535     /**
536      * @param path Path to the element
537      * @param tag tag to match ("*" means all tags")
538      * @return Iterator of child elements with the given tag
539      */

540     public Iterator getChildElements(String JavaDoc path,String JavaDoc tag) {
541         return getChildElements(getElementByPath(path),tag);
542     }
543
544     /**
545      * @param e Element
546      * @param tag tag to match ("*" means all tags")
547      * @return Iterator of child elements with the given tag
548      * @todo XXXX MM: Since we have changed the return type from 1.7 to 1.8 anyway, why don't we return a List then?
549      */

550     public Iterator getChildElements(Element e,String JavaDoc tag) {
551         List v = new ArrayList();
552         boolean ignoretag = tag.equals("*");
553         if (e!=null) {
554             NodeList nl = e.getChildNodes();
555             for (int i = 0; i < nl.getLength(); i++) {
556                 Node n = nl.item(i);
557                 if (n.getNodeType() == Node.ELEMENT_NODE &&
558                     (ignoretag ||
559                      ((Element)n).getLocalName().equalsIgnoreCase(tag))) {
560                     v.add(n);
561                 }
562             }
563         }
564         return v.iterator();
565     }
566
567     public static void main(String JavaDoc[] argv) throws Exception JavaDoc {
568         if (argv.length == 0) {
569             System.out.println("Usage: java -Dmmbase.config=<config dir> org.mmbase.util.xml.DocumentReader <path to xml>");
570             System.out.println(" The mmbase config dir is used to resolve XSD's (in config/xmlns) and DTD's (in config/dtd).");
571             System.out.println(" Errors will be reported if the XML is invalid");
572
573             return;
574         }
575         Document d = org.mmbase.util.ResourceLoader.getDocument(new java.io.File JavaDoc(argv[0]).toURL(), true, null);
576         /*
577         DocumentReader doc = new DocumentReader(d);
578         System.out.println(XMLWriter.write(toDocument(doc.getRootElement()), true, false));
579         */

580     }
581
582 }
583
Popular Tags