XMLUtils


1   /* ====================================================================
2    * The LateralNZ Software License, Version 1.0
3    *
4    * Copyright (c) 2003 LateralNZ.  All rights reserved.
5    *
6    * Redistribution and use in source and binary forms, with or without
7    * modification, are permitted provided that the following conditions
8    * are met:
9    *
10   * 1. Redistributions of source code must retain the above copyright
11   *    notice, this list of conditions and the following disclaimer.
12   *
13   * 2. Redistributions in binary form must reproduce the above copyright
14   *    notice, this list of conditions and the following disclaimer in
15   *    the documentation and/or other materials provided with the
16   *    distribution.
17   *
18   * 3. The end-user documentation included with the redistribution,
19   *    if any, must include the following acknowledgment:
20   *       "This product includes software developed by 
21   *        LateralNZ (http://www.lateralnz.org/) and other third parties."
22   *    Alternately, this acknowledgment may appear in the software itself,
23   *    if and wherever such third-party acknowledgments normally appear.
24   *
25   * 4. The names "LateralNZ" must not be used to endorse or promote 
26   *    products derived from this software without prior written 
27   *    permission. For written permission, please 
28   *    contact oss@lateralnz.org.
29   *
30   * 5. Products derived from this software may not be called "Panther", 
31   *    or "Lateral" or "LateralNZ", nor may "PANTHER" or "LATERAL" or 
32   *    "LATERALNZ" appear in their name, without prior written 
33   *    permission of LateralNZ.
34   *
35   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46   * SUCH DAMAGE.
47   * ====================================================================
48   *
49   * This software consists of voluntary contributions made by many
50   * individuals on behalf of LateralNZ.  For more
51   * information on Lateral, please see http://www.lateralnz.com/ or
52   * http://www.lateralnz.org
53   *
54   */
55  package org.lateralnz.common.util;
56  
57  import java.io.ByteArrayInputStream  ;
58  import java.io.IOException  ;
59  import java.util.LinkedList  ;
60  import java.util.List  ;
61  import java.util.StringTokenizer  ;
62  import javax.xml.parsers.DocumentBuilder  ;
63  import javax.xml.parsers.DocumentBuilderFactory  ;
64  
65  import org.w3c.dom.*;
66  import org.xml.sax.SAXException  ;
67  
68  /**
69   * common XML utility functions
70   *
71   * @author J R Briggs
72   */
73  public final class XMLUtils implements Constants {
74    private static final String   IGNORE_CDATA_MATCH_PATTERN = "(.*?)(<!\\[CDATA.*?\\]\\]>)";
75    private static final String   CDATA_BEGIN                = "<![CDATA";
76    private static final String   CR_NL                      = RETURN + NEWLINE;
77    
78    private static DocumentBuilder   dombuilder = null;
79    private static java.util.regex.Pattern   cdataPattern    = null;
80    
81    static {
82      try {
83        DocumentBuilderFactory   dbf = DocumentBuilderFactory.newInstance();
84        dbf.setValidating(false);
85        dombuilder = dbf.newDocumentBuilder();
86        dombuilder.setEntityResolver(null);
87      }
88      catch (Exception   e) {
89        e.printStackTrace();
90      }
91    }
92    
93    private XMLUtils() {
94    }
95    
96    /**
97     * flatten an XML node into a string (reverse parse?)
98     */
99    public static final String   flatten(Node xml) {
100     StringBuffer   sb = new StringBuffer  ();
101     
102     flatten(xml, sb);
103     
104     return sb.toString();
105   }
106   
107   private static final void flatten(Node xml, StringBuffer   sb) {
108     sb.append(LEFT_CHEV).append(xml.getNodeName());
109     
110     // attributes
111     NamedNodeMap nnm = xml.getAttributes();
112     if (nnm != null) {
113       for (int i = 0; i < nnm.getLength(); i++) {
114         Node n = nnm.item(i);
115         sb.append(SPACE).append(n.getNodeName()).append(EQUALS).append(QUOTE).append(n.getNodeValue()).append(QUOTE);
116       }
117     }
118     sb.append(RIGHT_CHEV);
119     
120     // child nodes
121     NodeList nl = xml.getChildNodes();
122     for (int i = 0; i < nl.getLength(); i++) {
123       if (nl.item(i).getNodeType() == Node.TEXT_NODE) {
124         sb.append(nl.item(i).getNodeValue());
125       }
126       else {
127         flatten(nl.item(i), sb);
128       }
129     }
130     sb.append(LEFT_CHEV).append(FORWARD_SLASH).append(xml.getNodeName()).append(RIGHT_CHEV);
131   }
132   
133   /**
134    * get the value of an xml attribute.  For example, given:
135    * <pre>
136    *   <xmlnode att1="test1" att2="test2">something</xmlnode>
137    * </pre>
138    * calling getattribute(node, "att2", "blah") should return "test2"
139    */
140   public static final String   getAttributeValue(Node xml, String   attribute, String   def) throws Exception   {
141     NamedNodeMap nnm = xml.getAttributes();
142     if (nnm == null) {
143       return def;
144     }
145     Node n = nnm.getNamedItem(attribute);
146     String   tmp = null;
147     if (n != null) {
148       tmp = n.getNodeValue();
149     }
150     if (tmp == null) {
151       return def;
152     }
153     else {
154       return tmp;
155     }
156   }
157   
158   /**
159    * get the 'first child' element value of an element
160    */
161   public static final String   getFirstChildElementValue(Element e, String   elemName) {
162     NodeList nl = e.getElementsByTagName(elemName);
163     if (nl.getLength() < 1) {
164       return EMPTY;
165     }
166     Element e2 = (Element)nl.item(0);
167     return e2.getFirstChild().getNodeValue();
168   }
169   
170   /**
171    * get the first node with a specified name
172    */
173   public static final Node getNamedNode(Node node, String   name) {
174     NodeList nl = node.getChildNodes();
175     for (int i = 0; i < nl.getLength(); i++) {
176       Node n = nl.item(i);
177       if (n.getNodeName().equals(name)) {
178         return n;
179       }
180     }
181     return null;
182   }
183   
184  /**
185   * get a list of XML nodes based upon their nodename
186   */
187   public static final List   getNodesByName(Node node, String   name) {
188     LinkedList   rtn = new LinkedList  ();
189     NodeList nl = node.getChildNodes();
190     for (int i = 0; i < nl.getLength(); i++) {
191       Node n = nl.item(i);
192       if (n.getNodeName().equals(name)) {
193         rtn.add(n);
194       }
195     } 
196     return rtn;
197   }
198   
199   /**
200    * return the value of a node
201    */
202   public static final String   getNodeValue(Node n) {
203     if (n == null || n.getChildNodes().getLength() < 1) {
204       return EMPTY;
205     }
206     else {
207       Node tmp = n.getChildNodes().item(0);
208       if (tmp == null) {
209         return EMPTY;
210       }
211       else {
212         return tmp.getNodeValue();
213       }
214     }
215   }
216   
217  /**
218   * get the node type as text
219   */
220   public static final String   getNodeType(Node n) {
221     switch (n.getNodeType()) {
222       case Node.ATTRIBUTE_NODE :
223         return "attribute";
224       case Node.CDATA_SECTION_NODE :
225         return "cdata section";
226       case Node.COMMENT_NODE :
227         return "comment";
228       case Node.DOCUMENT_FRAGMENT_NODE :
229         return "document fragment";
230       case Node.DOCUMENT_NODE :
231         return "document";
232       case Node.DOCUMENT_TYPE_NODE :
233         return "document type";
234       case Node.ELEMENT_NODE :
235         return "element";
236       case Node.ENTITY_NODE :
237         return "entity";
238       case Node.ENTITY_REFERENCE_NODE :
239         return "entity reference";
240       case Node.NOTATION_NODE :
241         return "notation";
242       case Node.PROCESSING_INSTRUCTION_NODE :
243         return "processing instruction";
244       case Node.TEXT_NODE :
245         return "text";
246     }
247     return EMPTY;
248   }
249   
250   /**
251    * return true if a node contains an attribute
252    */
253   public static final boolean hasAttribute(Node xml, String   attribute) throws Exception   {
254     NamedNodeMap nnm = xml.getAttributes();
255     if (nnm.getNamedItem(attribute) != null) {
256       return true;
257     }
258     else {
259       return false;
260     }
261   }
262   
263   /**
264    * parse an xml string into a org.w3c.dom.Document object
265    */
266   public static final Document parse(String   xml) throws SAXException  , IOException   {
267     ByteArrayInputStream   bais = null;
268     try {
269       if (dombuilder == null) {
270         throw new SAXException  ("invalid document builder");
271       }
272       else {
273         Document doc;
274         bais = new ByteArrayInputStream  (xml.getBytes());
275         doc = dombuilder.parse(bais);
276         return doc;
277       }
278     }
279     finally {
280       IOUtils.close(bais);
281     }
282   }
283   
284   /**
285    * preprocess and xml string, removing all leading and trailing whitespace
286    * from each line, and removing carriage returns and newlines from each line
287    * except where they fall within a CDATA section.
288    */
289   public static final String   preprocess(String   xml) throws Exception   {
290     xml = StringUtils.stripLTSpaces(xml);
291     //PatternMatcherInput input = new PatternMatcherInput(xml);
292     //Pattern p = compiler.compile(IGNORE_CDATA_MATCH_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK);
293     if (cdataPattern == null) {
294       synchronized (XMLUtils.class) {
295         cdataPattern = java.util.regex.Pattern.compile(IGNORE_CDATA_MATCH_PATTERN, java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.DOTALL);
296       }
297     }
298     
299     // if we have a line which starts with a < but does not end with >
300     // then we need to preserve a space there (so to not cause
301     // parsing problems later.  For example:
302     // <mbean code="com.something.Someclass"
303     //        name="myname">
304     StringBuffer   rtn = new StringBuffer  ();
305     StringTokenizer   st = new StringTokenizer  (xml, NEWLINE + RETURN, true);
306     while (st.hasMoreTokens()) {
307       String   tok = st.nextToken();
308       rtn.append(tok);
309       int lpos = tok.lastIndexOf(CHAR_LEFT_CHEV);
310       int rpos = tok.lastIndexOf(CHAR_RIGHT_CHEV);
311       if (lpos >= 0 && rpos < lpos) {
312         rtn.append(SPACE);
313       }
314     }
315     
316     xml = rtn.toString();
317     rtn = new StringBuffer  ();
318     int total = 0;
319     
320     if (xml.indexOf(CDATA_BEGIN) >= 0) {
321       java.util.regex.Matcher   matcher = cdataPattern.matcher(xml);
322       
323     
324       // now look for any newlines that aren't within a CDATA section
325       // and remove them  
326       while (matcher.find()) {
327         int groups = matcher.groupCount();
328         for (int i = 1; i <= groups; i++) {
329           String   tmp = matcher.group(i);
330           total += tmp.length();
331           if (!tmp.startsWith(CDATA_BEGIN)) {
332             tmp = StringUtils.remove(tmp, CR_NL);
333           }
334           rtn.append(tmp);
335         }
336       }
337     }
338     
339     // remove all remaining newlines
340     rtn.append(StringUtils.remove(xml.substring(total), CR_NL));
341     return rtn.toString();
342   }
343 
344 }
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags