KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > lateralnz > common > util > XMLUtils


1 /* ====================================================================
2  * The LateralNZ Software License, Version 1.0
3  *
4  * Copyright (c) 2003 LateralNZ. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * 3. The end-user documentation included with the redistribution,
19  * if any, must include the following acknowledgment:
20  * "This product includes software developed by
21  * LateralNZ (http://www.lateralnz.org/) and other third parties."
22  * Alternately, this acknowledgment may appear in the software itself,
23  * if and wherever such third-party acknowledgments normally appear.
24  *
25  * 4. The names "LateralNZ" must not be used to endorse or promote
26  * products derived from this software without prior written
27  * permission. For written permission, please
28  * contact oss@lateralnz.org.
29  *
30  * 5. Products derived from this software may not be called "Panther",
31  * or "Lateral" or "LateralNZ", nor may "PANTHER" or "LATERAL" or
32  * "LATERALNZ" appear in their name, without prior written
33  * permission of LateralNZ.
34  *
35  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46  * SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of LateralNZ. For more
51  * information on Lateral, please see http://www.lateralnz.com/ or
52  * http://www.lateralnz.org
53  *
54  */

55 package org.lateralnz.common.util;
56
57 import java.io.ByteArrayInputStream JavaDoc;
58 import java.io.IOException JavaDoc;
59 import java.util.LinkedList JavaDoc;
60 import java.util.List JavaDoc;
61 import java.util.StringTokenizer JavaDoc;
62 import javax.xml.parsers.DocumentBuilder JavaDoc;
63 import javax.xml.parsers.DocumentBuilderFactory JavaDoc;
64
65 import org.w3c.dom.*;
66 import org.xml.sax.SAXException JavaDoc;
67
68 /**
69  * common XML utility functions
70  *
71  * @author J R Briggs
72  */

73 public final class XMLUtils implements Constants {
74   private static final String JavaDoc IGNORE_CDATA_MATCH_PATTERN = "(.*?)(<!\\[CDATA.*?\\]\\]>)";
75   private static final String JavaDoc CDATA_BEGIN = "<![CDATA";
76   private static final String JavaDoc CR_NL = RETURN + NEWLINE;
77   
78   private static DocumentBuilder JavaDoc dombuilder = null;
79   private static java.util.regex.Pattern JavaDoc cdataPattern = null;
80   
81   static {
82     try {
83       DocumentBuilderFactory JavaDoc dbf = DocumentBuilderFactory.newInstance();
84       dbf.setValidating(false);
85       dombuilder = dbf.newDocumentBuilder();
86       dombuilder.setEntityResolver(null);
87     }
88     catch (Exception JavaDoc e) {
89       e.printStackTrace();
90     }
91   }
92   
93   private XMLUtils() {
94   }
95   
96   /**
97    * flatten an XML node into a string (reverse parse?)
98    */

99   public static final String JavaDoc flatten(Node xml) {
100     StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
101     
102     flatten(xml, sb);
103     
104     return sb.toString();
105   }
106   
107   private static final void flatten(Node xml, StringBuffer JavaDoc sb) {
108     sb.append(LEFT_CHEV).append(xml.getNodeName());
109     
110     // attributes
111
NamedNodeMap nnm = xml.getAttributes();
112     if (nnm != null) {
113       for (int i = 0; i < nnm.getLength(); i++) {
114         Node n = nnm.item(i);
115         sb.append(SPACE).append(n.getNodeName()).append(EQUALS).append(QUOTE).append(n.getNodeValue()).append(QUOTE);
116       }
117     }
118     sb.append(RIGHT_CHEV);
119     
120     // child nodes
121
NodeList nl = xml.getChildNodes();
122     for (int i = 0; i < nl.getLength(); i++) {
123       if (nl.item(i).getNodeType() == Node.TEXT_NODE) {
124         sb.append(nl.item(i).getNodeValue());
125       }
126       else {
127         flatten(nl.item(i), sb);
128       }
129     }
130     sb.append(LEFT_CHEV).append(FORWARD_SLASH).append(xml.getNodeName()).append(RIGHT_CHEV);
131   }
132   
133   /**
134    * get the value of an xml attribute. For example, given:
135    * <pre>
136    * <xmlnode att1="test1" att2="test2">something</xmlnode>
137    * </pre>
138    * calling getattribute(node, "att2", "blah") should return "test2"
139    */

140   public static final String JavaDoc getAttributeValue(Node xml, String JavaDoc attribute, String JavaDoc def) throws Exception JavaDoc {
141     NamedNodeMap nnm = xml.getAttributes();
142     if (nnm == null) {
143       return def;
144     }
145     Node n = nnm.getNamedItem(attribute);
146     String JavaDoc tmp = null;
147     if (n != null) {
148       tmp = n.getNodeValue();
149     }
150     if (tmp == null) {
151       return def;
152     }
153     else {
154       return tmp;
155     }
156   }
157   
158   /**
159    * get the 'first child' element value of an element
160    */

161   public static final String JavaDoc getFirstChildElementValue(Element e, String JavaDoc elemName) {
162     NodeList nl = e.getElementsByTagName(elemName);
163     if (nl.getLength() < 1) {
164       return EMPTY;
165     }
166     Element e2 = (Element)nl.item(0);
167     return e2.getFirstChild().getNodeValue();
168   }
169   
170   /**
171    * get the first node with a specified name
172    */

173   public static final Node getNamedNode(Node node, String JavaDoc name) {
174     NodeList nl = node.getChildNodes();
175     for (int i = 0; i < nl.getLength(); i++) {
176       Node n = nl.item(i);
177       if (n.getNodeName().equals(name)) {
178         return n;
179       }
180     }
181     return null;
182   }
183   
184  /**
185   * get a list of XML nodes based upon their nodename
186   */

187   public static final List JavaDoc getNodesByName(Node node, String JavaDoc name) {
188     LinkedList JavaDoc rtn = new LinkedList JavaDoc();
189     NodeList nl = node.getChildNodes();
190     for (int i = 0; i < nl.getLength(); i++) {
191       Node n = nl.item(i);
192       if (n.getNodeName().equals(name)) {
193         rtn.add(n);
194       }
195     }
196     return rtn;
197   }
198   
199   /**
200    * return the value of a node
201    */

202   public static final String JavaDoc getNodeValue(Node n) {
203     if (n == null || n.getChildNodes().getLength() < 1) {
204       return EMPTY;
205     }
206     else {
207       Node tmp = n.getChildNodes().item(0);
208       if (tmp == null) {
209         return EMPTY;
210       }
211       else {
212         return tmp.getNodeValue();
213       }
214     }
215   }
216   
217  /**
218   * get the node type as text
219   */

220   public static final String JavaDoc getNodeType(Node n) {
221     switch (n.getNodeType()) {
222       case Node.ATTRIBUTE_NODE :
223         return "attribute";
224       case Node.CDATA_SECTION_NODE :
225         return "cdata section";
226       case Node.COMMENT_NODE :
227         return "comment";
228       case Node.DOCUMENT_FRAGMENT_NODE :
229         return "document fragment";
230       case Node.DOCUMENT_NODE :
231         return "document";
232       case Node.DOCUMENT_TYPE_NODE :
233         return "document type";
234       case Node.ELEMENT_NODE :
235         return "element";
236       case Node.ENTITY_NODE :
237         return "entity";
238       case Node.ENTITY_REFERENCE_NODE :
239         return "entity reference";
240       case Node.NOTATION_NODE :
241         return "notation";
242       case Node.PROCESSING_INSTRUCTION_NODE :
243         return "processing instruction";
244       case Node.TEXT_NODE :
245         return "text";
246     }
247     return EMPTY;
248   }
249   
250   /**
251    * return true if a node contains an attribute
252    */

253   public static final boolean hasAttribute(Node xml, String JavaDoc attribute) throws Exception JavaDoc {
254     NamedNodeMap nnm = xml.getAttributes();
255     if (nnm.getNamedItem(attribute) != null) {
256       return true;
257     }
258     else {
259       return false;
260     }
261   }
262   
263   /**
264    * parse an xml string into a org.w3c.dom.Document object
265    */

266   public static final Document parse(String JavaDoc xml) throws SAXException JavaDoc, IOException JavaDoc {
267     ByteArrayInputStream JavaDoc bais = null;
268     try {
269       if (dombuilder == null) {
270         throw new SAXException JavaDoc("invalid document builder");
271       }
272       else {
273         Document doc;
274         bais = new ByteArrayInputStream JavaDoc(xml.getBytes());
275         doc = dombuilder.parse(bais);
276         return doc;
277       }
278     }
279     finally {
280       IOUtils.close(bais);
281     }
282   }
283   
284   /**
285    * preprocess and xml string, removing all leading and trailing whitespace
286    * from each line, and removing carriage returns and newlines from each line
287    * except where they fall within a CDATA section.
288    */

289   public static final String JavaDoc preprocess(String JavaDoc xml) throws Exception JavaDoc {
290     xml = StringUtils.stripLTSpaces(xml);
291     //PatternMatcherInput input = new PatternMatcherInput(xml);
292
//Pattern p = compiler.compile(IGNORE_CDATA_MATCH_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK);
293
if (cdataPattern == null) {
294       synchronized (XMLUtils.class) {
295         cdataPattern = java.util.regex.Pattern.compile(IGNORE_CDATA_MATCH_PATTERN, java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.DOTALL);
296       }
297     }
298     
299     // if we have a line which starts with a < but does not end with >
300
// then we need to preserve a space there (so to not cause
301
// parsing problems later. For example:
302
// <mbean code="com.something.Someclass"
303
// name="myname">
304
StringBuffer JavaDoc rtn = new StringBuffer JavaDoc();
305     StringTokenizer JavaDoc st = new StringTokenizer JavaDoc(xml, NEWLINE + RETURN, true);
306     while (st.hasMoreTokens()) {
307       String JavaDoc tok = st.nextToken();
308       rtn.append(tok);
309       int lpos = tok.lastIndexOf(CHAR_LEFT_CHEV);
310       int rpos = tok.lastIndexOf(CHAR_RIGHT_CHEV);
311       if (lpos >= 0 && rpos < lpos) {
312         rtn.append(SPACE);
313       }
314     }
315     
316     xml = rtn.toString();
317     rtn = new StringBuffer JavaDoc();
318     int total = 0;
319     
320     if (xml.indexOf(CDATA_BEGIN) >= 0) {
321       java.util.regex.Matcher JavaDoc matcher = cdataPattern.matcher(xml);
322       
323     
324       // now look for any newlines that aren't within a CDATA section
325
// and remove them
326
while (matcher.find()) {
327         int groups = matcher.groupCount();
328         for (int i = 1; i <= groups; i++) {
329           String JavaDoc tmp = matcher.group(i);
330           total += tmp.length();
331           if (!tmp.startsWith(CDATA_BEGIN)) {
332             tmp = StringUtils.remove(tmp, CR_NL);
333           }
334           rtn.append(tmp);
335         }
336       }
337     }
338     
339     // remove all remaining newlines
340
rtn.append(StringUtils.remove(xml.substring(total), CR_NL));
341     return rtn.toString();
342   }
343
344 }
Popular Tags