KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > codehaus > groovy > sandbox > util > XmlSlurper


1 package org.codehaus.groovy.sandbox.util;
2 import groovy.lang.Closure;
3 import groovy.lang.GroovyObject;
4 import groovy.lang.GroovyObjectSupport;
5 import groovy.lang.Writable;
6
7 import java.io.File JavaDoc;
8 import java.io.FileInputStream JavaDoc;
9 import java.io.IOException JavaDoc;
10 import java.io.InputStream JavaDoc;
11 import java.io.Reader JavaDoc;
12 import java.io.StringReader JavaDoc;
13 import java.io.Writer JavaDoc;
14 import java.security.AccessController JavaDoc;
15 import java.security.PrivilegedActionException JavaDoc;
16 import java.security.PrivilegedExceptionAction JavaDoc;
17 import java.util.HashMap JavaDoc;
18 import java.util.Iterator JavaDoc;
19 import java.util.LinkedList JavaDoc;
20 import java.util.List JavaDoc;
21 import java.util.Map JavaDoc;
22
23 import javax.xml.parsers.ParserConfigurationException JavaDoc;
24 import javax.xml.parsers.SAXParser JavaDoc;
25 import javax.xml.parsers.SAXParserFactory JavaDoc;
26
27 import org.codehaus.groovy.sandbox.markup.Buildable;
28 import org.xml.sax.Attributes JavaDoc;
29 import org.xml.sax.InputSource JavaDoc;
30 import org.xml.sax.SAXException JavaDoc;
31 import org.xml.sax.XMLReader JavaDoc;
32 import org.xml.sax.helpers.DefaultHandler JavaDoc;
33
34
35 public class XmlSlurper extends DefaultHandler JavaDoc {
36     private final XMLReader JavaDoc reader;
37     private List JavaDoc result = null;
38     private List JavaDoc body = null;
39     private final StringBuffer JavaDoc charBuffer = new StringBuffer JavaDoc();
40
41     public XmlSlurper() throws ParserConfigurationException JavaDoc, SAXException JavaDoc {
42         this(false, true);
43     }
44
45     public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException JavaDoc, SAXException JavaDoc {
46         SAXParserFactory JavaDoc factory = null;
47         
48             try {
49                 factory = (SAXParserFactory JavaDoc) AccessController.doPrivileged(new PrivilegedExceptionAction JavaDoc() {
50                     public Object JavaDoc run() throws ParserConfigurationException JavaDoc {
51                         return SAXParserFactory.newInstance();
52                     }
53                 });
54             } catch (final PrivilegedActionException JavaDoc pae) {
55             final Exception JavaDoc e = pae.getException();
56                 
57                 if (e instanceof ParserConfigurationException JavaDoc) {
58                     throw (ParserConfigurationException JavaDoc) e;
59                 } else {
60                     throw new RuntimeException JavaDoc(e);
61                 }
62             }
63         factory.setNamespaceAware(namespaceAware);
64         factory.setValidating(validating);
65
66         final SAXParser JavaDoc parser = factory.newSAXParser();
67         this.reader = parser.getXMLReader();
68     }
69
70     public XmlSlurper(final XMLReader JavaDoc reader) {
71         this.reader = reader;
72     }
73
74     public XmlSlurper(final SAXParser JavaDoc parser) throws SAXException JavaDoc {
75         this(parser.getXMLReader());
76     }
77
78     /**
79      * Parse the content of the specified input source into a List
80      */

81     public XmlList parse(final InputSource JavaDoc input) throws IOException JavaDoc, SAXException JavaDoc {
82             this.reader.setContentHandler(this);
83             this.reader.parse(input);
84         
85         return (XmlList)this.result.get(0);
86     }
87     
88     /**
89      * Parses the content of the given file as XML turning it into a List
90      */

91     public XmlList parse(final File JavaDoc file) throws IOException JavaDoc, SAXException JavaDoc {
92     final InputSource JavaDoc input = new InputSource JavaDoc(new FileInputStream JavaDoc(file));
93     
94         input.setSystemId("file://" + file.getAbsolutePath());
95         
96         return parse(input);
97
98     }
99
100     /**
101      * Parse the content of the specified input stream into a List.
102      * Note that using this method will not provide the parser with any URI
103      * for which to find DTDs etc
104      */

105     public XmlList parse(final InputStream JavaDoc input) throws IOException JavaDoc, SAXException JavaDoc {
106         return parse(new InputSource JavaDoc(input));
107     }
108
109     /**
110      * Parse the content of the specified reader into a List.
111      * Note that using this method will not provide the parser with any URI
112      * for which to find DTDs etc
113      */

114     public XmlList parse(final Reader JavaDoc in) throws IOException JavaDoc, SAXException JavaDoc {
115         return parse(new InputSource JavaDoc(in));
116     }
117
118     /**
119      * Parse the content of the specified URI into a List
120      */

121     public XmlList parse(final String JavaDoc uri) throws IOException JavaDoc, SAXException JavaDoc {
122         return parse(new InputSource JavaDoc(uri));
123     }
124
125     /**
126      * A helper method to parse the given text as XML
127      *
128      * @param text
129      * @return
130      */

131     public XmlList parseText(final String JavaDoc text) throws IOException JavaDoc, SAXException JavaDoc {
132         return parse(new StringReader JavaDoc(text));
133     }
134     
135
136     // ContentHandler interface
137
//-------------------------------------------------------------------------
138

139     /* (non-Javadoc)
140      * @see org.xml.sax.ContentHandler#startDocument()
141      */

142     public void startDocument() throws SAXException JavaDoc {
143         this.result = null;
144         this.body = new LinkedList JavaDoc();
145         this.charBuffer.setLength(0);
146     }
147     
148     /* (non-Javadoc)
149      * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
150      */

151     public void startElement(final String JavaDoc namespaceURI, final String JavaDoc localName, final String JavaDoc qName, final Attributes JavaDoc atts) throws SAXException JavaDoc {
152         addNonWhitespaceCdata();
153         
154         final Map JavaDoc attributes = new HashMap JavaDoc();
155         
156         for (int i = atts.getLength() - 1; i != -1; i--) {
157             if (atts.getURI(i).length() == 0) {
158                 attributes.put(atts.getQName(i), atts.getValue(i));
159             } else {
160                 //
161
// Note this is strictly incorrect the name is really localname + URI
162
// We need to figure out what to do with paramenters in namespaces
163
//
164
attributes.put(atts.getLocalName(i), atts.getValue(i));
165             }
166             
167         }
168         
169         final List JavaDoc newBody = new LinkedList JavaDoc();
170
171         newBody.add(attributes);
172         
173         newBody.add(this.body);
174
175         this.body = newBody;
176     }
177
178     /* (non-Javadoc)
179      * @see org.xml.sax.ContentHandler#characters(char[], int, int)
180      */

181     public void characters(final char[] ch, final int start, final int length) throws SAXException JavaDoc {
182         this.charBuffer.append(ch, start, length);
183     }
184     
185     /* (non-Javadoc)
186      * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
187      */

188     public void endElement(final String JavaDoc namespaceURI, final String JavaDoc localName, final String JavaDoc qName) throws SAXException JavaDoc {
189         addNonWhitespaceCdata();
190         
191         final List JavaDoc children = this.body;
192         
193         final Map JavaDoc attributes = (Map JavaDoc)this.body.remove(0);
194         
195         this.body = (List JavaDoc)this.body.remove(0);
196         
197         if (namespaceURI.length() == 0) {
198             this.body.add(new XmlList(qName, attributes, children, namespaceURI));
199         } else {
200             this.body.add(new XmlList(localName, attributes, children, namespaceURI));
201         }
202     }
203     
204     /* (non-Javadoc)
205      * @see org.xml.sax.ContentHandler#endDocument()
206      */

207     public void endDocument() throws SAXException JavaDoc {
208         this.result = this.body;
209         this.body = null;
210     }
211
212     // Implementation methods
213
//-------------------------------------------------------------------------
214

215     /**
216      *
217      */

218     private void addNonWhitespaceCdata() {
219         if (this.charBuffer.length() != 0) {
220             //
221
// This element is preceeded by CDATA if it's not whitespace add it to the body
222
// Note that, according to the XML spec, we should preserve the CDATA if it's all whitespace
223
// but for the sort of work I'm doing ignoring the whitespace is preferable
224
//
225
final String JavaDoc cdata = this.charBuffer.toString();
226             
227             this.charBuffer.setLength(0);
228             if (cdata.trim().length() != 0) {
229                 this.body.add(cdata);
230             }
231         }
232     }
233 }
234
235 class XmlList extends GroovyObjectSupport implements Writable, Buildable {
236     final String JavaDoc name;
237     final Map JavaDoc attributes;
238     final Object JavaDoc[] children;
239     final String JavaDoc namespaceURI;
240     
241     public XmlList(final String JavaDoc name, final Map JavaDoc attributes, final List JavaDoc body, final String JavaDoc namespaceURI) {
242         super();
243         
244         this.name = name;
245         this.attributes = attributes;
246         this.children = body.toArray();
247         this.namespaceURI = namespaceURI;
248     }
249     
250     public Object JavaDoc getProperty(final String JavaDoc elementName) {
251             if (elementName.startsWith("@")) {
252                 return this.attributes.get(elementName.substring(1));
253             } else {
254             final int indexOfFirst = getNextXmlElement(elementName, -1);
255             
256                 if (indexOfFirst == -1) { // no elements match the element name
257
return new ElementCollection() {
258                         protected ElementCollection getResult(final String JavaDoc property) {
259                             return this;
260                         }
261
262                                 /**
263                                  *
264                                  * Used by the Invoker when it wants to iterate over this object
265                                  *
266                                  * @return
267                                  */

268                                 public ElementIterator iterator() {
269                                     return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{-1}) {
270                                         {
271                                             findNextChild(); // set up the element indexes
272
}
273                                         
274                                     protected void findNextChild() {
275                                         this.nextParentElements[0] = -1;
276                                     }
277                                     };
278                                 }
279                     };
280                 }
281                 
282                 if (getNextXmlElement(elementName, indexOfFirst) == -1) { // one element matches the element name
283
return this.children[indexOfFirst];
284                 } else { // > 1 element matches the element name
285
return new ElementCollection() {
286                             protected ElementCollection getResult(final String JavaDoc property) {
287                                 return new ComplexElementCollection(new XmlList[]{XmlList.this},
288                                                                 new int[] {indexOfFirst},
289                                                                 new String JavaDoc[] {elementName},
290                                                                 property);
291                             }
292     
293                                 /**
294                                  *
295                                  * Used by the Invoker when it wants to iterate over this object
296                                  *
297                                  * @return
298                                  */

299                                 public ElementIterator iterator() {
300                                     return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{indexOfFirst}) {
301                                     protected void findNextChild() {
302                                         this.nextParentElements[0] = XmlList.this.getNextXmlElement(elementName, this.nextParentElements[0]);
303                                     }
304                                     };
305                                 }
306                         };
307                 }
308             }
309     }
310     
311     public Object JavaDoc getAt(final int index) {
312             if (index == 0) {
313                 return this;
314             } else {
315                 throw new ArrayIndexOutOfBoundsException JavaDoc(index);
316             }
317         }
318     
319     public int size() {
320             return 1;
321     }
322
323     public Object JavaDoc invokeMethod(final String JavaDoc name, final Object JavaDoc args) {
324         if ("attributes".equals(name)) {
325             return this.attributes;
326         } else if ("name".equals(name)) {
327             return this.name;
328         } else if ("children".equals(name)) {
329             return this.children;
330         } else if ("contents".equals(name)) {
331             return new Buildable() {
332                 public void build(GroovyObject builder) {
333                     buildChildren(builder);
334                 }
335             };
336         } else if ("text".equals(name)) {
337             return text();
338         } else if ("getAt".equals(name) && ((Object JavaDoc[])args)[0] instanceof String JavaDoc) {
339             return getProperty((String JavaDoc)((Object JavaDoc[])args)[0]);
340         } else if ("depthFirst".equals(name)) {
341             //
342
// TODO: replace this with an iterator
343
//
344

345             return new GroovyObjectSupport() {
346                 public Object JavaDoc invokeMethod(final String JavaDoc name, final Object JavaDoc args) {
347                     if ("getAt".equals(name) && ((Object JavaDoc[])args)[0] instanceof String JavaDoc) {
348                         return getProperty((String JavaDoc)((Object JavaDoc[])args)[0]);
349                     } else {
350                         return XmlList.this.invokeMethod(name, args);
351                     }
352                 }
353                 
354                 public Object JavaDoc getProperty(final String JavaDoc property) {
355                     if (property.startsWith("@")) {
356                         return XmlList.this.getProperty(property);
357                     } else {
358                     final List JavaDoc result = new LinkedList JavaDoc();
359
360                         depthFirstGetProperty(property, XmlList.this.children, result);
361                         
362                         return result;
363                     }
364                 }
365                 
366                 private void depthFirstGetProperty(final String JavaDoc property, final Object JavaDoc[] contents, final List JavaDoc result) {
367                         for (int i = 0; i != contents.length; i++) {
368                         final Object JavaDoc item = contents[i];
369                         
370                             if (item instanceof XmlList) {
371                                 if (((XmlList)item).name.equals(property)) {
372                                     result.add(item);
373                                 }
374                                 
375                                 depthFirstGetProperty(property, ((XmlList)item).children, result);
376                             }
377                     }
378                 }
379             };
380             } else {
381                 return getMetaClass().invokeMethod(this, name, args);
382             }
383     }
384     
385     /* (non-Javadoc)
386      * @see groovy.lang.Writable#writeTo(java.io.Writer)
387      */

388     public Writer JavaDoc writeTo(Writer JavaDoc out) throws IOException JavaDoc {
389
390         for (int i = 0; i != this.children.length; i++) {
391         final Object JavaDoc child = this.children[i];
392         
393             if (child instanceof String JavaDoc) {
394                 out.write((String JavaDoc)child);
395             } else {
396                 ((XmlList)child).writeTo(out);
397             }
398         }
399         
400         return out;
401     }
402     
403     /* (non-Javadoc)
404      * @see org.codehaus.groovy.sandbox.markup.Buildable#build(groovy.lang.GroovyObject)
405      */

406     public void build(final GroovyObject builder) {
407         // TODO handle Namespaces
408
final Closure rest = new Closure(null) {
409         public Object JavaDoc doCall(final Object JavaDoc o) {
410             buildChildren(builder);
411             
412             return null;
413         }
414     };
415
416         builder.invokeMethod(this.name, new Object JavaDoc[]{this.attributes, rest});
417         
418     }
419     
420     public String JavaDoc toString() {
421         return text();
422     }
423     
424     private String JavaDoc text() {
425     final StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
426
427         for (int i = 0; i != this.children.length; i++) {
428         final Object JavaDoc child = this.children[i];
429         
430             if (child instanceof String JavaDoc) {
431                 buff.append(child);
432             } else {
433                 buff.append(((XmlList)child).text());
434             }
435         }
436     
437         return buff.toString();
438     }
439     
440     private void buildChildren(final GroovyObject builder) {
441         for (int i = 0; i != this.children.length; i++) {
442             if (this.children[i] instanceof Buildable) {
443                 ((Buildable)this.children[i]).build(builder);
444             } else {
445                 builder.getProperty("mkp");
446                 builder.invokeMethod("yield", new Object JavaDoc[]{this.children[i]});
447             }
448         }
449     }
450
451         protected int getNextXmlElement(final String JavaDoc name, final int lastFound) {
452             for (int i = lastFound + 1; i < this.children.length; i++) {
453             final Object JavaDoc item = this.children[i];
454                 
455                 if (item instanceof XmlList && ((XmlList)item).name.equals(name)) {
456                     return i;
457                 }
458             }
459             
460             return -1;
461         }
462 }
463
464 abstract class ElementIterator implements Iterator JavaDoc {
465     protected final XmlList[] parents;
466     protected final int[] nextParentElements;
467     
468     protected ElementIterator(final XmlList[] parents, int[] nextParentElements) {
469         this.parents = new XmlList[parents.length];
470         System.arraycopy(parents, 0, this.parents, 0, parents.length);
471         
472         this.nextParentElements = new int[nextParentElements.length];
473         System.arraycopy(nextParentElements, 0, this.nextParentElements, 0, nextParentElements.length);
474     }
475     
476     /* (non-Javadoc)
477      * @see java.util.Iterator#hasNext()
478      */

479     public boolean hasNext() {
480         return this.nextParentElements[0] != -1;
481     }
482     
483     /* (non-Javadoc)
484      * @see java.util.Iterator#next()
485      */

486     public Object JavaDoc next() {
487     final Object JavaDoc result = this.parents[0].children[this.nextParentElements[0]];
488             
489         findNextChild();
490     
491         return result;
492     }
493     
494     /* (non-Javadoc)
495      * @see java.util.Iterator#remove()
496      */

497     public void remove() {
498         throw new UnsupportedOperationException JavaDoc();
499     }
500     
501     protected abstract void findNextChild();
502 }
503
504 abstract class ElementCollection extends GroovyObjectSupport {
505     private int count = -1;
506     
507     public abstract ElementIterator iterator();
508     
509     /* (non-Javadoc)
510      * @see groovy.lang.GroovyObject#getProperty(java.lang.String)
511      */

512     public Object JavaDoc getProperty(final String JavaDoc property) {
513     final ElementCollection result = getResult(property);
514     final Iterator JavaDoc iterator = result.iterator();
515
516         if (iterator.hasNext()) {
517             //
518
// See if there's only one available
519
//
520
final Object JavaDoc first = iterator.next();
521             
522             if (!iterator.hasNext()) {
523                 return first;
524             }
525         }
526         
527         return result;
528     }
529     
530     protected abstract ElementCollection getResult(String JavaDoc property);
531     
532     public synchronized Object JavaDoc getAt(int index) {
533             if (index >= 0) {
534         final Iterator JavaDoc iter = iterator();
535         
536             while (iter.hasNext()) {
537                 if (index-- == 0) {
538                     return iter.next();
539                 } else {
540                     iter.next();
541                 }
542             }
543             }
544             
545             throw new ArrayIndexOutOfBoundsException JavaDoc(index);
546     }
547     
548     public synchronized int size() {
549         if (this.count == -1) {
550         final Iterator JavaDoc iter = iterator();
551         
552             this.count = 0;
553             
554             while (iter.hasNext()) {
555                 this.count++;
556                 iter.next();
557             }
558         }
559         return this.count;
560     }
561 }
562
563 class ComplexElementCollection extends ElementCollection {
564     private final XmlList[] parents;
565     private final int[] nextParentElements;
566     private final String JavaDoc[] parentElementNames;
567     
568     public ComplexElementCollection(final XmlList[] parents,
569                                   final int[] nextParentElements,
570                                   final String JavaDoc[] parentElementNames,
571                                   final String JavaDoc childElementName)
572     {
573         this.parents = new XmlList[parents.length + 1];
574         this.parents[0] = (XmlList)parents[0].children[nextParentElements[0]];
575         System.arraycopy(parents, 0, this.parents, 1, parents.length);
576         
577         this.nextParentElements = new int[nextParentElements.length + 1];
578         this.nextParentElements[0] = -1;
579         System.arraycopy(nextParentElements, 0, this.nextParentElements, 1, nextParentElements.length);
580         
581         this.parentElementNames = new String JavaDoc[parentElementNames.length + 1];
582         this.parentElementNames[0] = childElementName;
583         System.arraycopy(parentElementNames, 0, this.parentElementNames, 1, parentElementNames.length);
584         
585         //
586
// Use the iterator to get the index of the first element
587
//
588

589         final ElementIterator iter = this.iterator();
590         
591         iter.findNextChild();
592         
593         this.nextParentElements[0] = iter.nextParentElements[0];
594     }
595     
596     protected ElementCollection getResult(final String JavaDoc property) {
597         return new ComplexElementCollection(this.parents,
598                                             this.nextParentElements,
599                                             this.parentElementNames,
600                                             property);
601     }
602     
603     /**
604      *
605      * Used by the Invoker when it wants to iterate over this object
606      *
607      * @return
608      */

609     public ElementIterator iterator() {
610         return new ElementIterator(this.parents, this.nextParentElements) {
611                         protected void findNextChild() {
612                             this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], this.nextParentElements[0]);
613                             
614                             while (this.nextParentElements[0] == -1) {
615                                 this.parents[0] = findNextParent(1);
616                                 
617                                 if (this.parents[0] == null) {
618                                     return;
619                                 } else {
620                                     this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], -1);
621                                 }
622                             }
623                         }
624                         
625                         private XmlList findNextParent(final int i) {
626                             if (i == this.nextParentElements.length) return null;
627                             
628                             this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], this.nextParentElements[i]);
629                             
630                             while (this.nextParentElements[i] == -1) {
631                                 this.parents[i] = findNextParent(i + 1);
632                                 
633                                 if (this.parents[i] == null) {
634                                     return null;
635                                 } else {
636                                     this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], -1);
637                                 }
638                             }
639                         
640                             return (XmlList)this.parents[i].children[this.nextParentElements[i]];
641                         }
642         };
643     }
644 }
645
Popular Tags