KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > mortbay > xml > XmlParser


1 // ========================================================================
2
// $Id: XmlParser.java,v 1.21 2005/10/25 07:53:22 gregwilkins Exp $
3
// Copyright 1999-2004 Mort Bay Consulting Pty. Ltd.
4
// ------------------------------------------------------------------------
5
// Licensed under the Apache License, Version 2.0 (the "License");
6
// you may not use this file except in compliance with the License.
7
// You may obtain a copy of the License at
8
// http://www.apache.org/licenses/LICENSE-2.0
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
// ========================================================================
15
package org.mortbay.xml;
16
17 import java.io.File JavaDoc;
18 import java.io.IOException JavaDoc;
19 import java.io.InputStream JavaDoc;
20 import java.net.URL JavaDoc;
21 import java.util.AbstractList JavaDoc;
22 import java.util.ArrayList JavaDoc;
23 import java.util.HashMap JavaDoc;
24 import java.util.Iterator JavaDoc;
25 import java.util.Map JavaDoc;
26 import java.util.NoSuchElementException JavaDoc;
27 import java.util.StringTokenizer JavaDoc;
28
29 import javax.xml.parsers.SAXParser JavaDoc;
30 import javax.xml.parsers.SAXParserFactory JavaDoc;
31
32 import org.apache.commons.logging.Log;
33 import org.mortbay.log.LogFactory;
34 import org.mortbay.util.LazyList;
35 import org.mortbay.util.LogSupport;
36 import org.xml.sax.Attributes JavaDoc;
37 import org.xml.sax.InputSource JavaDoc;
38 import org.xml.sax.SAXException JavaDoc;
39 import org.xml.sax.SAXParseException JavaDoc;
40 import org.xml.sax.XMLReader JavaDoc;
41 import org.xml.sax.helpers.DefaultHandler JavaDoc;
42
43 /*--------------------------------------------------------------*/
44 /**
45  * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
46  * entity handlers and a mini dom-like document tree.
47  * <P>
48  * By default, the parser is created as a validating parser. This can be changed by setting the
49  * "org.mortbay.xml.XmlParser.NotValidating" system property to true.
50  *
51  * @version $Id: XmlParser.java,v 1.21 2005/10/25 07:53:22 gregwilkins Exp $
52  * @author Greg Wilkins (gregw)
53  */

54 public class XmlParser
55 {
56     private static Log log=LogFactory.getLog(XmlParser.class);
57     private Map JavaDoc _redirectMap=new HashMap JavaDoc();
58     private SAXParser JavaDoc _parser;
59     private String JavaDoc _xpath;
60     private Object JavaDoc _xpaths;
61
62     /* ------------------------------------------------------------ */
63     /**
64      * Construct
65      */

66     public XmlParser()
67     {
68         try
69         {
70             SAXParserFactory JavaDoc factory=SAXParserFactory.newInstance();
71             boolean notValidating=Boolean.getBoolean("org.mortbay.xml.XmlParser.NotValidating");
72             factory.setValidating(!notValidating);
73             _parser=factory.newSAXParser();
74             try
75             {
76                 if(!notValidating)
77                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema",true);
78             }
79             catch(Exception JavaDoc e)
80             {
81                 log.warn("Schema validation may not be supported");
82                 log.debug("",e);
83                 notValidating=true;
84             }
85             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation",!notValidating);
86             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces",!notValidating);
87             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",!notValidating);
88         }
89         catch(Exception JavaDoc e)
90         {
91             log.warn(LogSupport.EXCEPTION,e);
92             throw new Error JavaDoc(e.toString());
93         }
94     }
95
96     /* ------------------------------------------------------------ */
97     /**
98      * Constructor.
99      */

100     public XmlParser(boolean validating)
101     {
102         try
103         {
104             SAXParserFactory JavaDoc factory=SAXParserFactory.newInstance();
105             factory.setValidating(validating);
106             _parser=factory.newSAXParser();
107             try
108             {
109                 if(validating)
110                     _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema",validating);
111             }
112             catch(Exception JavaDoc e)
113             {
114                 if(validating)
115                     log.warn("Schema validation may not be supported: ",e);
116                 else
117                     LogSupport.ignore(log,e);
118             }
119             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation",validating);
120             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces",validating);
121             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",validating);
122         }
123         catch(Exception JavaDoc e)
124         {
125             log.warn(LogSupport.EXCEPTION,e);
126             throw new Error JavaDoc(e.toString());
127         }
128     }
129
130     /* ------------------------------------------------------------ */
131     /**
132      * @param name
133      * @param entity
134      */

135     public synchronized void redirectEntity(String JavaDoc name,URL JavaDoc entity)
136     {
137         if(entity!=null)
138             _redirectMap.put(name,entity);
139     }
140     
141     /* ------------------------------------------------------------ */
142     /**
143      *
144      * @return Returns the xpath.
145      */

146     public String JavaDoc getXpath()
147     {
148         return _xpath;
149     }
150     
151     /* ------------------------------------------------------------ */
152     /** Set an XPath
153      * A very simple subset of xpath is supported to select a partial
154      * tree. Currently only path like "/node1/nodeA | /node1/nodeB"
155      * are supported.
156      * @param xpath The xpath to set.
157      */

158     public void setXpath(String JavaDoc xpath)
159     {
160         _xpath = xpath;
161         StringTokenizer JavaDoc tok = new StringTokenizer JavaDoc(xpath,"| ");
162         while(tok.hasMoreTokens())
163             _xpaths=LazyList.add(_xpaths, tok.nextToken());
164     }
165     
166     /* ------------------------------------------------------------ */
167     public synchronized Node parse(InputSource JavaDoc source) throws IOException JavaDoc,SAXException JavaDoc
168     {
169         Handler JavaDoc handler=new Handler JavaDoc();
170         XMLReader JavaDoc reader=_parser.getXMLReader();
171         reader.setContentHandler(handler);
172         reader.setErrorHandler(handler);
173         reader.setEntityResolver(handler);
174         if(log.isDebugEnabled())
175             log.debug("parsing: sid="+source.getSystemId()+",pid="+source.getPublicId());
176         _parser.parse(source,handler);
177         if(handler._error!=null)
178             throw handler._error;
179         Node doc=(Node)handler._top.get(0);
180         handler.clear();
181         return doc;
182     }
183
184     /* ------------------------------------------------------------ */
185     /**
186      * Parse string URL.
187      */

188     public synchronized Node parse(String JavaDoc url) throws IOException JavaDoc,SAXException JavaDoc
189     {
190         if(log.isDebugEnabled())
191             log.debug("parse: "+url);
192         return parse(new InputSource JavaDoc(url));
193     }
194
195     /* ------------------------------------------------------------ */
196     /**
197      * Parse File.
198      */

199     public synchronized Node parse(File JavaDoc file) throws IOException JavaDoc,SAXException JavaDoc
200     {
201         if(log.isDebugEnabled())
202             log.debug("parse: "+file);
203         return parse(new InputSource JavaDoc(file.toURL().toString()));
204     }
205
206     /* ------------------------------------------------------------ */
207     /**
208      * Parse InputStream.
209      */

210     public synchronized Node parse(InputStream JavaDoc in) throws IOException JavaDoc,SAXException JavaDoc
211     {
212         Handler JavaDoc handler=new Handler JavaDoc();
213         XMLReader JavaDoc reader=_parser.getXMLReader();
214         reader.setContentHandler(handler);
215         reader.setErrorHandler(handler);
216         reader.setEntityResolver(handler);
217         _parser.parse(new InputSource JavaDoc(in),handler);
218         if(handler._error!=null)
219             throw handler._error;
220         Node doc=(Node)handler._top.get(0);
221         handler.clear();
222         return doc;
223     }
224
225     /* ------------------------------------------------------------ */
226     /**
227      * Parse URL.
228      */

229     public synchronized Node parse(URL JavaDoc url) throws IOException JavaDoc,SAXException JavaDoc
230     {
231         Node n=null;
232         InputStream JavaDoc is=url.openStream();
233         try
234         {
235             n=parse(is);
236         }
237         finally
238         {
239             try
240             {
241                 is.close();
242             }
243             catch(Exception JavaDoc e)
244             {
245                 // xerces closes streams you give it to parse, so this close() will throw an
246
// exception.
247
// This behavior is stupid, so we should not assume it.
248
}
249         }
250         return n;
251     }
252
253
254
255     /* ------------------------------------------------------------ */
256     /* ------------------------------------------------------------ */
257     private class NoopHandler extends DefaultHandler JavaDoc
258     {
259         Handler JavaDoc _next;
260         int _depth;
261         
262         NoopHandler(Handler JavaDoc next)
263         {
264             this._next=next;
265         }
266
267         /* ------------------------------------------------------------ */
268         public void startElement(String JavaDoc uri,String JavaDoc localName,String JavaDoc qName,Attributes JavaDoc attrs) throws SAXException JavaDoc
269         {
270             _depth++;
271         }
272
273         /* ------------------------------------------------------------ */
274         public void endElement(String JavaDoc uri,String JavaDoc localName,String JavaDoc qName) throws SAXException JavaDoc
275         {
276             if (_depth==0)
277                 _parser.getXMLReader().setContentHandler(_next);
278             else
279                 _depth--;
280         }
281     }
282     
283     /* ------------------------------------------------------------ */
284     /* ------------------------------------------------------------ */
285     private class Handler extends DefaultHandler JavaDoc
286     {
287         Node _top=new Node(null,null,null);
288         SAXParseException JavaDoc _error;
289         private Node _context=_top;
290         private NoopHandler _noop;
291
292         Handler()
293         {
294             _noop = new NoopHandler(this);
295         }
296         
297         /* ------------------------------------------------------------ */
298         void clear()
299         {
300             _top=null;
301             _error=null;
302             _context=null;
303         }
304
305         /* ------------------------------------------------------------ */
306         public void startElement(String JavaDoc uri,String JavaDoc localName,String JavaDoc qName,Attributes JavaDoc attrs) throws SAXException JavaDoc
307         {
308             String JavaDoc name=(uri==null||uri.equals(""))?qName:localName;
309             Node node=new Node(_context,name,attrs);
310             
311             // check if the node matches any xpaths set?
312
if (_xpaths!=null)
313             {
314                 String JavaDoc path=node.getPath();
315                 boolean match=false;
316                 for (int i=LazyList.size(_xpaths);!match&&i-->0;)
317                 {
318                     String JavaDoc xpath=(String JavaDoc)LazyList.get(_xpaths,i);
319                     
320                     match=path.equals(xpath) ||
321                           xpath.startsWith(path) && xpath.length()>path.length() && xpath.charAt(path.length())=='/';
322                 }
323                 
324                 if (match)
325                 {
326                     _context.add(node);
327                     _context=node;
328                 }
329                 else
330                 {
331                     _parser.getXMLReader().setContentHandler(_noop);
332                 }
333             }
334             else
335             {
336                 _context.add(node);
337                 _context=node;
338             }
339         }
340
341         /* ------------------------------------------------------------ */
342         public void endElement(String JavaDoc uri,String JavaDoc localName,String JavaDoc qName) throws SAXException JavaDoc
343         {
344             _context=_context._parent;
345         }
346
347         /* ------------------------------------------------------------ */
348         public void ignorableWhitespace(char buf[],int offset,int len) throws SAXException JavaDoc
349         {
350         }
351
352         /* ------------------------------------------------------------ */
353         public void characters(char buf[],int offset,int len) throws SAXException JavaDoc
354         {
355             _context.add(new String JavaDoc(buf,offset,len));
356         }
357
358         /* ------------------------------------------------------------ */
359         public void warning(SAXParseException JavaDoc ex)
360         {
361             log.debug(LogSupport.EXCEPTION,ex);
362             log.warn("WARNING@"+getLocationString(ex)+" : "+ex.toString());
363         }
364
365         /* ------------------------------------------------------------ */
366         public void error(SAXParseException JavaDoc ex) throws SAXException JavaDoc
367         {
368             // Save error and continue to report other errors
369
if(_error==null)
370                 _error=ex;
371             log.debug(LogSupport.EXCEPTION,ex);
372             log.warn("ERROR@"+getLocationString(ex)+" : "+ex.toString());
373         }
374
375         /* ------------------------------------------------------------ */
376         public void fatalError(SAXParseException JavaDoc ex) throws SAXException JavaDoc
377         {
378             _error=ex;
379             log.debug(LogSupport.EXCEPTION,ex);
380             log.warn("FATAL@"+getLocationString(ex)+" : "+ex.toString());
381             throw ex;
382         }
383
384         /* ------------------------------------------------------------ */
385         private String JavaDoc getLocationString(SAXParseException JavaDoc ex)
386         {
387             return ex.getSystemId()+" line:"+ex.getLineNumber()+" col:"+ex.getColumnNumber();
388         }
389
390         /* ------------------------------------------------------------ */
391         public InputSource JavaDoc resolveEntity(String JavaDoc pid,String JavaDoc sid)
392         {
393             if(log.isDebugEnabled())
394                 log.debug("resolveEntity("+pid+", "+sid+")");
395             URL JavaDoc entity=null;
396             if(pid!=null)
397                 entity=(URL JavaDoc)_redirectMap.get(pid);
398             if(entity==null)
399                 entity=(URL JavaDoc)_redirectMap.get(sid);
400             if(entity==null)
401             {
402                 String JavaDoc dtd=sid;
403                 if(dtd.lastIndexOf('/')>=0)
404                     dtd=dtd.substring(dtd.lastIndexOf('/')+1);
405                 if(log.isDebugEnabled())
406                     log.debug("Can't exact match entity in redirect map, trying "+dtd);
407                 entity=(URL JavaDoc)_redirectMap.get(dtd);
408             }
409             if(entity!=null)
410             {
411                 try
412                 {
413                     InputStream JavaDoc in=entity.openStream();
414                     if(log.isDebugEnabled())
415                         log.debug("Redirected entity "+sid+" --> "+entity);
416                     InputSource JavaDoc is=new InputSource JavaDoc(in);
417                     is.setSystemId(sid);
418                     return is;
419                 }
420                 catch(IOException JavaDoc e)
421                 {
422                     LogSupport.ignore(log,e);
423                 }
424             }
425             return null;
426         }
427     }
428
429
430
431     /* ------------------------------------------------------------ */
432     /* ------------------------------------------------------------ */
433     /**
434      * XML Attribute.
435      */

436     public static class Attribute
437     {
438         private String JavaDoc _name;
439         private String JavaDoc _value;
440
441         Attribute(String JavaDoc n,String JavaDoc v)
442         {
443             _name=n;
444             _value=v;
445         }
446
447         public String JavaDoc getName()
448         {
449             return _name;
450         }
451
452         public String JavaDoc getValue()
453         {
454             return _value;
455         }
456     }
457
458
459
460     /* ------------------------------------------------------------ */
461     /* ------------------------------------------------------------ */
462     /**
463      * XML Node. Represents an XML element with optional attributes and ordered content.
464      */

465     public static class Node extends AbstractList JavaDoc
466     {
467         Node _parent;
468         private ArrayList JavaDoc _list;
469         private String JavaDoc _tag;
470         private Attribute[] _attrs;
471         private boolean _lastString=false;
472         private String JavaDoc _path;
473
474         /* ------------------------------------------------------------ */
475         Node(Node parent,String JavaDoc tag,Attributes JavaDoc attrs)
476         {
477             _parent=parent;
478             _tag=tag;
479             if(attrs!=null)
480             {
481                 _attrs=new Attribute[attrs.getLength()];
482                 for(int i=0;i<attrs.getLength();i++)
483                 {
484                     String JavaDoc name=attrs.getLocalName(i);
485                     if(name==null||name.equals(""))
486                         name=attrs.getQName(i);
487                     _attrs[i]=new Attribute(name,attrs.getValue(i));
488                 }
489             }
490         }
491
492         /* ------------------------------------------------------------ */
493         public Node getParent()
494         {
495             return _parent;
496         }
497
498         /* ------------------------------------------------------------ */
499         public String JavaDoc getTag()
500         {
501             return _tag;
502         }
503
504         /* ------------------------------------------------------------ */
505         public String JavaDoc getPath()
506         {
507             if (_path==null)
508             {
509                 if (getParent()!=null && getParent().getTag()!=null)
510                     _path= getParent().getPath()+"/"+_tag;
511                 else
512                     _path ="/"+_tag;
513             }
514             return _path;
515         }
516
517         /* ------------------------------------------------------------ */
518         /**
519          * Get an array of element attributes.
520          */

521         public Attribute[] getAttributes()
522         {
523             return _attrs;
524         }
525
526         /* ------------------------------------------------------------ */
527         /**
528          * Get an element attribute.
529          *
530          * @return attribute or null.
531          */

532         public String JavaDoc getAttribute(String JavaDoc name)
533         {
534             return getAttribute(name,null);
535         }
536
537         /* ------------------------------------------------------------ */
538         /**
539          * Get an element attribute.
540          *
541          * @return attribute or null.
542          */

543         public String JavaDoc getAttribute(String JavaDoc name,String JavaDoc dft)
544         {
545             if(_attrs==null||name==null)
546                 return dft;
547             for(int i=0;i<_attrs.length;i++)
548                 if(name.equals(_attrs[i].getName()))
549                     return _attrs[i].getValue();
550             return dft;
551         }
552
553         /* ------------------------------------------------------------ */
554         /**
555          * Get the number of children nodes.
556          */

557         public int size()
558         {
559             if(_list!=null)
560                 return _list.size();
561             return 0;
562         }
563
564         /* ------------------------------------------------------------ */
565         /**
566          * Get the ith child node or content.
567          *
568          * @return Node or String.
569          */

570         public Object JavaDoc get(int i)
571         {
572             if(_list!=null)
573                 return _list.get(i);
574             return null;
575         }
576
577         /* ------------------------------------------------------------ */
578         /**
579          * Get the first child node with the tag.
580          *
581          * @param tag
582          * @return Node or null.
583          */

584         public Node get(String JavaDoc tag)
585         {
586             if(_list!=null)
587             {
588                 for(int i=0;i<_list.size();i++)
589                 {
590                     Object JavaDoc o=_list.get(i);
591                     if(o instanceof Node)
592                     {
593                         Node n=(Node)o;
594                         if(tag.equals(n._tag))
595                             return n;
596                     }
597                 }
598             }
599             return null;
600         }
601
602         /* ------------------------------------------------------------ */
603         public void add(int i,Object JavaDoc o)
604         {
605             if(_list==null)
606                 _list=new ArrayList JavaDoc();
607             if(o instanceof String JavaDoc)
608             {
609                 if(_lastString)
610                 {
611                     int last=_list.size()-1;
612                     _list.set(last,(String JavaDoc)_list.get(last)+o);
613                 }
614                 else
615                     _list.add(i,o);
616                 _lastString=true;
617             }
618             else
619             {
620                 _lastString=false;
621                 _list.add(i,o);
622             }
623         }
624
625         /* ------------------------------------------------------------ */
626         public void clear()
627         {
628             if(_list!=null)
629                 _list.clear();
630             _list=null;
631         }
632
633         /* ------------------------------------------------------------ */
634         /**
635          * Get a tag as a string.
636          *
637          * @param tag The tag to get
638          * @param tags IF true, tags are included in the value.
639          * @param trim If true, trim the value.
640          * @return results of get(tag).toString(tags).
641          */

642         public String JavaDoc getString(String JavaDoc tag,boolean tags,boolean trim)
643         {
644             Node node=get(tag);
645             if(node==null)
646                 return null;
647             String JavaDoc s=node.toString(tags);
648             if(s!=null&&trim)
649                 s=s.trim();
650             return s;
651         }
652
653         /* ------------------------------------------------------------ */
654         public synchronized String JavaDoc toString()
655         {
656             return toString(true);
657         }
658
659         /* ------------------------------------------------------------ */
660         /**
661          * Convert to a string.
662          *
663          * @param tag If false, only content is shown.
664          */

665         public synchronized String JavaDoc toString(boolean tag)
666         {
667             StringBuffer JavaDoc buf=new StringBuffer JavaDoc();
668             synchronized(buf)
669             {
670                 toString(buf,tag);
671                 return buf.toString();
672             }
673         }
674
675         /* ------------------------------------------------------------ */
676         /**
677          * Convert to a string.
678          *
679          * @param tag If false, only content is shown.
680          */

681         public synchronized String JavaDoc toString(boolean tag,boolean trim)
682         {
683             String JavaDoc s=toString(tag);
684             if(s!=null&&trim)
685                 s=s.trim();
686             return s;
687         }
688
689         /* ------------------------------------------------------------ */
690         private synchronized void toString(StringBuffer JavaDoc buf,boolean tag)
691         {
692             if(tag)
693             {
694                 buf.append("<");
695                 buf.append(_tag);
696                 if(_attrs!=null)
697                 {
698                     for(int i=0;i<_attrs.length;i++)
699                     {
700                         buf.append(' ');
701                         buf.append(_attrs[i].getName());
702                         buf.append("=\"");
703                         buf.append(_attrs[i].getValue());
704                         buf.append("\"");
705                     }
706                 }
707             }
708             if(_list!=null)
709             {
710                 if(tag)
711                     buf.append(">");
712                 for(int i=0;i<_list.size();i++)
713                 {
714                     Object JavaDoc o=_list.get(i);
715                     if(o==null)
716                         continue;
717                     if(o instanceof Node)
718                         ((Node)o).toString(buf,tag);
719                     else
720                         buf.append(o.toString());
721                 }
722                 if(tag)
723                 {
724                     buf.append("</");
725                     buf.append(_tag);
726                     buf.append(">");
727                 }
728             }
729             else if(tag)
730                 buf.append("/>");
731         }
732
733         /* ------------------------------------------------------------ */
734         /**
735          * Iterator over named child nodes.
736          *
737          * @param tag The tag of the nodes.
738          * @return Iterator over all child nodes with the specified tag.
739          */

740         public Iterator JavaDoc iterator(final String JavaDoc tag)
741         {
742             return new Iterator JavaDoc()
743             {
744                 int c=0;
745                 Node _node;
746
747                 /* -------------------------------------------------- */
748                 public boolean hasNext()
749                 {
750                     if(_node!=null)
751                         return true;
752                     while(_list!=null&&c<_list.size())
753                     {
754                         Object JavaDoc o=_list.get(c);
755                         if(o instanceof Node)
756                         {
757                             Node n=(Node)o;
758                             if(tag.equals(n._tag))
759                             {
760                                 _node=n;
761                                 return true;
762                             }
763                         }
764                         c++;
765                     }
766                     return false;
767                 }
768
769                 /* -------------------------------------------------- */
770                 public Object JavaDoc next()
771                 {
772                     try
773                     {
774                         if(hasNext())
775                             return _node;
776                         throw new NoSuchElementException JavaDoc();
777                     }
778                     finally
779                     {
780                         _node=null;
781                         c++;
782                     }
783                 }
784
785                 /* -------------------------------------------------- */
786                 public void remove()
787                 {
788                     throw new UnsupportedOperationException JavaDoc("Not supported");
789                 }
790             };
791         }
792     }
793 }
794
Popular Tags