KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > xml2 > XMLReaderImpl


1 /*
2  * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3  *
4  * This file is part of Resin(R) Open Source
5  *
6  * Each copy or derived work must preserve the copyright notice and this
7  * notice unmodified.
8  *
9  * Resin Open Source is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * Resin Open Source is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17  * of NON-INFRINGEMENT. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with Resin Open Source; if not, write to the
22  * Free SoftwareFoundation, Inc.
23  * 59 Temple Place, Suite 330
24  * Boston, MA 02111-1307 USA
25  *
26  * @author Scott Ferguson
27  */

28
29 package com.caucho.xml2;
30
31 import com.caucho.util.L10N;
32 import com.caucho.vfs.ReadStream;
33 import com.caucho.vfs.TempCharBuffer;
34 import com.caucho.vfs.Vfs;
35 import com.caucho.xml.ExtendedLocator;
36 import com.caucho.xml.QName;
37 import com.caucho.xml.XmlChar;
38
39 import org.xml.sax.*;
40
41 import java.io.IOException JavaDoc;
42 import java.io.InputStream JavaDoc;
43 import java.io.Reader JavaDoc;
44 import java.util.HashMap JavaDoc;
45
46 /**
47  * A fast XML parser.
48  */

49 public class XMLReaderImpl implements XMLReader {
50   private static final L10N L = new L10N(XMLReaderImpl.class);
51   
52   // Xerces uses the following
53
public static final String JavaDoc XMLNS = "http://www.w3.org/2000/xmlns/";
54
55   static final QName DOC_NAME = new QName(null, "#document", null);
56   static final QName TEXT_NAME = new QName(null, "#text", null);
57   static final QName JSP_NAME = new QName(null, "#jsp", null);
58   static final QName WHITESPACE_NAME = new QName(null, "#whitespace", null);
59   static final QName JSP_ATTRIBUTE_NAME = new QName("xtp", "jsp-attribute", null);
60   static final String JavaDoc LEXICAL_HANDLER = "http://xml.org/sax/properties/lexical-handler";
61
62   private static final boolean []XML_NAME_CHAR;
63
64   private ContentHandler _contentHandler;
65   private EntityResolver _entityResolver;
66   private DTDHandler _dtdHandler;
67   private ErrorHandler _errorHandler;
68
69   private Reader JavaDoc _reader;
70
71   private final AttributesImpl _attributes = new AttributesImpl();
72   private final ExtendedLocator _locator = new LocatorImpl();
73
74   private final Intern _intern = new Intern();
75
76   private final HashMap JavaDoc<NameKey,QName> _nameMap
77     = new HashMap JavaDoc<NameKey,QName>();
78
79   private final NameKey _nameKey = new NameKey();
80
81   private char []_valueBuf;
82   private char []_inputBuf;
83   private int _inputOffset;
84   private int _inputLength;
85
86   private String JavaDoc _filename;
87   private String JavaDoc _systemId;
88   private String JavaDoc _publicId;
89   private int _line;
90
91   /**
92    * Returns a SAX feature.
93    *
94    * <p>All XMLReaders are required to recognize the
95    * http://xml.org/sax/features/namespaces and the
96    * http://xml.org/sax/features/namespace-prefixes feature names.</p>
97    */

98   public boolean getFeature(String JavaDoc name)
99     throws SAXNotRecognizedException, SAXNotSupportedException
100   {
101     throw new SAXNotRecognizedException(name);
102   }
103
104   /**
105    * Sets a SAX property.
106    */

107   public void setProperty(String JavaDoc name, Object JavaDoc value)
108     throws SAXNotRecognizedException, SAXNotSupportedException
109   {
110     if (LEXICAL_HANDLER.equals(name)) {
111     }
112     else
113       throw new SAXNotRecognizedException(name);
114   }
115
116   /**
117    * Returns a SAX property.
118    */

119   public Object JavaDoc getProperty(String JavaDoc name)
120     throws SAXNotRecognizedException, SAXNotSupportedException
121   {
122     throw new SAXNotRecognizedException(name);
123   }
124
125   /**
126    * Sets a SAX feature.
127    */

128   public void setFeature(String JavaDoc name, boolean value)
129     throws SAXNotRecognizedException, SAXNotSupportedException
130   {
131     throw new SAXNotRecognizedException(name);
132   }
133   
134   /**
135    * Sets the SAX entityResolver.
136    *
137    * @param resolver the entity resolver
138    */

139   public void setEntityResolver(EntityResolver resolver)
140   {
141     _entityResolver = resolver;
142   }
143   
144   /**
145    * Gets the SAX entityResolver.
146    *
147    * @return the entity resolver
148    */

149   public EntityResolver getEntityResolver()
150   {
151     return _entityResolver;
152   }
153
154   /**
155    * Sets the SAX DTD handler
156    *
157    * @param handler the dtd handler
158    */

159   public void setDTDHandler(DTDHandler handler)
160   {
161     _dtdHandler = handler;
162   }
163
164   /**
165    * Gets the SAX DTD handler
166    *
167    * @return the dtd handler
168    */

169   public DTDHandler getDTDHandler()
170   {
171     return _dtdHandler;
172   }
173
174   /**
175    * Sets the SAX content handler
176    *
177    * @param handler the content handler
178    */

179   public void setContentHandler(ContentHandler handler)
180   {
181     _contentHandler = handler;
182   }
183
184   /**
185    * Gets the SAX content handler
186    *
187    * @param handler the content handler
188    */

189   public ContentHandler getContentHandler()
190   {
191     return _contentHandler;
192   }
193
194   /**
195    * Sets the SAX errorHandler.
196    *
197    * @param handler the error handler
198    */

199   public void setErrorHandler(ErrorHandler handler)
200   {
201     _errorHandler = handler;
202   }
203
204   /**
205    * Gets the SAX errorHandler.
206    *
207    * @param handler the error handler
208    */

209   public ErrorHandler getErrorHandler()
210   {
211     return _errorHandler;
212   }
213   
214   /**
215    * parses the input source.
216    *
217    * @param source the source to parse from
218    */

219   public void parse(InputSource source)
220     throws IOException JavaDoc, SAXException
221   {
222     InputStream JavaDoc is = source.getByteStream();
223     if (is != null) {
224       _systemId = source.getSystemId();
225       
226       if (is instanceof ReadStream) {
227     _filename = ((ReadStream) is).getPath().getUserPath();
228     if (_systemId == null)
229       _systemId = ((ReadStream) is).getPath().getURL();
230       }
231       else {
232     _filename = _systemId;
233       }
234
235       _reader = new java.io.InputStreamReader JavaDoc(is);
236       
237       parseImpl();
238     }
239     else
240       throw new IllegalArgumentException JavaDoc();
241   }
242   
243   /**
244    * Parses the file at the given string
245    *
246    * @param url the source url to parse from
247    */

248   public void parse(String JavaDoc systemId)
249     throws IOException JavaDoc, SAXException
250   {
251     ReadStream is = Vfs.lookup(systemId).openRead();
252
253     _reader = is.getReader();
254     _systemId = systemId;
255     _filename = systemId;
256     try {
257       parseImpl();
258     } finally {
259       _reader = null;
260     }
261   }
262   
263   /**
264    * Parses the file at the given string
265    *
266    * @param url the source url to parse from
267    */

268   private void parseImpl()
269     throws IOException JavaDoc, SAXException
270   {
271     TempCharBuffer inputBuffer = TempCharBuffer.allocate();
272     TempCharBuffer valueBuffer = TempCharBuffer.allocate();
273     try {
274       _valueBuf = valueBuffer.getBuffer();
275       _inputBuf = inputBuffer.getBuffer();
276       _inputLength = 0;
277       _inputOffset = 0;
278       _line = 1;
279
280       _contentHandler.setDocumentLocator(_locator);
281       _contentHandler.startDocument();
282
283       parseContent();
284       
285       _contentHandler.endDocument();
286     } finally {
287       _inputBuf = null;
288       _valueBuf = null;
289       
290       TempCharBuffer.free(inputBuffer);
291       TempCharBuffer.free(valueBuffer);
292     }
293   }
294
295   /**
296    * Parses XML content.
297    */

298   private void parseContent()
299     throws IOException JavaDoc, SAXException
300   {
301     char []inputBuf = _inputBuf;
302     char []valueBuffer = _valueBuf;
303     int valueLength = valueBuffer.length;
304     int valueOffset = 0;
305
306     boolean isWhitespace = true;
307     boolean seenCr = false;
308
309     while (true) {
310       if (_inputLength == _inputOffset && ! fillBuffer()) {
311     writeText(valueBuffer, valueOffset, isWhitespace);
312     return;
313       }
314
315       char ch = inputBuf[_inputOffset++];
316
317       switch (ch) {
318       case ' ': case '\t':
319     if (valueOffset < valueLength)
320       valueBuffer[valueOffset++] = ch;
321     else {
322       writeText(valueBuffer, valueOffset, isWhitespace);
323       valueOffset = 0;
324     }
325     break;
326
327       case '\n':
328     if (valueOffset < valueLength)
329       valueBuffer[valueOffset++] = ch;
330     else {
331       writeText(valueBuffer, valueOffset, isWhitespace);
332       valueOffset = 0;
333     }
334     _line++;
335     break;
336
337       case '\r':
338     if (valueOffset < valueLength)
339       valueBuffer[valueOffset++] = ch;
340     else {
341       writeText(valueBuffer, valueOffset, isWhitespace);
342       valueOffset = 0;
343     }
344
345     addCarriageReturnLine();
346     break;
347
348       case '<':
349     if (valueOffset > 0) {
350       writeText(valueBuffer, valueOffset, isWhitespace);
351       valueOffset = 0;
352     }
353     
354     if (_inputLength == _inputOffset && ! fillBuffer())
355       error("XXX: unexpected eof");
356
357     ch = inputBuf[_inputOffset];
358     switch (ch) {
359     case '!':
360       break;
361     case '?':
362       break;
363     case '/':
364       _inputOffset++;
365       return;
366     default:
367       parseElement();
368       break;
369     }
370
371     isWhitespace = true;
372     break;
373
374       case '&':
375     if (valueOffset > 0) {
376       writeText(valueBuffer, valueOffset, isWhitespace);
377       valueOffset = 0;
378     }
379     isWhitespace = true;
380     break;
381
382       default:
383     isWhitespace = false;
384     if (valueOffset < valueLength)
385       valueBuffer[valueOffset++] = ch;
386     else {
387       writeText(valueBuffer, valueOffset, false);
388       valueOffset = 0;
389     }
390     break;
391       }
392     }
393   }
394
395   /**
396    * Parses the element.
397    */

398   private void parseElement()
399     throws IOException JavaDoc, SAXException
400   {
401     InternQName qName = parseName();
402     String JavaDoc name = qName.getName();
403
404     _attributes.clear();
405
406     while (true) {
407       int ch = read();
408
409       switch (ch) {
410       case -1:
411     throw error("XXX: unexpected eof");
412
413       case ' ': case '\t':
414     break;
415
416       case '\r':
417     addCarriageReturnLine();
418     break;
419
420       case '\n':
421     _line++;
422     break;
423
424       case '/':
425     if ((ch = read()) != '>')
426       throw error("XXX: expected '>'");
427
428     _contentHandler.startElement("", "", name, _attributes);
429     _contentHandler.endElement("", "", name);
430
431     return;
432     
433       case '>':
434     _contentHandler.startElement("", "", name, _attributes);
435
436     parseContent();
437
438     InternQName tailQName = parseName();
439     String JavaDoc tailName = tailQName.getName();
440
441     if ((ch = read()) != '>')
442       throw error("XXX: expected '>'");
443
444     if (! name.equals(tailName))
445       throw error("XXX: mismatch name");
446
447     _contentHandler.endElement("", "", name);
448     
449     return;
450
451       default:
452     if (XmlChar.isNameStart(ch)) {
453       unread();
454       
455       InternQName attrName = parseName();
456       ch = skipWhitespace(read());
457
458       if (ch != '=')
459         throw error(L.l("Expected '=' for attribute value at {0}.",
460               badChar(ch)));
461
462       String JavaDoc attrValue = parseValue();
463
464       _attributes.add(attrName, attrValue);
465     }
466     else
467       throw error(L.l("{0} is an unexpected character in element.",
468               badChar(ch)));
469       }
470     }
471   }
472
473   /**
474    * Parses a name.
475    */

476   private QName parseAttrName()
477     throws IOException JavaDoc
478   {
479     int valueOffset = 0;
480
481     char []inputBuf = _inputBuf;
482     char []valueBuf = _valueBuf;
483
484     int inputLength = _inputLength;
485     int inputOffset = _inputOffset;
486
487     while (true) {
488       if (inputOffset < inputLength) {
489       }
490       else if (fillBuffer()) {
491     inputLength = _inputLength;
492     inputOffset = 0;
493       }
494       else {
495     _nameKey.init(valueBuf, 0, valueOffset);
496
497     QName name = _nameMap.get(_nameKey);
498
499     if (name == null) {
500       name = new QName(new String JavaDoc(valueBuf, 0, valueOffset), null);
501       _nameMap.put(new NameKey(valueBuf, 0, valueOffset), name);
502     }
503
504     return name;
505       }
506
507       char ch = inputBuf[inputOffset++];
508
509       if (XML_NAME_CHAR[ch])
510     valueBuf[valueOffset++] = ch;
511       else if (ch == ':') {
512     valueBuf[valueOffset++] = ch;
513       }
514       else {
515     _inputOffset = inputOffset - 1;
516
517     QName name = _nameMap.get(_nameKey);
518
519     if (name == null) {
520       name = new QName(new String JavaDoc(valueBuf, 0, valueOffset), null);
521       _nameMap.put(new NameKey(valueBuf, 0, valueOffset), name);
522     }
523
524     return name;
525       }
526     }
527   }
528
529   /**
530    * Parses a name.
531    */

532   private InternQName parseName()
533     throws IOException JavaDoc
534   {
535     int valueOffset = 0;
536
537     char []inputBuf = _inputBuf;
538     char []valueBuf = _valueBuf;
539
540     int inputLength = _inputLength;
541     int inputOffset = _inputOffset;
542     int colon = 0;
543
544     while (true) {
545       if (inputOffset < inputLength) {
546     char ch = inputBuf[inputOffset++];
547
548     if (XML_NAME_CHAR[ch]) {
549       valueBuf[valueOffset++] = ch;
550     }
551     else if (ch == ':') {
552       if (colon <= 0)
553         colon = valueOffset;
554       
555       valueBuf[valueOffset++] = ch;
556     }
557     else {
558       _inputOffset = inputOffset - 1;
559
560       return _intern.add(valueBuf, 0, valueOffset, colon);
561     }
562       }
563       else if (fillBuffer()) {
564     inputLength = _inputLength;
565     inputOffset = 0;
566       }
567       else {
568     return _intern.add(valueBuf, 0, valueOffset, colon);
569       }
570     }
571   }
572
573   /**
574    * Writes text data.
575    */

576   private void writeText(char []buffer, int length, boolean isWhitespace)
577     throws SAXException
578   {
579   }
580
581   /**
582    * Adds the line for cr
583    */

584   private void addCarriageReturnLine()
585     throws IOException JavaDoc
586   {
587     if (_inputLength == _inputOffset && ! fillBuffer())
588       _line++;
589     else if (_inputBuf[_inputOffset] != '\n')
590       _line++;
591   }
592
593   /**
594    * Parses an attribute value.
595    */

596   private String JavaDoc parseValue()
597     throws IOException JavaDoc, SAXException
598   {
599     int end = skipWhitespace(read());
600
601     if (end != '\'' && end != '"')
602       throw error(L.l("expected quote at '{0}'", badChar(end)));
603
604     int index = 0;
605     char []inputBuf = _inputBuf;
606     char []valueBuf = _valueBuf;
607     
608     while (true) {
609       if (_inputLength == _inputOffset && ! fillBuffer())
610     throw error(L.l("Unexpected end of file in attribute value."));
611
612       char ch = inputBuf[_inputOffset++];
613       
614       switch (ch) {
615       case '&':
616     throw error(L.l("Can't handle entities yet."));
617
618       case '\r':
619     addCarriageReturnLine();
620     ch = ' ';
621     break;
622
623       case '\n':
624     _line++;
625     ch = ' ';
626     break;
627
628       case '\'': case '"':
629     if (ch == end)
630       return new String JavaDoc(valueBuf, 0, index);
631     break;
632       }
633
634       valueBuf[index++] = ch;
635     }
636   }
637
638   /**
639    * Skips whitespace, returning the next character.
640    */

641   private int skipWhitespace(int ch)
642     throws IOException JavaDoc
643   {
644     while (true) {
645       switch (ch) {
646       case -1:
647     return -1;
648     
649       case ' ': case '\t':
650     break;
651
652       case '\r':
653     addCarriageReturnLine();
654     break;
655
656       case '\n':
657     _line++;
658     break;
659     
660       default:
661     return ch;
662       }
663       
664       if (_inputLength == _inputOffset && ! fillBuffer())
665     return -1;
666
667       ch = _inputBuf[_inputOffset++];
668     }
669   }
670
671   /**
672    * Reads a character.
673    */

674   private int read()
675     throws IOException JavaDoc
676   {
677     if (_inputLength == _inputOffset && ! fillBuffer())
678       return -1;
679     else
680       return _inputBuf[_inputOffset++];
681   }
682
683   /**
684    * Reads a character.
685    */

686   private void unread()
687     throws IOException JavaDoc
688   {
689     _inputOffset--;
690   }
691
692   /**
693    * Fills the input buffer.
694    */

695   private boolean fillBuffer()
696     throws IOException JavaDoc
697   {
698     _inputOffset = 0;
699     _inputLength = _reader.read(_inputBuf, 0, _inputBuf.length);
700
701     return _inputLength > 0;
702   }
703
704   /**
705    * Returns a string for a bad char.
706    */

707   private String JavaDoc badChar(int ch)
708   {
709     return "" + (char) ch;
710   }
711
712   /**
713    * Returns an error.
714    */

715   private SAXException error(String JavaDoc msg)
716   {
717     return new SAXException(msg);
718   }
719
720   class LocatorImpl implements ExtendedLocator {
721     /**
722      * Returns the parser's system id.
723      */

724     public String JavaDoc getSystemId()
725     {
726       return _systemId;
727       /*
728       if (_parser._reader != null && _parser._reader.getSystemId() != null)
729         return _parser._reader.getSystemId();
730       else if (_parser.getSystemId() != null)
731         return _parser.getSystemId();
732       else if (_parser._reader != null && _parser._reader.getFilename() != null)
733         return _parser._reader.getFilename();
734       else if (_parser.getFilename() != null)
735         return _parser.getFilename();
736       else
737         return null;
738       */

739     }
740
741     /**
742      * Returns the parser's filename.
743      */

744     public String JavaDoc getFilename()
745     {
746       return _filename;
747       /*
748       if (_parser._reader != null && _parser._reader.getFilename() != null)
749         return _parser._reader.getFilename();
750       else if (_parser.getFilename() != null)
751         return _parser.getFilename();
752       else if (_parser._reader != null && _parser._reader.getSystemId() != null)
753         return _parser._reader.getSystemId();
754       else if (_parser.getSystemId() != null)
755         return _parser.getSystemId();
756       else
757         return null;
758       */

759     }
760
761     /**
762      * Returns the public id.
763      */

764     public String JavaDoc getPublicId()
765     {
766       return _publicId;
767       /*
768       if (_parser._reader != null)
769         return _parser._reader.getPublicId();
770       else
771         return _parser.getPublicId();
772       */

773     }
774
775     /**
776      * Returns the line number.
777      */

778     public int getLineNumber()
779     {
780       return _line;
781       /*
782       if (_parser._reader != null)
783         return _parser._reader.getLine();
784       else
785         return _parser.getLineNumber();
786       */

787     }
788
789     /**
790      * Returns the column.
791      */

792     public int getColumnNumber()
793     {
794       return -1;
795     }
796   }
797
798   static class NameKey {
799     char []_buf;
800     int _offset;
801     int _length;
802
803     NameKey()
804     {
805     }
806
807     NameKey(char []buf, int offset, int length)
808     {
809       _buf = new char[length];
810       System.arraycopy(buf, offset, _buf, 0, length);
811       _offset = 0;
812       _length = 0;
813     }
814
815     void init(char []buf, int offset, int length)
816     {
817       _buf = buf;
818       _offset = offset;
819       _length = length;
820     }
821
822     @Override JavaDoc
823     public int hashCode()
824     {
825       int hash = 37;
826
827       char buf[] = _buf;
828       for (int i = _length - 1; i >= 0; i--)
829     hash = 65537 * hash + buf[i];
830
831       return hash;
832     }
833
834     @Override JavaDoc
835     public boolean equals(Object JavaDoc o)
836     {
837       NameKey key = (NameKey) o;
838
839       int length = _length;
840       if (length != key._length)
841     return false;
842
843       char []aBuf = _buf;
844       char []bBuf = key._buf;
845
846       int aOffset = _offset;
847       int bOffset = key._offset;
848
849       for (int i = 0; i < length; i++) {
850     if (aBuf[aOffset + i] != bBuf[bOffset + i])
851       return false;
852       }
853
854       return true;
855     }
856   }
857
858   static {
859     XML_NAME_CHAR = new boolean[65536];
860
861     for (int i = 0; i < 65536; i++) {
862       XML_NAME_CHAR[i] = XmlChar.isNameChar(i) && i != ':';
863     }
864   }
865 }
866
Popular Tags