KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > javolution > xml > pull > XmlPullParserImpl


1 /*
2  * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
3  * Copyright (C) 2005 - Javolution (http://javolution.org/)
4  * All rights reserved.
5  *
6  * Permission to use, copy, modify, and distribute this software is
7  * freely granted, provided that this notice is preserved.
8  */

9 package javolution.xml.pull;
10
11 import java.io.IOException;
12 import java.io.InputStream;
13 import java.io.InputStreamReader;
14 import java.io.Reader;
15 import java.io.UnsupportedEncodingException;
16
17 import j2me.lang.CharSequence;
18 import j2me.lang.IllegalStateException;
19 import j2me.nio.ByteBuffer;
20
21 import javolution.io.Utf8ByteBufferReader;
22 import javolution.io.Utf8StreamReader;
23 import javolution.lang.PersistentReference;
24 import javolution.lang.Reusable;
25 import javolution.lang.Text;
26 import javolution.lang.TypeFormat;
27 import javolution.realtime.ObjectFactory;
28 import javolution.util.FastComparator;
29 import javolution.util.FastTable;
30 import javolution.xml.sax.Attributes;
31 import javolution.xml.sax.AttributesImpl;
32
33 /**
34  * <p> This class provides a real-time XPP-like XML parser; this parser is
35  * <i>extremely</i> fast and <b>does not create temporary objects</b>
36  * (no garbage generated and no GC interruption).</p>
37  *
38  * <p> The parser input source can be either a {@link #setInput(Reader) Reader},
39  * an {@link #setInput(InputStream) InputStream} or even a {@link
40  * #setInput(ByteBuffer) ByteBuffer} (e.g. <code>MappedByteBuffer</code>).</p>
41  *
42  * <p> This parser is light (less than 15Kbytes compressed) and maintains
43  * a very small memory footprint while parsing (e.g. less than 10Kbytes
44  * while parsing 32Mbytes files). Typical applications include SOAP
45  * messaging, embedded/realtime systems, web servers (possibly thousands
46  * instances running concurrently), etc.</p>
47  *
48  * <p> Namespaces (SAX2 feature), comments, predefined entities
49  * (<code>&amp;amp;, &amp;lt;, &amp;gt;, &amp;apos;, &amp;quot;</code>)
50  * numeric character references (e.g. <code>&amp;#10;</code> for
51  * linefeed) and <code>CDATA</code> are recognized. Processing instructions,
52  * comments and entities declarations are ignored.</p>
53  *
54  * <p> The {@link CharSequence CharSequence} generated by this parser have
55  * the following characteristics:
56  * <ul>
57  * <li> They are immutable within their definition scope. The <code>
58  * CharSequence</code> created while parsing an XML element are
59  * reused only after the element is out-of-scope (idem for the
60  * {@link #getSaxAttributes attributes lists}).</li>
61  * <li> They support equality or lexical comparison with any
62  * <code>CharSequence</code> (e.g. <code>String</code>).</li>
63  * <li> They have the same hash code as <code>String</code> and can be
64  * used to retrieve data from a <code>Map</code> (e.g.
65  * {@link javolution.util.FastMap FastMap}) for which
66  * the keys are <code>String</code> instances.</li>
67  * <li> Like any <code>CharSequence</code>, they can be parsed
68  * to primitive types (e.g. int, double) using the utility class
69  * {@link TypeFormat}.</li>
70  * </ul></p>
71  *
72  * <p> Finally, this parser does not break up character data during call back
73  * (the whole character data between markups is always being returned).</p>
74  *
75  * @author <a HREF="mailto:javolution@arakelian.com">Gregory Arakelian</a>
76  * @author <a HREF="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
77  * @version 3.3, May 10, 2005
78  */

79 public final class XmlPullParserImpl implements XmlPullParser, Reusable {
80
81     /**
82      * Holds a factory producing AttributesImpl instances.
83      */

84     private static final ObjectFactory ATTRIBUTES_IMPL_FACTORY = new ObjectFactory() {
85         protected Object create() {
86             return new AttributesImpl();
87         }
88     };
89
90     /**
91      * Holds the reader buffer capacity.
92      */

93     private static final int READER_BUFFER_CAPACITY = 2048;
94
95     /**
96      * Holds the configurable nominal length for the data array length (must be
97      * larger than the reader buffer capacity to avoid overflow).
98      */

99     private static final PersistentReference DATA_SIZE = new PersistentReference(
100             "javolution.xml.pull.XmlPullParserImpl#DATA_SIZE", new Integer(
101                     READER_BUFFER_CAPACITY * 2));
102
103     /**
104      * Holds the configurable nominal length for the CharSequenceImpl stack.
105      */

106     private static final PersistentReference SEQ_SIZE = new PersistentReference(
107             "javolution.xml.pull.XmlPullParserImpl#SEQ_SIZE", new Integer(256));
108
109     /**
110      * Holds the parsing line.
111      */

112     private int _lineNumber;
113
114     /**
115      * Holds the column offset (column = _columnOffset + _index).
116      */

117     private int _columnOffset;
118
119     /**
120      * Holds the line length when line break occurs.
121      */

122     private int _lineLength;
123
124     /**
125      * Holds the current index in the character buffer.
126      */

127     private int _index;
128
129     /**
130      * Holds the data buffer for CharSequence produced by this parser.
131      */

132     private char[] _data = (char[]) new char[((Integer) DATA_SIZE.get())
133             .intValue()];
134
135     /**
136      * Holds the current length of the data buffer (_data).
137      */

138     private int _length;
139
140     /**
141      * Holds the current depth.
142      */

143     private int _depth;
144
145     /**
146      * Holds the namespace stack.
147      */

148     private final Namespaces _namespaces = new Namespaces();
149
150     /**
151      * Holds the current attributes (view over _attrPool.get(_depth)).
152      */

153     private AttributesImpl _attributes;
154
155     /**
156      * Holds a pool of AttributesImpl to avoid overwriting the current one.
157      */

158     private final FastTable _attrPool = new FastTable();
159
160     /**
161      * Holds working stack.
162      */

163     private final FastTable _elemStack = new FastTable();
164
165     /**
166      * Holds the character buffer used for reading.
167      */

168     private final char[] _chars = new char[READER_BUFFER_CAPACITY];
169
170     /**
171      * Holds the default stream reader (UTF-8).
172      */

173     private final Utf8StreamReader _inputStreamReader = new Utf8StreamReader(
174             READER_BUFFER_CAPACITY);
175
176     /**
177      * Holds the default ByteBuffer reader (UTF-8).
178      */

179     private final Utf8ByteBufferReader _byteBufferReader = new Utf8ByteBufferReader();
180
181     /**
182      * Number of characters read from reader
183      */

184     private int _charsRead;
185
186     /**
187      * Holds local name (i.e. does not include prefix, if any).
188      */

189     private CharSequenceImpl _elemLocalName;
190
191     /**
192      * Holds element namespace (lookup performed using prefix and namespace stack).
193      */

194     private CharSequenceImpl _elemNamespace;
195
196     /**
197      * Holds qualified name (include prefix).
198      */

199     private CharSequenceImpl _elemQName;
200
201     /**
202      * Holds prefix.
203      */

204     private CharSequenceImpl _elemPrefix;
205
206     /**
207      * Holds attribute qualified name.
208      */

209     private CharSequenceImpl _attrQName;
210
211     /**
212      * Holds attribute prefix.
213      */

214     private CharSequenceImpl _attrPrefix;
215
216     /**
217      * Holds attribute value.
218      */

219     private CharSequenceImpl _attrValue;
220
221     /**
222      * Holds character sequence when parsing numeric literal.
223      */

224     private final CharSequenceImpl _num = new CharSequenceImpl();
225
226     /**
227      * Holds current event type
228      */

229     private int _eventType = END_DOCUMENT;
230
231     /**
232      * Input encoding, if known
233      */

234     private String _inputEncoding;
235
236     /**
237      * Indicates if event type is START_TAG, and tag is empty, i.e. <sometag/>
238      */

239     private boolean _isEmpty;
240
241     /**
242      * Holds index of the last non-whitespace character (-1 = all whitespace).
243      */

244     private int _nonwhitespace;
245
246     /**
247      * Holds reader used to get data
248      */

249     private Reader _reader;
250
251     /**
252      * Holds start of escape sequence
253      */

254     private int _escStart;
255
256     /**
257      * Holds saved parser state when escape sequence encountered.
258      */

259     private int _savedState;
260
261     /**
262      * Holds the start of text within the _data array.
263      */

264     private int _start;
265
266     /**
267      * Holds the parser state.
268      */

269     private int _state = CHAR_DATA;
270
271     /**
272      * Holds the text associated with current event.
273      */

274     private CharSequenceImpl _text;
275
276     /**
277      * Indicates if text contains non whitespace characters (characters
278      * others than space, cr, lf, tab).
279      */

280     private boolean _hasNonWhitespace;
281
282     /**
283      * Holds character sequences instances.
284      */

285     private CharSequenceImpl[] _seqs = new CharSequenceImpl[((Integer) SEQ_SIZE
286             .get()).intValue()];
287
288     /**
289      * Holds character sequence index.
290      */

291     private int _seqsIndex;
292
293     /**
294      * Holds number of character sequence instances allocated.
295      */

296     private int _seqsCapacity;
297
298     /**
299      * Default constructor.
300      */

301     public XmlPullParserImpl() {
302         _attributes = new AttributesImpl();
303         _attrPool.addLast(_attributes);
304     }
305
306     /**
307      * Sets the byte buffer this parser is going to process
308      * (UTF-8 encoding).
309      *
310      * @param byteBuffer the byte buffer with UTF-8 encoding.
311      * @see Utf8ByteBufferReader
312      */

313     public void setInput(ByteBuffer byteBuffer) {
314         if (_reader != null)
315             throw new IllegalStateException("Parser not reset.");
316         _byteBufferReader.setByteBuffer(byteBuffer);
317         _inputEncoding = "UTF-8";
318         setInput(_byteBufferReader);
319     }
320
321     /**
322      * Sets the input stream this parser is going to process
323      * (UTF-8 encoding).
324      *
325      * @param in the input stream with UTF-8 encoding.
326      * @see Utf8StreamReader
327      */

328     public void setInput(InputStream in) {
329         if (_reader != null)
330             throw new IllegalStateException("Parser not reset.");
331         _inputStreamReader.setInputStream(in);
332         _inputEncoding = "UTF-8";
333         setInput(_inputStreamReader);
334     }
335
336     // Implements XmlPullParser interface.
337
public void setInput(InputStream inputStream, String inputEncoding)
338             throws XmlPullParserException {
339         if ((inputEncoding == null) || inputEncoding.equals("utf-8")
340                 || inputEncoding.equals("UTF-8")) {
341             setInput(inputStream);
342             return;
343         }
344         try {
345             _inputEncoding = inputEncoding;
346             setInput(new InputStreamReader(inputStream, inputEncoding));
347         } catch (UnsupportedEncodingException e) {
348             throw new XmlPullParserException(e.getMessage());
349         }
350     }
351
352     // Implements XmlPullParser interface.
353
public void setInput(Reader in) {
354         if (_reader != null)
355             throw new IllegalStateException("Parser not reset.");
356         _reader = in;
357         _eventType = START_DOCUMENT;
358     }
359
360     // Implements XmlPullParser interface.
361
public void defineEntityReplacementText(CharSequence entityName,
362             CharSequence replacementText) throws XmlPullParserException {
363     }
364
365     /**
366      * Returns SAX-2 like attributes for the current element.
367      *
368      * @return the attributes of the current element.
369      */

370     public Attributes getSaxAttributes() {
371         return _attributes;
372     }
373
374     // Implements XmlPullParser interface.
375
public int getAttributeCount() {
376         if (_eventType != START_TAG)
377             return -1;
378         return _attributes.getLength();
379     }
380
381     // Implements XmlPullParser interface.
382
public CharSequence getAttributeName(int index) {
383         return _attributes.getLocalName(index);
384     }
385
386     // Implements XmlPullParser interface.
387
public CharSequence getAttributeNamespace(int index) {
388         return _attributes.getURI(index);
389     }
390
391     // Implements XmlPullParser interface.
392
public CharSequence getAttributePrefix(int index) {
393         return _attributes.getPrefix(index);
394     }
395
396     // Implements XmlPullParser interface.
397
public String getAttributeType(int index) {
398         return _attributes.getType(index);
399     }
400
401     // Implements XmlPullParser interface.
402
public CharSequence getAttributeValue(CharSequence namespace,
403             CharSequence name) {
404         return _attributes.getValue(namespace, name);
405     }
406
407     // Implements XmlPullParser interface.
408
public CharSequence getAttributeValue(int index) {
409         return _attributes.getValue(index);
410     }
411
412     // Implements XmlPullParser interface.
413
public int getDepth() {
414         return _depth;
415     }
416
417     // Implements XmlPullParser interface.
418
public int getEventType() throws XmlPullParserException {
419         return _eventType;
420     }
421
422     // Implements XmlPullParser interface.
423
public boolean getFeature(CharSequence name) {
424         return false;
425     }
426
427     // Implements XmlPullParser interface.
428
public String getInputEncoding() {
429         return _inputEncoding;
430     }
431
432     // Implements XmlPullParser interface.
433
public int getLineNumber() {
434         int column = _columnOffset + _index;
435         return (column != 0) ? _lineNumber : _lineNumber - 1;
436     }
437
438     // Implements XmlPullParser interface.
439
public int getColumnNumber() {
440         int column = _columnOffset + _index;
441         return (column != 0) ? column : _lineLength;
442     }
443
444     // Implements XmlPullParser interface.
445
public CharSequence getName() {
446         return _elemLocalName;
447     }
448
449     // Implements XmlPullParser interface.
450
public CharSequence getNamespace() {
451         return _elemNamespace;
452     }
453
454     // Implements XmlPullParser interface.
455
public CharSequence getPrefix() {
456         return _elemPrefix;
457     }
458
459     /**
460      * Returns the current element qualified name.
461      *
462      * @return the qualified name of the current element (prefix:localName).
463      */

464     public CharSequence getQName() {
465         return _elemQName;
466     }
467
468     // Implements XmlPullParser interface.
469
public CharSequence getNamespace(CharSequence prefix) {
470         return _namespaces.getNamespaceUri(prefix);
471     }
472
473     // Implements XmlPullParser interface.
474
public int getNamespaceCount(int depth) {
475         return _namespaces.getNamespaceCount(depth);
476     }
477
478     // Implements XmlPullParser interface.
479
public CharSequence getNamespacePrefix(int pos) {
480         return _namespaces.getNamespacePrefix(pos);
481     }
482
483     // Implements XmlPullParser interface.
484
public CharSequence getNamespaceUri(int pos) {
485         return _namespaces.getNamespaceUri(pos);
486     }
487
488     // Implements XmlPullParser interface.
489
public CharSequence getPositionDescription() {
490         return Text.valueOf("line ").concat(Text.valueOf(getLineNumber()))
491                 .concat(Text.valueOf(", column ")).concat(
492                         Text.valueOf(getColumnNumber()));
493     }
494
495     // Implements XmlPullParser interface.
496
public Object getProperty(CharSequence name) {
497         return null;
498     }
499
500     // Implements XmlPullParser interface.
501
public CharSequence getText() {
502         if (_eventType == START_DOCUMENT || _eventType == END_DOCUMENT) {
503             return null;
504         }
505         return _text;
506     }
507
508     // Implements XmlPullParser interface.
509
public char[] getTextCharacters(int[] holderForStartAndLength) {
510         if (_eventType == START_DOCUMENT || _eventType == END_DOCUMENT) {
511             holderForStartAndLength[0] = holderForStartAndLength[1] = -1;
512             return null;
513         }
514         holderForStartAndLength[0] = _text.offset;
515         holderForStartAndLength[1] = _text.length;
516         return _text.data;
517     }
518
519     // Implements XmlPullParser interface.
520
public int indexOf(CharSequence namespace, CharSequence name) {
521         if (_eventType != START_TAG)
522             throw new IndexOutOfBoundsException();
523         return _attributes.getIndex(namespace, name);
524     }
525
526     // Implements XmlPullParser interface.
527
public boolean isAttribute(CharSequence namespace, CharSequence name) {
528         return indexOf(namespace, name) >= 0;
529     }
530
531     // Implements XmlPullParser interface.
532
public boolean isAttributeDefault(int index) {
533         return false;
534     }
535
536     // Implements XmlPullParser interface.
537
public boolean isEmptyElementTag() throws XmlPullParserException {
538         return _isEmpty;
539     }
540
541     // Implements XmlPullParser interface.
542
public boolean isWhitespace() throws XmlPullParserException {
543         if (_eventType == TEXT || _eventType == CDSECT)
544             return !_hasNonWhitespace;
545         throw new IllegalStateException();
546     }
547
548     // Implements XmlPullParser interface.
549
public int next() throws XmlPullParserException, IOException {
550         return (_eventType = parse(false));
551     }
552
553     // Implements XmlPullParser interface.
554
public int nextTag() throws XmlPullParserException, IOException {
555         int eventType = next();
556         if (eventType == TEXT && isWhitespace()) { // skip whitespace
557
eventType = next();
558         }
559         if (eventType != START_TAG && eventType != END_TAG) {
560             throw error("expected start or end tag");
561         }
562         return eventType;
563     }
564
565     // Implements XmlPullParser interface.
566
public CharSequence nextText() throws XmlPullParserException, IOException {
567         if (getEventType() != START_TAG)
568             throw error("parser must be on START_TAG to read next text");
569         int eventType = next();
570         if (eventType == TEXT) {
571             CharSequence result = getText();
572             eventType = next();
573             if (eventType != END_TAG)
574                 throw error("event TEXT must be immediately followed by END_TAG");
575
576             return result;
577         } else if (eventType == END_TAG) {
578             return CharSequenceImpl.EMPTY;
579         } else {
580             throw error("parser must be on START_TAG or TEXT to read text");
581         }
582     }
583
584     // Implements XmlPullParser interface.
585
public int nextToken() throws XmlPullParserException, IOException {
586         return (_eventType = parse(true));
587     }
588
589     // Implements XmlPullParser interface.
590
public void setFeature(String name, boolean state)
591             throws XmlPullParserException {
592     }
593
594     // Implements XmlPullParser interface.
595
public boolean getFeature(String name) {
596         return false;
597     }
598
599     // Implements XmlPullParser interface.
600
public void setProperty(String name, Object value)
601             throws XmlPullParserException {
602     }
603
604     // Implements XmlPullParser interface.
605
public Object getProperty(String name) {
606         return null;
607     }
608
609     // Implements XmlPullParser interface.
610
public void require(int type, CharSequence namespace, CharSequence name)
611             throws XmlPullParserException, IOException {
612         if (type != getEventType()
613                 || (namespace != null && !FastComparator.LEXICAL.areEqual(
614                         namespace, getNamespace()))
615                 || (name != null && !FastComparator.LEXICAL.areEqual(name,
616                         getName())))
617             throw error("Require " + TYPES[type] + " failed");
618     }
619
620     /**
621      * Parses the document.
622      *
623      * @param tokenize indicates if tokenization is performed.
624      * @return the event type.
625      */

626     private int parse(boolean tokenize) throws XmlPullParserException,
627             IOException {
628         switch (_eventType) { // Previous event.
629
case START_DOCUMENT:
630             _charsRead = _reader.read(_chars, 0, _chars.length);
631             break;
632         case END_DOCUMENT:
633             throw error("End of document reached.");
634         case START_TAG:
635             if (_isEmpty) { // Previous empty tag, generates END_TAG automatically.
636
_isEmpty = false;
637                 return END_TAG;
638             }
639             _elemPrefix = null;
640             _elemLocalName = null;
641             _elemNamespace = null;
642             _elemQName = null;
643             break;
644         case END_TAG:
645             _attributes.reset();
646             _depth--;
647             _attributes = (AttributesImpl) _attrPool.get(_depth);
648             _length = _elemQName.offset;
649             _start = _length;
650             while (_seqs[--_seqsIndex] != _elemQName) {
651             }
652             _elemPrefix = null;
653             _elemLocalName = null;
654             _elemNamespace = null;
655             _elemQName = null;
656             break;
657         default:
658             _text = null;
659             _hasNonWhitespace = false;
660         }
661
662         while (_index < _charsRead) {
663
664             // Preprocessing.
665
//
666
char c = _chars[_index];
667             if (++_index == _charsRead) { // Reloads buffer.
668
_columnOffset += _index;
669                 _index = 0;
670                 _charsRead = _reader.read(_chars, 0, _chars.length);
671                 while ((_length + _charsRead) >= _data.length) {
672                     // Potential overflow, resizes.
673
char[] tmp = new char[_data.length * 2];
674                     System.arraycopy(_data, 0, tmp, 0, _data.length);
675                     _data = tmp;
676                     DATA_SIZE.set(new Integer(_data.length));
677                 }
678             }
679             // Replaces #xD and #xD#xA with #xA as per XML 1.0
680
// recommendations (&2.11).
681
if (c < 0x20) {
682                 if (c == 0xD) { // Replaces #xD with #xA
683
if ((_index < _charsRead) && (_chars[_index] == 0xA)) {
684                         // Unless next char is #xA, then continue,
685
// #xD#xA will be replaced by #xA
686
continue;
687                     }
688                     c = 0xA;
689                 }
690                 if (c == 0xA) {
691                     _lineNumber++; // Do it now, locator will readjust.
692
_lineLength = _columnOffset + _index;
693                     _columnOffset = -_index; // column = 0
694
} else if (c != 0x9) { // Not a tab.
695
throw error("Illegal XML character U+"
696                             + Integer.toHexString(c));
697                 }
698             }
699             // Appends to buffer.
700
_data[_length++] = c;
701             // Detects escape sequence (e.g. character reference).
702
if ((c == '&') && (_state != STATE_COMMENT) && (_state != PI)
703                     && (_state != CDATA) && (_state != ESCAPE)) { // (&2.4)
704
_savedState = _state;
705                 _escStart = _length;
706                 _state = ESCAPE;
707             }
708
709             // Main processing.
710
//
711
switch (_state) {
712             case CHAR_DATA:
713                 if (c == '<') {
714                     _state = MARKUP;
715                     if (_hasNonWhitespace) {
716                         int nbrChar = _length - _start - 1;
717                         setText(_start, nbrChar);
718                         _length = _start; // Do not keep character data.
719
return TEXT;
720                     }
721                     _length = _start; // Do not keep character data.
722
} else if (!_hasNonWhitespace && c > ' ') {
723                     _hasNonWhitespace = true;
724                 }
725                 break;
726
727             case MARKUP:
728                 if (_length - _start == 1) {
729                     if (c == '/') {
730                         _state = CLOSE_TAG + READ_ELEM_NAME;
731                         _length = _start;
732                         _elemQName = newSeq();
733                         _elemQName.offset = _start;
734                     } else if (c == '?') {
735                         _state = PI;
736                         _length = _start;
737                     } else if (c != '!') {
738                         _state = OPEN_TAG + READ_ELEM_NAME;
739                         // Sets the attributes for the current depth.
740
if (_depth >= _attrPool.size()) {
741                             _attrPool.addLast(ATTRIBUTES_IMPL_FACTORY.newObject());
742                         }
743                         _attributes = (AttributesImpl) _attrPool.get(_depth);
744                         _elemQName = newSeq();
745                         _elemQName.offset = _start;
746                     }
747                 } else if ((_length - _start == 3) && (_data[_start] == '!')
748                         && (_data[_start + 1] == '-')
749                         && (_data[_start + 2] == '-')) {
750                     _state = STATE_COMMENT;
751                     _length = _start;
752                 } else if ((_length - _start == 8) && (_data[_start] == '!')
753                         && (_data[_start + 1] == '[')
754                         && (_data[_start + 2] == 'C')
755                         && (_data[_start + 3] == 'D')
756                         && (_data[_start + 4] == 'A')
757                         && (_data[_start + 5] == 'T')
758                         && (_data[_start + 6] == 'A')
759                         && (_data[_start + 7] == '[')) {
760                     _state = CDATA;
761                     _nonwhitespace = -1;
762                     _length = _start;
763                 } else if (c == '>') {
764                     _state = CHAR_DATA;
765                     _length = _start;
766                 }
767                 break;
768
769             case STATE_COMMENT:
770                 if ((c == '>') && (_length - _start >= 3)
771                         && (_data[_length - 2] == '-')
772                         && (_data[_length - 3] == '-')) {
773                     _state = CHAR_DATA;
774                     int nbrChar = _length - _start - 3;
775                     _length = _start; // Do not keep comment.
776
if (tokenize && nbrChar > 0) {
777                         setText(_start, nbrChar);
778                         return COMMENT;
779                     }
780                 }
781                 break;
782
783             case PI: // Ignores Processing Instructions.
784
if ((c == '>') && (_length - _start >= 2)
785                         && (_data[_length - 2] == '?')) {
786                     _state = CHAR_DATA;
787                     int nbrChar = _length - _start - 2;
788                     _length = _start; // Do not keep Processing Instructions.
789
if (tokenize && nbrChar > 0) {
790                         setText(_start, nbrChar);
791                         return PROCESSING_INSTRUCTION;
792                     }
793                 }
794                 break;
795
796             case CDATA:
797                 if ((c == '>') && (_length - _start >= 3)
798                         && (_data[_length - 2] == ']')
799                         && (_data[_length - 3] == ']')) {
800                     _state = CHAR_DATA;
801                     int nbrChar = _length - _start - 3;
802                     _hasNonWhitespace = !(_nonwhitespace == _start + nbrChar
803                             + 1);
804                     setText(_start, nbrChar);
805                     _length = _start; // Do not keep CDATA
806
return CDSECT;
807                 }
808                 if ((_nonwhitespace == -1) && (c > ' '))
809                     _nonwhitespace = _length;
810                 break;
811
812             // OPEN_TAG:
813
case OPEN_TAG + READ_ELEM_NAME:
814                 if (c == '>') {
815                     _elemQName.length = _length - _elemQName.offset - 1;
816                     _elemQName.data = _data;
817                     _state = CHAR_DATA;
818                     _start = _length;
819                     return processElement(OPEN_TAG);
820                 } else if (c == '/') {
821                     _elemQName.length = _length - _elemQName.offset - 1;
822                     _elemQName.data = _data;
823                     _state = OPEN_TAG + EMPTY_TAG;
824                 } else if ((c == ':') && (_elemPrefix == null)) {
825                     _elemPrefix = newSeq();
826                     _elemPrefix.offset = _elemQName.offset;
827                     _elemPrefix.length = _length - _elemQName.offset - 1;
828                     _elemPrefix.data = _data;
829                 } else if (c <= ' ') {
830                     _elemQName.length = _length - _elemQName.offset - 1;
831                     _elemQName.data = _data;
832                     _state = OPEN_TAG + ELEM_NAME_READ;
833                 }
834                 break;
835             case OPEN_TAG + ELEM_NAME_READ:
836                 if (c == '>') {
837                     _state = CHAR_DATA;
838                     _start = _length;
839                     return processElement(OPEN_TAG);
840                 } else if (c == '/') {
841                     _state = OPEN_TAG + EMPTY_TAG;
842                 } else if (c > ' ') {
843                     _attrQName = newSeq();
844                     _attrQName.offset = _length - 1;
845                     _state = OPEN_TAG + READ_ATTR_NAME;
846                 }
847                 break;
848             case OPEN_TAG + READ_ATTR_NAME:
849                 if (c <= ' ') {
850                     _attrQName.length = _length - _attrQName.offset - 1;
851                     _attrQName.data = _data;
852                     _state = OPEN_TAG + ATTR_NAME_READ;
853                 } else if (c == '=') {
854                     _attrQName.length = _length - _attrQName.offset - 1;
855                     _attrQName.data = _data;
856                     _state = OPEN_TAG + EQUAL_READ;
857                 } else if ((c == ':') && (_attrPrefix == null)) {
858                     _attrPrefix = newSeq();
859                     _attrPrefix.offset = _attrQName.offset;
860                     _attrPrefix.length = _length - _attrQName.offset - 1;
861                     _attrPrefix.data = _data;
862                 }
863                 break;
864             case OPEN_TAG + ATTR_NAME_READ:
865                 if (c == '=') {
866                     _state = OPEN_TAG + EQUAL_READ;
867                 } else if (c > ' ') {
868                     throw error("'=' expected");
869                 }
870                 break;
871             case OPEN_TAG + EQUAL_READ:
872                 if (c == '\'') {
873                     _attrValue = newSeq();
874                     _attrValue.offset = _length;
875                     _state = OPEN_TAG + READ_ATTR_VALUE_SIMPLE_QUOTE;
876                 } else if (c == '\"') {
877                     _attrValue = newSeq();
878                     _attrValue.offset = _length;
879                     _state = OPEN_TAG + READ_ATTR_VALUE_DOUBLE_QUOTE;
880                 } else if (c > ' ') {
881                     throw error("Quotes expected");
882                 }
883                 break;
884             case OPEN_TAG + READ_ATTR_VALUE_SIMPLE_QUOTE:
885                 if (c == '\'') {
886                     _attrValue.length = _length - _attrValue.offset - 1;
887                     _attrValue.data = _data;
888                     processAttribute();
889                     _state = OPEN_TAG + ELEM_NAME_READ;
890                 }
891                 break;
892             case OPEN_TAG + READ_ATTR_VALUE_DOUBLE_QUOTE:
893                 if (c == '\"') {
894                     _attrValue.length = _length - _attrValue.offset - 1;
895                     _attrValue.data = _data;
896                     processAttribute();
897                     _state = OPEN_TAG + ELEM_NAME_READ;
898                 }
899                 break;
900             case OPEN_TAG + EMPTY_TAG:
901                 if (c == '>') {
902                     _state = CHAR_DATA;
903                     _start = _length;
904                     return processElement(OPEN_TAG + EMPTY_TAG);
905                 } else {
906                     throw error("'>' expected");
907                 }
908
909             // CLOSE_TAG:
910
case CLOSE_TAG + READ_ELEM_NAME:
911                 if (c == '>') {
912                     _elemQName.length = _length - _elemQName.offset - 1;
913                     _elemQName.data = _data;
914                     _state = CHAR_DATA;
915                     _start = _length;
916                     return processElement(CLOSE_TAG);
917                 } else if ((c == ':') && (_elemPrefix == null)) {
918                     _elemPrefix = newSeq();
919                     _elemPrefix.offset = _elemQName.offset;
920                     _elemPrefix.length = _length - _elemQName.offset - 1;
921                     _elemPrefix.data = _data;
922                 } else if (c <= ' ') {
923                     _elemQName.length = _length - _elemQName.offset - 1;
924                     _elemQName.data = _data;
925                     _state = CLOSE_TAG + ELEM_NAME_READ;
926                 }
927                 break;
928             case CLOSE_TAG + ELEM_NAME_READ:
929                 if (c == '>') {
930                     _state = CHAR_DATA;
931                     _start = _length;
932                     return processElement(CLOSE_TAG);
933                 } else if (c > ' ') {
934                     throw error("'>' expected");
935                 }
936                 break;
937
938             case ESCAPE:
939                 if (c == ';') { // Escape terminator.
940
if ((_length - _escStart == 3)
941                             && (_data[_length - 3] == 'l')
942                             && (_data[_length - 2] == 't')) {
943                         _data[_escStart - 1] = '<';
944                     } else if ((_length - _escStart == 3)
945                             && (_data[_length - 3] == 'g')
946                             && (_data[_length - 2] == 't')) {
947                         _data[_escStart - 1] = '>';
948                     } else if ((_length - _escStart == 5)
949                             && (_data[_length - 5] == 'a')
950                             && (_data[_length - 4] == 'p')
951                             && (_data[_length - 3] == 'o')
952                             && (_data[_length - 2] == 's')) {
953                         _data[_escStart - 1] = '\'';
954                     } else if ((_length - _escStart == 5)
955                             && (_data[_length - 5] == 'q')
956                             && (_data[_length - 4] == 'u')
957                             && (_data[_length - 3] == 'o')
958                             && (_data[_length - 2] == 't')) {
959                         _data[_escStart - 1] = '\"';
960                     } else if ((_length - _escStart == 4)
961                             && (_data[_length - 4] == 'a')
962                             && (_data[_length - 3] == 'm')
963                             && (_data[_length - 2] == 'p')) {
964                         _data[_escStart - 1] = '&';
965                     } else { // Character reference (&4.1)
966
if ((_length - _escStart > 1)
967                                 && (_data[_escStart] == '#')) {
968                             try {
969                                 if (_data[_escStart + 1] == 'x') { // Hexadecimal.
970
_num.offset = _escStart + 2;
971                                     _num.length = _length - _escStart - 3;
972                                     _num.data = _data;
973                                     _data[_escStart - 1] = (char) TypeFormat
974                                             .parseInt(_num, 16);
975                                 } else { // Decimal.
976
_num.offset = _escStart + 1;
977                                     _num.length = _length - _escStart - 2;
978                                     _num.data = _data;
979                                     _data[_escStart - 1] = (char) TypeFormat
980                                             .parseInt(_num);
981                                 }
982                             } catch (NumberFormatException e) {
983                                 throw error("Ill-formed character reference");
984                             }
985                         } else {
986                             throw error("'#' expected");
987                         }
988                     }
989                     _state = _savedState;
990                     _length = _escStart;
991                 } else if (c <= ' ') {
992                     throw error("';' expected");
993                 }
994                 break;
995
996             default:
997                 throw error("State unknown: " + _state);
998             }
999         }
1000
1001        if (_depth != 0)
1002            throw error("Unexpected end of file");
1003        reset();
1004        return END_DOCUMENT;
1005    }
1006
1007    // Defines parsing states.
1008
private static final int CHAR_DATA = 0x10;
1009
1010    private static final int MARKUP = 0x20;
1011
1012    private static final int STATE_COMMENT = 0x30;
1013
1014    private static final int PI = 0x40;
1015
1016    private static final int CDATA = 0x50;
1017
1018    private static final int OPEN_TAG = 0x60;
1019
1020    private static final int CLOSE_TAG = 0x70;
1021
1022    private static final int ESCAPE = 0x90;
1023
1024    // Defines element parsing sub-states.
1025
private static final int READ_ELEM_NAME = 0x01;
1026
1027    private static final int ELEM_NAME_READ = 0x02;
1028
1029    private static final int READ_ATTR_NAME = 0x03;
1030
1031    private static final int ATTR_NAME_READ = 0x04;
1032
1033    private static final int EQUAL_READ = 0x05;
1034
1035    private static final int READ_ATTR_VALUE_SIMPLE_QUOTE = 0x06;
1036
1037    private static final int READ_ATTR_VALUE_DOUBLE_QUOTE = 0x07;
1038
1039    private static final int EMPTY_TAG = 0x08;
1040
1041    /**
1042     * Processes the attribute just read.
1043     *
1044     * @throws XmlPullParserException any SAX exception, possibly wrapping another
1045     * exception.
1046     */

1047    private void processAttribute() throws XmlPullParserException {
1048        if (_attrPrefix == null) { // No prefix.
1049
if (isXmlns(_attrQName)) { // Sets default namespace.
1050
_namespaces.map(null, _attrValue);
1051            } else {
1052                _attributes
1053                        .addAttribute(CharSequenceImpl.EMPTY, _attrQName,
1054                                CharSequenceImpl.EMPTY, _attrQName, "CDATA",
1055                                _attrValue);
1056            }
1057        } else { // Prefix.
1058
CharSequenceImpl localName = newSeq();
1059            localName.offset = _attrQName.offset + _attrPrefix.length + 1;
1060            localName.length = _attrQName.length - _attrPrefix.length - 1;
1061            localName.data = _attrQName.data;
1062
1063            if (isXmlns(_attrPrefix)) { // Namespace association.
1064
_namespaces.map(localName, _attrValue);
1065            } else { // Searches URI
1066
CharSequenceImpl uri = _namespaces.getNamespaceUri(_attrPrefix);
1067                if (uri != null) {
1068                    _attributes.addAttribute(uri, localName, _attrPrefix,
1069                            _attrQName, "CDATA", _attrValue);
1070                } else {
1071                    error("Namespace " + _attrPrefix + " undefined");
1072                }
1073            }
1074            _attrPrefix = null; // Resets.
1075
}
1076    }
1077
1078    /**
1079     * Processes the element just read.
1080     *
1081     * @param state the current parser state.
1082     * @throws XmlPullParserException any SAX exception, possibly wrapping another
1083     * exception.
1084     */

1085    private int processElement(int state) throws XmlPullParserException {
1086        if (_elemPrefix != null) { // Prefix, sets uri.
1087
_elemLocalName = newSeq();
1088            _elemLocalName.offset = _elemQName.offset + _elemPrefix.length + 1;
1089            _elemLocalName.length = _elemQName.length - _elemPrefix.length - 1;
1090            _elemLocalName.data = _elemQName.data;
1091            _elemNamespace = _namespaces.getNamespaceUri(_elemPrefix);
1092            if (_elemNamespace == null)
1093                throw error("Namespace " + _elemPrefix + " undefined");
1094        } else { // No prefix.
1095
_elemLocalName = _elemQName;
1096            _elemNamespace = _namespaces.getNamespaceUri(null); // Default namespace.
1097
}
1098        if (state == OPEN_TAG + EMPTY_TAG) {
1099            _isEmpty = true;
1100            _depth++;
1101            _namespaces.flush();
1102            return START_TAG;
1103
1104        } else if (state == OPEN_TAG) {
1105            _depth++;
1106            _elemStack.addLast(_elemQName);
1107            _elemStack.addLast(_elemNamespace);
1108            _elemStack.addLast(_elemLocalName);
1109            _elemStack.addLast(_elemPrefix);
1110            _namespaces.push();
1111            return START_TAG;
1112
1113        } else if (state == CLOSE_TAG) {
1114            _elemPrefix = (CharSequenceImpl) _elemStack.removeLast();
1115            _elemLocalName = (CharSequenceImpl) _elemStack.removeLast();
1116            _elemNamespace = (CharSequenceImpl) _elemStack.removeLast();
1117            CharSequenceImpl qName = _elemQName; // Current.
1118
_elemQName = (CharSequenceImpl) _elemStack.removeLast();
1119            if (!_elemQName.equals(qName))
1120                throw error("Unexpected end tag for " + _elemQName);
1121            _namespaces.pop();
1122            return END_TAG;
1123
1124        } else {
1125            throw error("Unexpected state: " + state);
1126        }
1127    }
1128
1129    // Implements Reusable.
1130
public void reset() {
1131        try {
1132            if (_reader != null)
1133                _reader.close();
1134        } catch (IOException e) {
1135            // Ignores exceptions.
1136
}
1137
1138        // Resets all members (alphabetically ordered).
1139
_attributes = (AttributesImpl) _attrPool.get(0);
1140        for (int i = 0, n = _attrPool.size(); i < n;) {
1141            ((AttributesImpl) _attrPool.get(i++)).reset();
1142        }
1143        _attrPrefix = null;
1144        _attrQName = null;
1145        _attrValue = null;
1146        _attrQName = null;
1147        _charsRead = 0;
1148        _columnOffset = 0;
1149        _depth = 0;
1150        _elemLocalName = null;
1151        _elemNamespace = null;
1152        _elemPrefix = null;
1153        _elemQName = null;
1154        _elemStack.reset();
1155        _escStart = 0;
1156        _eventType = END_DOCUMENT;
1157        _hasNonWhitespace = false;
1158        _index = 0;
1159        _inputEncoding = null;
1160        _isEmpty = false;
1161        _length = 0;
1162        _lineLength = 0;
1163        _lineNumber = 0;
1164        _namespaces.reset();
1165        _nonwhitespace = 0;
1166        _reader = null;
1167        _savedState = 0;
1168        _seqsIndex = 0;
1169        _start = 0;
1170        _state = CHAR_DATA;
1171        _text = null;
1172    }
1173
1174    /**
1175     * Sets current text.
1176     *
1177     * @param start the start index.
1178     * @param end the end index.
1179     */

1180    private void setText(int start, int length) {
1181        _text = newSeq();
1182        _text.data = _data;
1183        _text.offset = start;
1184        _text.length = length;
1185    }
1186
1187    /**
1188     * Indicates if the specified character sequence is xmlns.
1189     *
1190     * @param chars the characters sequence to be tested.
1191     * @return <code>chars.equals("xmlns")</code>
1192     */

1193    private static boolean isXmlns(CharSequenceImpl chars) {
1194        return (chars.length == 5) && (chars.data[chars.offset] == 'x')
1195                && (chars.data[chars.offset + 1] == 'm')
1196                && (chars.data[chars.offset + 2] == 'l')
1197                && (chars.data[chars.offset + 3] == 'n')
1198                && (chars.data[chars.offset + 4] == 's');
1199    }
1200
1201    private XmlPullParserException error(String message) {
1202        XmlPullParserException e = new XmlPullParserException(message, this,
1203                null);
1204        return e;
1205    }
1206
1207    // Returns a new character sequence from the pool.
1208
private CharSequenceImpl newSeq() {
1209        return (_seqsIndex < _seqsCapacity) ? _seqs[_seqsIndex++] : newSeq2();
1210    }
1211
1212    private CharSequenceImpl newSeq2() {
1213        if (_seqsCapacity++ >= _seqs.length) { // Resizes.
1214
CharSequenceImpl[] tmp = new CharSequenceImpl[_seqs.length * 2];
1215            System.arraycopy(_seqs, 0, tmp, 0, _seqs.length);
1216            _seqs = tmp;
1217            SEQ_SIZE.set(new Integer(_seqs.length));
1218        }
1219        return _seqs[_seqsIndex++] = (CharSequenceImpl) CharSequenceImpl.FACTORY
1220                .newObject();
1221    }
1222
1223}
Popular Tags