| 1 9 package javolution.xml.pull;
10
11 import java.io.IOException;
12 import java.io.InputStream;
13 import java.io.InputStreamReader;
14 import java.io.Reader;
15 import java.io.UnsupportedEncodingException;
16
17 import j2me.lang.CharSequence;
18 import j2me.lang.IllegalStateException;
19 import j2me.nio.ByteBuffer;
20
21 import javolution.io.Utf8ByteBufferReader;
22 import javolution.io.Utf8StreamReader;
23 import javolution.lang.PersistentReference;
24 import javolution.lang.Reusable;
25 import javolution.lang.Text;
26 import javolution.lang.TypeFormat;
27 import javolution.realtime.ObjectFactory;
28 import javolution.util.FastComparator;
29 import javolution.util.FastTable;
30 import javolution.xml.sax.Attributes;
31 import javolution.xml.sax.AttributesImpl;
32
33
/**
 * Implementation of {@link XmlPullParser} that is also {@link Reusable}.
 * Input characters are read in chunks into a fixed buffer and accumulated
 * into a growable character array by a hand-written state machine
 * (see <code>parse</code>).
 */
79 public final class XmlPullParserImpl implements XmlPullParser, Reusable {
80
81
    /**
     * Factory creating {@link AttributesImpl} instances; <code>parse</code>
     * uses it to extend {@link #_attrPool} when the element depth reaches
     * the pool size.
     */
84     private static final ObjectFactory ATTRIBUTES_IMPL_FACTORY = new ObjectFactory() {
85         protected Object create() {
86             return new AttributesImpl();
87         }
88     };
89
90
    /**
     * Capacity (in characters) of the read buffer <code>_chars</code>; also
     * passed to the <code>Utf8StreamReader</code> constructor.
     */
93     private static final int READER_BUFFER_CAPACITY = 2048;
94
95
    /**
     * Persistent reference holding the capacity used to allocate
     * {@link #_data}. Its initial value is twice the reader buffer capacity;
     * <code>parse</code> stores the new capacity each time it doubles
     * the array.
     */
99     private static final PersistentReference DATA_SIZE = new PersistentReference(
100             "javolution.xml.pull.XmlPullParserImpl#DATA_SIZE", new Integer(
101                     READER_BUFFER_CAPACITY * 2));
102
103
    /**
     * Persistent reference holding the length used to allocate the
     * <code>_seqs</code> array (initial value 256).
     */
106     private static final PersistentReference SEQ_SIZE = new PersistentReference(
107             "javolution.xml.pull.XmlPullParserImpl#SEQ_SIZE", new Integer(256));
108
109
    /**
     * Line counter; <code>parse</code> increments it for every line feed
     * (a lone carriage return is treated as a line feed).
     */
112     private int _lineNumber;
113
114
    /**
     * Offset added to {@link #_index} to obtain the current column.
     * <code>parse</code> adds the buffer index to it when the read buffer is
     * refilled and sets it to <code>-_index</code> at each line break.
     */
117     private int _columnOffset;
118
119
    /**
     * Column reached when the last line break was seen; reported by
     * <code>getColumnNumber()</code> when the current column is zero.
     */
122     private int _lineLength;
123
124
    /**
     * Index in <code>_chars</code> of the next character to process.
     */
127     private int _index;
128
129
    /**
     * Growable array into which <code>parse</code> appends the characters
     * it processes; initially sized from {@link #DATA_SIZE}.
     */
132     private char[] _data = (char[]) new char[((Integer) DATA_SIZE.get())
133             .intValue()];
134
135
    /**
     * Write position in {@link #_data} (index of the next character
     * to store).
     */
138     private int _length;
139
140
    /**
     * Current element depth, returned by <code>getDepth()</code> and used
     * as the index into {@link #_attrPool}.
     */
143     private int _depth;
144
145
    /**
     * Namespace bookkeeping; the <code>getNamespace*</code> accessors
     * delegate to it.
     */
148     private final Namespaces _namespaces = new Namespaces();
149
150
    /**
     * Attributes currently exposed through <code>getSaxAttributes()</code>
     * and the <code>getAttribute*</code> accessors.
     */
153     private AttributesImpl _attributes;
154
155
    /**
     * Pool of {@link AttributesImpl} instances indexed by element depth.
     */
158     private final FastTable _attrPool = new FastTable();
159
160
    /**
     * Element stack.
     * NOTE(review): no use of this table is visible in this part of the
     * file; confirm its role against the rest of the class.
     */
163     private final FastTable _elemStack = new FastTable();
164
165
    /**
     * Read buffer filled by <code>_reader.read(...)</code> in
     * <code>parse</code>.
     */
168     private final char[] _chars = new char[READER_BUFFER_CAPACITY];
169
170
    /**
     * Reusable UTF-8 reader used by <code>setInput(InputStream)</code>.
     */
173     private final Utf8StreamReader _inputStreamReader = new Utf8StreamReader(
174             READER_BUFFER_CAPACITY);
175
176
    /**
     * Reusable UTF-8 reader used by <code>setInput(ByteBuffer)</code>.
     */
179     private final Utf8ByteBufferReader _byteBufferReader = new Utf8ByteBufferReader();
180
181
    /**
     * Result of the last <code>_reader.read(...)</code> call; bounds the
     * valid part of <code>_chars</code>.
     */
184     private int _charsRead;
185
186
    /**
     * Local name of the current element (returned by <code>getName()</code>).
     */
189     private CharSequenceImpl _elemLocalName;
190
191
    /**
     * Namespace of the current element (returned by
     * <code>getNamespace()</code>).
     */
194     private CharSequenceImpl _elemNamespace;
195
196
    /**
     * Qualified name of the current element (returned by
     * <code>getQName()</code>).
     */
199     private CharSequenceImpl _elemQName;
200
201
    /**
     * Prefix of the current element (returned by <code>getPrefix()</code>);
     * set by <code>parse</code> when the first ':' of the element name
     * is read.
     */
204     private CharSequenceImpl _elemPrefix;
205
206
    /**
     * Qualified name of the attribute currently being read.
     */
209     private CharSequenceImpl _attrQName;
210
211
    /**
     * Prefix of the attribute currently being read; set by
     * <code>parse</code> when the first ':' of the attribute name is read.
     */
214     private CharSequenceImpl _attrPrefix;
215
216
    /**
     * Attribute value.
     * NOTE(review): its use is not visible in this part of the file.
     */
219     private CharSequenceImpl _attrValue;
220
221
    /**
     * Scratch character sequence.
     * NOTE(review): its use is not visible in this part of the file;
     * presumably for numeric character references, to be confirmed.
     */
224     private final CharSequenceImpl _num = new CharSequenceImpl();
225
226
    /**
     * Last event type. Starts as END_DOCUMENT; <code>setInput(Reader)</code>
     * switches it to START_DOCUMENT, and <code>next()</code> /
     * <code>nextToken()</code> store the result of <code>parse</code> here.
     */
229     private int _eventType = END_DOCUMENT;
230
231
    /**
     * Encoding reported by <code>getInputEncoding()</code>.
     */
234     private String _inputEncoding;
235
236
    /**
     * Value reported by <code>isEmptyElementTag()</code>. When set while the
     * last event is START_TAG, <code>parse</code> clears it and returns
     * END_TAG without reading further input.
     */
239     private boolean _isEmpty;
240
241
    /**
     * Used inside CDATA sections: -1 until the first character above ' '
     * is stored, then the value of <code>_length</code> at that point.
     */
244     private int _nonwhitespace;
245
246
    /**
     * Current input; the <code>setInput</code> methods reject new input
     * while it is non-null.
     */
249     private Reader _reader;
250
251
    /**
     * Value of <code>_length</code> recorded when <code>parse</code>
     * encounters '&amp;' and enters the ESCAPE state.
     */
254     private int _escStart;
255
256
    /**
     * State saved when <code>parse</code> enters the ESCAPE state
     * (presumably restored when the escape sequence ends; not visible here).
     */
259     private int _savedState;
260
261
    /**
     * Index in <code>_data</code> where the characters currently being
     * accumulated start.
     */
264     private int _start;
265
266
    /**
     * Current state of the <code>parse</code> state machine.
     */
269     private int _state = CHAR_DATA;
270
271
    /**
     * Text of the current event (returned by <code>getText()</code>).
     */
274     private CharSequenceImpl _text;
275
276
    /**
     * Whether the current character data contains a character above ' ';
     * <code>isWhitespace()</code> returns its negation.
     */
280     private boolean _hasNonWhitespace;
281
282
    /**
     * Array of character sequences, sized from SEQ_SIZE.
     * NOTE(review): presumably the reuse pool behind <code>newSeq()</code>;
     * that method is not visible in this part of the file.
     */
285     private CharSequenceImpl[] _seqs = new CharSequenceImpl[((Integer) SEQ_SIZE
286             .get()).intValue()];
287
288
    /**
     * Index into <code>_seqs</code>; <code>parse</code> walks it backwards
     * to the element's qualified name when an END_TAG event is left.
     */
291     private int _seqsIndex;
292
293
    /**
     * Capacity counter for <code>_seqs</code>.
     * NOTE(review): its use is not visible in this part of the file.
     */
296     private int _seqsCapacity;
297
298
    /**
     * Creates a parser with one {@link AttributesImpl} already placed at
     * the head of the attribute pool.
     */
301     public XmlPullParserImpl() {
302         _attributes = new AttributesImpl();
303         _attrPool.addLast(_attributes);
304     }
305
306
    /**
     * Sets the input to the specified byte buffer, read as UTF-8.
     *
     * @param byteBuffer the buffer handed to the internal UTF-8 reader.
     * @throws IllegalStateException if a reader is still set
     *         ("Parser not reset.").
     */
313     public void setInput(ByteBuffer byteBuffer) {
314         if (_reader != null)
315             throw new IllegalStateException("Parser not reset.");
316         _byteBufferReader.setByteBuffer(byteBuffer);
317         _inputEncoding = "UTF-8";
318         setInput(_byteBufferReader);
319     }
320
321
    /**
     * Sets the input to the specified stream, read as UTF-8.
     *
     * @param in the stream handed to the internal UTF-8 reader.
     * @throws IllegalStateException if a reader is still set
     *         ("Parser not reset.").
     */
328     public void setInput(InputStream in) {
329         if (_reader != null)
330             throw new IllegalStateException("Parser not reset.");
331         _inputStreamReader.setInputStream(in);
332         _inputEncoding = "UTF-8";
333
setInput(_inputStreamReader);
334     }
335
    /**
     * Sets the input stream and its encoding. A null encoding, "utf-8" or
     * "UTF-8" (exact match only) is routed to the internal UTF-8 reader;
     * any other value goes through a new {@link InputStreamReader}.
     *
     * NOTE(review): <code>_inputEncoding</code> is assigned before the
     * InputStreamReader is constructed, so it keeps the rejected value when
     * the encoding is unsupported; the original exception is also dropped
     * (only its message is kept).
     *
     * @param inputStream the stream to parse.
     * @param inputEncoding the encoding name, or null for UTF-8.
     * @throws XmlPullParserException if the encoding is not supported.
     */
336     public void setInput(InputStream inputStream, String inputEncoding)
338             throws XmlPullParserException {
339         if ((inputEncoding == null) || inputEncoding.equals("utf-8")
340                 || inputEncoding.equals("UTF-8")) {
341             setInput(inputStream);
342             return;
343         }
344         try {
345             _inputEncoding = inputEncoding;
346             setInput(new InputStreamReader(inputStream, inputEncoding));
347         } catch (UnsupportedEncodingException e) {
348             throw new XmlPullParserException(e.getMessage());
349         }
350     }
351
    /**
     * Sets the reader to parse and moves the parser to START_DOCUMENT.
     *
     * @param in the character source.
     * @throws IllegalStateException if a reader is still set
     *         ("Parser not reset.").
     */
352     public void setInput(Reader in) {
354         if (_reader != null)
355             throw new IllegalStateException("Parser not reset.");
356         _reader = in;
357         _eventType = START_DOCUMENT;
358     }
359
    /**
     * Does nothing: the body is empty, so the replacement text is ignored.
     */
360     public void defineEntityReplacementText(CharSequence entityName,
362             CharSequence replacementText) throws XmlPullParserException {
363     }
364
365
    /**
     * Returns the current attributes object itself (not a copy).
     *
     * @return the attributes currently held by this parser.
     */
370     public Attributes getSaxAttributes() {
371         return _attributes;
372     }
373
    /**
     * Returns the number of attributes of the current start tag.
     *
     * @return the attribute count, or -1 when the current event is not
     *         START_TAG.
     */
374     public int getAttributeCount() {
376         if (_eventType != START_TAG)
377             return -1;
378         return _attributes.getLength();
379     }
380
    /** Returns the local name of the attribute at the specified index. */
381     public CharSequence getAttributeName(int index) {
383         return _attributes.getLocalName(index);
384     }
385
    /** Returns the namespace URI of the attribute at the specified index. */
386     public CharSequence getAttributeNamespace(int index) {
388         return _attributes.getURI(index);
389     }
390
    /** Returns the prefix of the attribute at the specified index. */
391     public CharSequence getAttributePrefix(int index) {
393         return _attributes.getPrefix(index);
394     }
395
    /** Returns the type of the attribute at the specified index. */
396     public String getAttributeType(int index) {
398         return _attributes.getType(index);
399     }
400
    /** Returns the value of the attribute with the given namespace and name. */
401     public CharSequence getAttributeValue(CharSequence namespace,
403             CharSequence name) {
404         return _attributes.getValue(namespace, name);
405     }
406
    /** Returns the value of the attribute at the specified index. */
407     public CharSequence getAttributeValue(int index) {
409         return _attributes.getValue(index);
410     }
411
    /** Returns the current element depth counter. */
412     public int getDepth() {
414         return _depth;
415     }
416
    /** Returns the type of the last event produced by this parser. */
417     public int getEventType() throws XmlPullParserException {
419         return _eventType;
420     }
421
    /** Always returns false, whatever the feature name. */
422     public boolean getFeature(CharSequence name) {
424         return false;
425     }
426
    /** Returns the encoding recorded by the last <code>setInput</code> call. */
427     public String
getInputEncoding() {
429         return _inputEncoding;
430     }
431
    /**
     * Returns the current line number. When the computed column is zero
     * (the state <code>parse</code> leaves right after a line break), the
     * previous line is reported instead.
     */
432     public int getLineNumber() {
434         int column = _columnOffset + _index;
435         return (column != 0) ? _lineNumber : _lineNumber - 1;
436     }
437
    /**
     * Returns the current column. When the computed column is zero, the
     * column recorded at the last line break is reported instead, matching
     * the line adjustment made by <code>getLineNumber()</code>.
     */
438     public int getColumnNumber() {
440         int column = _columnOffset + _index;
441         return (column != 0) ? column : _lineLength;
442     }
443
    /** Returns the local name of the current element (may be null). */
444     public CharSequence getName() {
446         return _elemLocalName;
447     }
448
    /** Returns the namespace of the current element (may be null). */
449     public CharSequence getNamespace() {
451         return _elemNamespace;
452     }
453
    /** Returns the prefix of the current element (may be null). */
454     public CharSequence getPrefix() {
456         return _elemPrefix;
457     }
458
459
    /**
     * Returns the qualified name of the current element (may be null).
     */
464     public CharSequence getQName() {
465         return _elemQName;
466     }
467
    /** Returns the namespace URI bound to the specified prefix. */
468     public CharSequence getNamespace(CharSequence prefix) {
470         return _namespaces.getNamespaceUri(prefix);
471     }
472
    /** Returns the namespace count for the specified depth. */
473     public int getNamespaceCount(int depth) {
475         return _namespaces.getNamespaceCount(depth);
476     }
477
    /** Returns the namespace prefix at the specified position. */
478     public CharSequence getNamespacePrefix(int pos) {
480         return _namespaces.getNamespacePrefix(pos);
481     }
482
    /** Returns the namespace URI at the specified position. */
483     public CharSequence getNamespaceUri(int pos) {
485         return _namespaces.getNamespaceUri(pos);
486     }
487
    /**
     * Returns a description of the form "line L, column C" built from
     * <code>getLineNumber()</code> and <code>getColumnNumber()</code>.
     */
488     public CharSequence getPositionDescription() {
490         return Text.valueOf("line ").concat(Text.valueOf(getLineNumber()))
491                 .concat(Text.valueOf(", column ")).concat(
492                         Text.valueOf(getColumnNumber()));
493     }
494
    /** Always returns null, whatever the property name. */
495     public Object getProperty(CharSequence name) {
497         return null;
498     }
499
    /**
     * Returns the text of the current event.
     *
     * @return null for START_DOCUMENT and END_DOCUMENT, otherwise the
     *         current text (which may itself be null).
     */
500     public CharSequence getText() {
502         if (_eventType == START_DOCUMENT || _eventType == END_DOCUMENT) {
503             return null;
504         }
505         return _text;
506     }
507
    /**
     * Returns the array backing the current text and stores the text's
     * offset and length in the holder. For START_DOCUMENT and END_DOCUMENT
     * both holder entries are set to -1 and null is returned.
     *
     * NOTE(review): <code>_text</code> is dereferenced without a null check,
     * while <code>getText()</code> can return a null <code>_text</code>;
     * confirm callers only use this on events that carry text.
     *
     * @param holderForStartAndLength array of at least two entries receiving
     *        the offset (index 0) and the length (index 1).
     * @return the backing character array, or null.
     */
508     public char[] getTextCharacters(int[] holderForStartAndLength) {
510         if (_eventType == START_DOCUMENT || _eventType == END_DOCUMENT) {
511             holderForStartAndLength[0] = holderForStartAndLength[1] = -1;
512             return null;
513         }
514         holderForStartAndLength[0] = _text.offset;
515         holderForStartAndLength[1] = _text.length;
516         return _text.data;
517     }
518
    /**
     * Returns the index of the attribute with the given namespace and name.
     *
     * @throws IndexOutOfBoundsException if the current event is not
     *         START_TAG.
     */
519     public int indexOf(CharSequence namespace, CharSequence name) {
521         if (_eventType != START_TAG)
522             throw new
IndexOutOfBoundsException();
523         return _attributes.getIndex(namespace, name);
524     }
525
    /**
     * Indicates whether the current start tag has the specified attribute.
     * Delegates to <code>indexOf</code>, so it throws
     * IndexOutOfBoundsException when the current event is not START_TAG.
     */
526     public boolean isAttribute(CharSequence namespace, CharSequence name) {
528         return indexOf(namespace, name) >= 0;
529     }
530
    /** Always returns false, whatever the index. */
531     public boolean isAttributeDefault(int index) {
533         return false;
534     }
535
    /** Returns the empty-element flag maintained by the parser. */
536     public boolean isEmptyElementTag() throws XmlPullParserException {
538         return _isEmpty;
539     }
540
    /**
     * Indicates whether the current TEXT or CDSECT event holds only
     * whitespace.
     *
     * @throws IllegalStateException for any other event type.
     */
541     public boolean isWhitespace() throws XmlPullParserException {
543         if (_eventType == TEXT || _eventType == CDSECT)
544             return !_hasNonWhitespace;
545         throw new IllegalStateException();
546     }
547
    /**
     * Parses to the next event with tokenizing disabled and records it as
     * the current event type.
     */
548     public int next() throws XmlPullParserException, IOException {
550         return (_eventType = parse(false));
551     }
552
    /**
     * Advances to the next event, skipping one whitespace-only TEXT event,
     * and requires the result to be START_TAG or END_TAG.
     *
     * @throws XmlPullParserException ("expected start or end tag") otherwise.
     */
553     public int nextTag() throws XmlPullParserException, IOException {
555         int eventType = next();
556         if (eventType == TEXT && isWhitespace()) { eventType = next();
558         }
559         if (eventType != START_TAG && eventType != END_TAG) {
560             throw error("expected start or end tag");
561         }
562         return eventType;
563     }
564
    /**
     * Reads the text content of the current element. The parser must be on
     * START_TAG. If the next event is TEXT it must be followed directly by
     * END_TAG and that text is returned; if the next event is END_TAG the
     * shared empty sequence is returned.
     *
     * @throws XmlPullParserException if any of these expectations fails.
     */
565     public CharSequence nextText() throws XmlPullParserException, IOException {
567         if (getEventType() != START_TAG)
568             throw error("parser must be on START_TAG to read next text");
569         int eventType = next();
570         if (eventType == TEXT) {
571             CharSequence result = getText();
572             eventType = next();
573             if (eventType != END_TAG)
574                 throw error("event TEXT must be immediately followed by END_TAG");
575
576             return result;
577         } else if (eventType == END_TAG) {
578             return CharSequenceImpl.EMPTY;
579         } else {
580             throw error("parser must be on START_TAG or TEXT to read text");
581         }
582     }
583
    /**
     * Parses to the next event with tokenizing enabled (comments and
     * processing instructions are then reported by <code>parse</code>) and
     * records it as the current event type.
     */
584     public int nextToken() throws XmlPullParserException, IOException {
586         return (_eventType = parse(true));
587     }
588
    /** Does nothing: the body is empty, so the request is ignored. */
589     public void setFeature(String name, boolean state)
591             throws XmlPullParserException {
592     }
593
    /** Always returns false, whatever the feature name. */
594     public boolean getFeature(String name) {
596         return false;
597     }
598
    /** Does nothing: the body is empty, so the request is ignored. */
599     public void setProperty(String name, Object value)
601             throws
XmlPullParserException {
602     }
603
    /** Always returns null, whatever the property name. */
604     public Object getProperty(String name) {
606         return null;
607     }
608
    /**
     * Checks that the current event has the specified type and, when they
     * are non-null, the specified namespace and name (compared lexically).
     *
     * @param type the expected event type; also used as an index into TYPES
     *        to build the error message.
     * @param namespace the expected namespace, or null to skip the check.
     * @param name the expected local name, or null to skip the check.
     * @throws XmlPullParserException ("Require ... failed") on mismatch.
     */
609     public void require(int type, CharSequence namespace, CharSequence name)
611             throws XmlPullParserException, IOException {
612         if (type != getEventType()
613                 || (namespace != null && !FastComparator.LEXICAL.areEqual(
614                         namespace, getNamespace()))
615                 || (name != null && !FastComparator.LEXICAL.areEqual(name,
616                         getName())))
617             throw error("Require " + TYPES[type] + " failed");
618     }
619
620
    // NOTE(review): parse(...) continues well beyond this point and is not
    // fully visible here, so the fragment below is left undocumented.
626     private int parse(boolean tokenize) throws XmlPullParserException,
627             IOException {
628         switch (_eventType) { case START_DOCUMENT:
630             _charsRead = _reader.read(_chars, 0, _chars.length);
631             break;
632         case END_DOCUMENT:
633             throw error("End of document reached.");
634         case START_TAG:
635             if (_isEmpty) { _isEmpty = false;
637                 return END_TAG;
638             }
639             _elemPrefix = null;
640             _elemLocalName = null;
641             _elemNamespace = null;
642             _elemQName = null;
643             break;
644         case END_TAG:
645             _attributes.reset();
646             _depth--;
647             _attributes = (AttributesImpl) _attrPool.get(_depth);
648             _length = _elemQName.offset;
649             _start = _length;
650             while (_seqs[--_seqsIndex] != _elemQName) {
651             }
652             _elemPrefix = null;
653             _elemLocalName = null;
654             _elemNamespace = null;
655             _elemQName = null;
656             break;
657         default:
658             _text = null;
659             _hasNonWhitespace = false;
660         }
661
662         while (_index < _charsRead) {
663
664             char c = _chars[_index];
667             if (++_index == _charsRead) { _columnOffset += _index;
669                 _index = 0;
670                 _charsRead = _reader.read(_chars, 0, _chars.length);
671                 while ((_length + _charsRead) >= _data.length) {
672                     char[] tmp = new char[_data.length * 2];
674                     System.arraycopy(_data, 0, tmp, 0, _data.length);
675                     _data = tmp;
676                     DATA_SIZE.set(new Integer(_data.length));
677                 }
678             }
679             if (c < 0x20) {
682                 if (c == 0xD) { if ((_index < _charsRead) && (_chars[_index] == 0xA)) {
684                         continue;
687                     }
688                     c = 0xA;
689                 }
690                 if (c == 0xA) {
691                     _lineNumber++; _lineLength =
_columnOffset + _index; 693 _columnOffset = -_index; } else if (c != 0x9) { throw error("Illegal XML character U+" 696 + Integer.toHexString(c)); 697 } 698 } 699 _data[_length++] = c; 701 if ((c == '&') && (_state != STATE_COMMENT) && (_state != PI) 703 && (_state != CDATA) && (_state != ESCAPE)) { _savedState = _state; 705 _escStart = _length; 706 _state = ESCAPE; 707 } 708 709 switch (_state) { 712 case CHAR_DATA: 713 if (c == '<') { 714 _state = MARKUP; 715 if (_hasNonWhitespace) { 716 int nbrChar = _length - _start - 1; 717 setText(_start, nbrChar); 718 _length = _start; return TEXT; 720 } 721 _length = _start; } else if (!_hasNonWhitespace && c > ' ') { 723 _hasNonWhitespace = true; 724 } 725 break; 726 727 case MARKUP: 728 if (_length - _start == 1) { 729 if (c == '/') { 730 _state = CLOSE_TAG + READ_ELEM_NAME; 731 _length = _start; 732 _elemQName = newSeq(); 733 _elemQName.offset = _start; 734 } else if (c == '?') { 735 _state = PI; 736 _length = _start; 737 } else if (c != '!') { 738 _state = OPEN_TAG + READ_ELEM_NAME; 739 if (_depth >= _attrPool.size()) { 741 _attrPool.addLast(ATTRIBUTES_IMPL_FACTORY.newObject()); 742 } 743 _attributes = (AttributesImpl) _attrPool.get(_depth); 744 _elemQName = newSeq(); 745 _elemQName.offset = _start; 746 } 747 } else if ((_length - _start == 3) && (_data[_start] == '!') 748 && (_data[_start + 1] == '-') 749 && (_data[_start + 2] == '-')) { 750 _state = STATE_COMMENT; 751 _length = _start; 752 } else if ((_length - _start == 8) && (_data[_start] == '!') 753 && (_data[_start + 1] == '[') 754 && (_data[_start + 2] == 'C') 755 && (_data[_start + 3] == 'D') 756 && (_data[_start + 4] == 'A') 757 && (_data[_start + 5] == 'T') 758 && (_data[_start + 6] == 'A') 759 && (_data[_start + 7] == '[')) { 760 _state = CDATA; 761 _nonwhitespace = -1; 762 _length = _start; 763 } else if (c == '>') { 764 _state = CHAR_DATA; 765 _length = _start; 766 } 767 break; 768 769 case STATE_COMMENT: 770 if ((c == '>') && (_length - _start >= 3) 771 && 
(_data[_length - 2] == '-') 772 && (_data[_length - 3] == '-')) { 773 _state = CHAR_DATA; 774 int nbrChar = _length - _start - 3; 775 _length = _start; if (tokenize && nbrChar > 0) { 777 setText(_start, nbrChar); 778 return COMMENT; 779 } 780 } 781 break; 782 783 case PI: if ((c == '>') && (_length - _start >= 2) 785 && (_data[_length - 2] == '?')) { 786 _state = CHAR_DATA; 787 int nbrChar = _length - _start - 2; 788 _length = _start; if (tokenize && nbrChar > 0) { 790 setText(_start, nbrChar); 791 return PROCESSING_INSTRUCTION; 792 } 793 } 794 break; 795 796 case CDATA: 797 if ((c == '>') && (_length - _start >= 3) 798 && (_data[_length - 2] == ']') 799 && (_data[_length - 3] == ']')) { 800 _state = CHAR_DATA; 801 int nbrChar = _length - _start - 3; 802 _hasNonWhitespace = !(_nonwhitespace == _start + nbrChar 803 + 1); 804 setText(_start, nbrChar); 805 _length = _start; return CDSECT; 807 } 808 if ((_nonwhitespace == -1) && (c > ' ')) 809 _nonwhitespace = _length; 810 break; 811 812 case OPEN_TAG + READ_ELEM_NAME: 814 if (c == '>') { 815 _elemQName.length = _length - _elemQName.offset - 1; 816 _elemQName.data = _data; 817 _state = CHAR_DATA; 818 _start = _length; 819 return processElement(OPEN_TAG); 820 } else if (c == '/') { 821 _elemQName.length = _length - _elemQName.offset - 1; 822 _elemQName.data = _data; 823 _state = OPEN_TAG + EMPTY_TAG; 824 } else if ((c == ':') && (_elemPrefix == null)) { 825 _elemPrefix = newSeq(); 826 _elemPrefix.offset = _elemQName.offset; 827 _elemPrefix.length = _length - _elemQName.offset - 1; 828 _elemPrefix.data = _data; 829 } else if (c <= ' ') { 830 _elemQName.length = _length - _elemQName.offset - 1; 831 _elemQName.data = _data; 832 _state = OPEN_TAG + ELEM_NAME_READ; 833 } 834 break; 835 case OPEN_TAG + ELEM_NAME_READ: 836 if (c == '>') { 837 _state = CHAR_DATA; 838 _start = _length; 839 return processElement(OPEN_TAG); 840 } else if (c == '/') { 841 _state = OPEN_TAG + EMPTY_TAG; 842 } else if (c > ' ') { 843 _attrQName = 
newSeq(); 844 _attrQName.offset = _length - 1; 845 _state = OPEN_TAG + READ_ATTR_NAME; 846 } 847 break; 848 case OPEN_TAG + READ_ATTR_NAME: 849 if (c <= ' ') { 850 _attrQName.length = _length - _attrQName.offset - 1; 851 _attrQName.data = _data; 852 _state = OPEN_TAG + ATTR_NAME_READ; 853 } else if (c == '=') { 854 _attrQName.length = _length - _attrQName.offset - 1; 855 _attrQName.data = _data; 856 _state = OPEN_TAG + EQUAL_READ; 857 } else if ((c == ':') && (_attrPrefix == null)) { 858 _attrPrefix = newSeq(); 859 _attrPrefix.offset = _attrQName.offset; 860 _attrPrefix.length = _length - _attrQName.offset - 1; 861 _attrPrefix.data = _data; 862 } 863 break; 864 case OPEN_TAG + ATTR_NAME_READ: 865 if (c == '=') { 866 _state = OPEN_TAG + EQUAL_READ; 867 } else if (c > ' ') { 868 throw error("'=' expected"); 869 } 870 &nb
|