| 1 38 39 40 package com.sun.xml.fastinfoset.stax; 41 42 import com.sun.xml.fastinfoset.Decoder; 43 import com.sun.xml.fastinfoset.DecoderStateTables; 44 import com.sun.xml.fastinfoset.EncodingConstants; 45 import com.sun.xml.fastinfoset.QualifiedName; 46 import com.sun.xml.fastinfoset.algorithm.BuiltInEncodingAlgorithmFactory; 47 import com.sun.xml.fastinfoset.sax.AttributesHolder; 48 import com.sun.xml.fastinfoset.util.CharArray; 49 import com.sun.xml.fastinfoset.util.CharArrayString; 50 import com.sun.xml.fastinfoset.util.XMLChar; 51 import com.sun.xml.fastinfoset.util.EventLocation; 52 import java.io.IOException ; 53 import java.io.InputStream ; 54 import java.util.Iterator ; 55 import java.util.NoSuchElementException ; 56 import javax.xml.namespace.NamespaceContext ; 57 import javax.xml.namespace.QName ; 58 import javax.xml.stream.Location; 59 import javax.xml.stream.XMLStreamException; 60 import javax.xml.stream.XMLStreamReader; 61 import org.jvnet.fastinfoset.EncodingAlgorithm; 62 import org.jvnet.fastinfoset.EncodingAlgorithmException; 63 import org.jvnet.fastinfoset.EncodingAlgorithmIndexes; 64 import org.jvnet.fastinfoset.FastInfosetException; 65 import com.sun.xml.fastinfoset.CommonResourceBundle; 66 67 public class StAXDocumentParser extends Decoder implements XMLStreamReader { 68 protected static final int INTERNAL_STATE_START_DOCUMENT = 0; 69 protected static final int INTERNAL_STATE_START_ELEMENT_TERMINATE = 1; 70 protected static final int INTERNAL_STATE_SINGLE_TERMINATE_ELEMENT_WITH_NAMESPACES = 2; 71 protected static final int INTERNAL_STATE_DOUBLE_TERMINATE_ELEMENT = 3; 72 protected static final int INTERNAL_STATE_END_DOCUMENT = 4; 73 protected static final int INTERNAL_STATE_VOID = -1; 74 75 protected int _internalState; 76 77 80 protected int _eventType; 81 82 85 protected QualifiedName[] _qNameStack = new QualifiedName[32]; 86 protected int[] _namespaceAIIsStartStack = new int[32]; 87 protected int[] _namespaceAIIsEndStack = new int[32]; 88 protected int _stackCount = -1; 89 90 protected String [] _namespaceAIIsPrefix = new String [32]; 91 protected String [] _namespaceAIIsNamespaceName = new String [32]; 92 protected int[] _namespaceAIIsPrefixIndex = new int[32]; 93 protected int _namespaceAIIsIndex; 94 95 98 protected int _currentNamespaceAIIsStart; 99 protected int _currentNamespaceAIIsEnd; 100 101 104 protected QualifiedName _qualifiedName; 105 106 109 protected AttributesHolder _attributes = new AttributesHolder(); 110 111 protected boolean _clearAttributes = false; 112 113 116 protected char[] _characters; 117 protected int _charactersOffset; 118 protected int _charactersLength; 119 120 protected String _algorithmURI; 121 protected int _algorithmId; 122 protected byte[] _algorithmData; 123 protected int _algorithmDataOffset; 124 protected int _algorithmDataLength; 125 126 129 protected String _piTarget; 130 protected String _piData; 131 132 protected NamespaceContextImpl _nsContext = new NamespaceContextImpl(); 133 134 protected String _characterEncodingScheme; 135 136 protected StAXManager _manager; 137 138 public StAXDocumentParser() { 139 reset(); 140 } 141 142 public StAXDocumentParser(InputStream s) { 143 this(); 144 setInputStream(s); 145 } 146 147 public StAXDocumentParser(InputStream s, StAXManager manager) { 148 this(s); 149 _manager = manager; 150 } 151 152 public void setInputStream(InputStream s) { 153 super.setInputStream(s); 154 reset(); 155 } 156 157 public void reset() { 158 super.reset(); 159 if (_internalState != INTERNAL_STATE_START_DOCUMENT && 160 _internalState != INTERNAL_STATE_END_DOCUMENT) { 161 162 for (int i = _namespaceAIIsIndex - 1; i >= 0; i--) { 163 _prefixTable.popScopeWithPrefixEntry(_namespaceAIIsPrefixIndex[i]); 164 } 165 166 _stackCount = -1; 167 168 _namespaceAIIsIndex = 0; 169 _characters = null; 170 _algorithmData = null; 171 } 172 173 _characterEncodingScheme = "UTF-8"; 174 _eventType = START_DOCUMENT; 175 _internalState = INTERNAL_STATE_START_DOCUMENT; 176 } 177 178 protected void resetOnError() { 179 super.reset(); 180 181 if (_v != null) { 182 _prefixTable.clearCompletely(); 183 } 184 _duplicateAttributeVerifier.clear(); 185 186 _stackCount = -1; 187 188 _namespaceAIIsIndex = 0; 189 _characters = null; 190 _algorithmData = null; 191 192 _eventType = START_DOCUMENT; 193 _internalState = INTERNAL_STATE_START_DOCUMENT; 194 } 195 196 198 public Object getProperty(java.lang.String name) 199 throws java.lang.IllegalArgumentException { 200 if (_manager != null) { 201 return _manager.getProperty(name); 202 } 203 return null; 204 } 205 206 public int next() throws XMLStreamException { 207 try { 208 if (_internalState != INTERNAL_STATE_VOID) { 209 switch (_internalState) { 210 case INTERNAL_STATE_START_DOCUMENT: 211 decodeHeader(); 212 processDII(); 213 214 _internalState = INTERNAL_STATE_VOID; 215 break; 216 case INTERNAL_STATE_START_ELEMENT_TERMINATE: 217 if (_currentNamespaceAIIsEnd > 0) { 218 for (int i = _currentNamespaceAIIsEnd - 1; i >= _currentNamespaceAIIsStart; i--) { 219 _prefixTable.popScopeWithPrefixEntry(_namespaceAIIsPrefixIndex[i]); 220 } 221 _namespaceAIIsIndex = _currentNamespaceAIIsStart; 222 } 223 224 popStack(); 226 227 _internalState = INTERNAL_STATE_VOID; 228 return _eventType = END_ELEMENT; 229 case INTERNAL_STATE_SINGLE_TERMINATE_ELEMENT_WITH_NAMESPACES: 230 for (int i = _currentNamespaceAIIsEnd - 1; i >= _currentNamespaceAIIsStart; i--) { 232 _prefixTable.popScopeWithPrefixEntry(_namespaceAIIsPrefixIndex[i]); 233 } 234 _namespaceAIIsIndex = _currentNamespaceAIIsStart; 235 _internalState = INTERNAL_STATE_VOID; 236 break; 237 case INTERNAL_STATE_DOUBLE_TERMINATE_ELEMENT: 238 if (_currentNamespaceAIIsEnd > 0) { 240 for (int i = _currentNamespaceAIIsEnd - 1; i >= _currentNamespaceAIIsStart; i--) { 241 _prefixTable.popScopeWithPrefixEntry(_namespaceAIIsPrefixIndex[i]); 242 } 243 _namespaceAIIsIndex = _currentNamespaceAIIsStart; 244 } 245 246 if (_stackCount == -1) { 247 _internalState = INTERNAL_STATE_END_DOCUMENT; 248 return _eventType = END_DOCUMENT; 249 } 250 251 popStack(); 253 254 _internalState = (_currentNamespaceAIIsEnd > 0) ? 255 INTERNAL_STATE_SINGLE_TERMINATE_ELEMENT_WITH_NAMESPACES : 256 INTERNAL_STATE_VOID; 257 return _eventType = END_ELEMENT; 258 case INTERNAL_STATE_END_DOCUMENT: 259 throw new NoSuchElementException (CommonResourceBundle.getInstance().getString("message.noMoreEvents")); 260 } 261 } 262 263 _characters = null; 265 _algorithmData = null; 266 _currentNamespaceAIIsEnd = 0; 267 268 final int b = read(); 270 switch(DecoderStateTables.EII[b]) { 271 case DecoderStateTables.EII_NO_AIIS_INDEX_SMALL: 272 processEII(_elementNameTable._array[b], false); 273 return _eventType; 274 case DecoderStateTables.EII_AIIS_INDEX_SMALL: 275 processEII(_elementNameTable._array[b & EncodingConstants.INTEGER_3RD_BIT_SMALL_MASK], true); 276 return _eventType; 277 case DecoderStateTables.EII_INDEX_MEDIUM: 278 processEII(processEIIIndexMedium(b), (b & EncodingConstants.ELEMENT_ATTRIBUTE_FLAG) > 0); 279 return _eventType; 280 case DecoderStateTables.EII_INDEX_LARGE: 281 processEII(processEIIIndexLarge(b), (b & EncodingConstants.ELEMENT_ATTRIBUTE_FLAG) > 0); 282 return _eventType; 283 case DecoderStateTables.EII_LITERAL: 284 { 285 final QualifiedName qn = processLiteralQualifiedName( 286 b & EncodingConstants.LITERAL_QNAME_PREFIX_NAMESPACE_NAME_MASK); 287 _elementNameTable.add(qn); 288 processEII(qn, (b & EncodingConstants.ELEMENT_ATTRIBUTE_FLAG) > 0); 289 return _eventType; 290 } 291 case DecoderStateTables.EII_NAMESPACES: 292 processEIIWithNamespaces((b & EncodingConstants.ELEMENT_ATTRIBUTE_FLAG) > 0); 293 return _eventType; 294 case DecoderStateTables.CII_UTF8_SMALL_LENGTH: 295 _octetBufferLength = (b & EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_MASK) 296 + 1; 297 decodeUtf8StringAsCharBuffer(); 298 if ((b & EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG) > 0) { 299 _characterContentChunkTable.add(_charBuffer, _charBufferLength); 300 } 301 302 _characters = _charBuffer; 303 _charactersOffset = 0; 304 _charactersLength = _charBufferLength; 305 return _eventType = CHARACTERS; 306 case DecoderStateTables.CII_UTF8_MEDIUM_LENGTH: 307 _octetBufferLength = read() + EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_LIMIT; 308 decodeUtf8StringAsCharBuffer(); 309 if ((b & EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG) > 0) { 310 _characterContentChunkTable.add(_charBuffer, _charBufferLength); 311 } 312 313 _characters = _charBuffer; 314 _charactersOffset = 0; 315 _charactersLength = _charBufferLength; 316 return _eventType = CHARACTERS; 317 case DecoderStateTables.CII_UTF8_LARGE_LENGTH: 318 _octetBufferLength = ((read() << 24) | 319 (read() << 16) | 320 (read() << 8) | 321 read()) 322 + EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_LIMIT; 323 decodeUtf8StringAsCharBuffer(); 324 if ((b & EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG) > 0) { 325 _characterContentChunkTable.add(_charBuffer, _charBufferLength); 326 } 327 328 _characters = _charBuffer; 329 _charactersOffset = 0; 330 _charactersLength = _charBufferLength; 331 return _eventType = CHARACTERS; 332 case DecoderStateTables.CII_UTF16_SMALL_LENGTH: 333 _octetBufferLength = (b & EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_MASK) 334 + 1; 335 decodeUtf16StringAsCharBuffer(); 336 if ((b & EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG) > 0) { 337 _characterContentChunkTable.add(_charBuffer, _charBufferLength); 338 } 339 340 _characters = _charBuffer; 341 _charactersOffset = 0; 342 _charactersLength = _charBufferLength; 343 return _eventType = CHARACTERS; 344 case DecoderStateTables.CII_UTF16_MEDIUM_LENGTH: 345 _octetBufferLength = read() + EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_LIMIT; 346 decodeUtf16StringAsCharBuffer(); 347 if ((b & EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG) > 0) { 348 _characterContentChunkTable.add(_charBuffer, _charBufferLength); 349 } 350 351 _characters = _charBuffer; 352 _charactersOffset = 0; 353 _charactersLength = _charBufferLength; 354 return _eventType = CHARACTERS; 355 case DecoderStateTables.CII_UTF16_LARGE_LENGTH: 356 _octetBufferLength = ((read() << 24) | 357 (read() << 16) | 358 (read() << 8) | 359 read()) 360 + EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_LIMIT; 361 decodeUtf16StringAsCharBuffer(); 362 if ((b & EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG) > 0) { 363 _characterContentChunkTable.add(_charBuffer, _charBufferLength); 364 } 365 366 _characters = _charBuffer; 367 _charactersOffset = 0; 368 _charactersLength = _charBufferLength; 369 return _eventType = CHARACTERS; 370 case DecoderStateTables.CII_RA: 371 { 372 final boolean addToTable = (_b & EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG) > 0; 373 374 _identifier = (b & 0x02) << 6; 375 final int b2 = read(); 376 _identifier |= (b2 & 0xFC) >> 2; 377 378 decodeOctetsOnSeventhBitOfNonIdentifyingStringOnThirdBit(b2); 379 380 decodeRestrictedAlphabetAsCharBuffer(); 381 382 if (addToTable) { 383 _characterContentChunkTable.add(_charBuffer, _charBufferLength); 384 } 385 386 _characters = _charBuffer; 387 _charactersOffset = 0; 388 _charactersLength = _charBufferLength; 389 return _eventType = CHARACTERS; 390 } 391 case DecoderStateTables.CII_EA: 392 { 393 if ((b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0) { 394 throw new EncodingAlgorithmException(CommonResourceBundle.getInstance().getString("message.addToTableNotSupported")); 395 } 396 397 _algorithmId = (b & 0x02) << 6; 399 final int b2 = read(); 400 _algorithmId |= (b2 & 0xFC) >> 2; 401 402 decodeOctetsOnSeventhBitOfNonIdentifyingStringOnThirdBit(b2); 403 processCIIEncodingAlgorithm(); 404 405 return _eventType = CHARACTERS; 406 } 407 case DecoderStateTables.CII_INDEX_SMALL: 408 { 409 final int index = b & EncodingConstants.INTEGER_4TH_BIT_SMALL_MASK; 410 411 _characters = _characterContentChunkTable._array; 412 _charactersOffset = _characterContentChunkTable._offset[index]; 413 _charactersLength = _characterContentChunkTable._length[index]; 414 return _eventType = CHARACTERS; 415 } 416 case DecoderStateTables.CII_INDEX_MEDIUM: 417 { 418 final int index = (((b & EncodingConstants.INTEGER_4TH_BIT_MEDIUM_MASK) << 8) | read()) 419 + EncodingConstants.INTEGER_4TH_BIT_SMALL_LIMIT; 420 421 _characters = _characterContentChunkTable._array; 422 _charactersOffset = _characterContentChunkTable._offset[index]; 423 _charactersLength = _characterContentChunkTable._length[index]; 424 return _eventType = CHARACTERS; 425 } 426 case DecoderStateTables.CII_INDEX_LARGE: 427 { 428 final int index = (((b & EncodingConstants.INTEGER_4TH_BIT_LARGE_MASK) << 16) | 429 (read() << 8) | 430 read()) 431 + EncodingConstants.INTEGER_4TH_BIT_MEDIUM_LIMIT; 432 433 _characters = _characterContentChunkTable._array; 434 _charactersOffset = _characterContentChunkTable._offset[index]; 435 _charactersLength = _characterContentChunkTable._length[index]; 436 return _eventType = CHARACTERS; 437 } 438 case DecoderStateTables.CII_INDEX_LARGE_LARGE: 439 { 440 final int index = ((read() << 16) | 441 (read() << 8) | 442 read()) 443 + EncodingConstants.INTEGER_4TH_BIT_LARGE_LIMIT; 444 445 _characters = _characterContentChunkTable._array; 446 _charactersOffset = _characterContentChunkTable._offset[index]; 447 _charactersLength = _characterContentChunkTable._length[index]; 448 return _eventType = CHARACTERS; 449 } 450 case DecoderStateTables.COMMENT_II: 451 processCommentII(); 452 return _eventType; 453 case DecoderStateTables.PROCESSING_INSTRUCTION_II: 454 processProcessingII(); 455 return _eventType; 456 case DecoderStateTables.UNEXPANDED_ENTITY_REFERENCE_II: 457 { 458 462 String entity_reference_name = decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherNCName); 463 464 String system_identifier = ((b & EncodingConstants.UNEXPANDED_ENTITY_SYSTEM_IDENTIFIER_FLAG) > 0) 465 ? decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherURI) : ""; 466 String public_identifier = ((b & EncodingConstants.UNEXPANDED_ENTITY_PUBLIC_IDENTIFIER_FLAG) > 0) 467 ? decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherURI) : ""; 468 return _eventType; 469 } 470 case DecoderStateTables.TERMINATOR_DOUBLE: 471 if (_stackCount != -1) { 472 popStack(); 474 475 _internalState = INTERNAL_STATE_DOUBLE_TERMINATE_ELEMENT; 476 return _eventType = END_ELEMENT; 477 } 478 479 _internalState = INTERNAL_STATE_END_DOCUMENT; 480 return _eventType = END_DOCUMENT; 481 case DecoderStateTables.TERMINATOR_SINGLE: 482 if (_stackCount != -1) { 483 popStack(); 485 486 if (_currentNamespaceAIIsEnd > 0) { 487 _internalState = INTERNAL_STATE_SINGLE_TERMINATE_ELEMENT_WITH_NAMESPACES; 488 } 489 return _eventType = END_ELEMENT; 490 } 491 492 _internalState = INTERNAL_STATE_END_DOCUMENT; 493 return _eventType = END_DOCUMENT; 494 default: 495 throw new FastInfosetException(CommonResourceBundle.getInstance().getString("message.IllegalStateDecodingEII")); 496 } 497 } catch (IOException e) { 498 resetOnError(); 499 e.printStackTrace(); 500 throw new XMLStreamException(e); 501 } catch (FastInfosetException e) { 502 resetOnError(); 503 e.printStackTrace(); 504 throw new XMLStreamException(e); 505 } catch (RuntimeException e) { 506 resetOnError(); 507 e.printStackTrace(); 508 throw e; 509 } 510 } 511 512 private final void popStack() { 513 _qualifiedName = _qNameStack[_stackCount]; 515 _currentNamespaceAIIsStart = _namespaceAIIsStartStack[_stackCount]; 516 _currentNamespaceAIIsEnd = _namespaceAIIsEndStack[_stackCount]; 517 _qNameStack[_stackCount--] = null; 518 } 519 520 527 public final void require(int type, String namespaceURI, String localName) 528 throws XMLStreamException { 529 if( type != _eventType) 530 throw new XMLStreamException(CommonResourceBundle.getInstance().getString("message.eventTypeNotMatch", new Object []{getEventTypeString(type)})); 531 if( namespaceURI != null && !namespaceURI.equals(getNamespaceURI()) ) 532 throw new XMLStreamException(CommonResourceBundle.getInstance().getString("message.namespaceURINotMatch", new Object []{namespaceURI})); 533 if(localName != null && !localName.equals(getLocalName())) 534 throw new XMLStreamException(CommonResourceBundle.getInstance().getString("message.localNameNotMatch", new Object []{localName})); 535 536 return; 537 } 538 539 545 public final String getElementText() throws XMLStreamException { 546 547 if(getEventType() != START_ELEMENT) { 548 throw new XMLStreamException( 549 CommonResourceBundle.getInstance().getString("message.mustBeOnSTARTELEMENT"), getLocation()); 550 } 551 int eventType = next(); 553 return getElementText(true); 554 } 555 558 public final String getElementText(boolean startElementRead) throws XMLStreamException { 559 if (!startElementRead) { 560 throw new XMLStreamException( 561 CommonResourceBundle.getInstance().getString("message.mustBeOnSTARTELEMENT"), getLocation()); 562 } 563 int eventType = getEventType(); 564 StringBuffer content = new StringBuffer (); 565 while(eventType != END_ELEMENT ) { 566 if(eventType == CHARACTERS 567 || eventType == CDATA 568 || eventType == SPACE 569 || eventType == ENTITY_REFERENCE) { 570 content.append(getText()); 571 } else if(eventType == PROCESSING_INSTRUCTION 572 || eventType == COMMENT) { 573 } else if(eventType == END_DOCUMENT) { 575 throw new XMLStreamException(CommonResourceBundle.getInstance().getString("message.unexpectedEOF")); 576 } else if(eventType == START_ELEMENT) { 577 throw new XMLStreamException( 578 CommonResourceBundle.getInstance().getString("message.getElementTextExpectTextOnly"), getLocation()); 579 } else { 580 throw new XMLStreamException( 581 CommonResourceBundle.getInstance().getString("message.unexpectedEventType")+ getEventTypeString(eventType), getLocation()); 582 } 583 eventType = next(); 584 } 585 return content.toString(); 586 } 587 588 601 public final int nextTag() throws XMLStreamException { 602 int eventType = next(); 603 return nextTag(true); 604 } 605 608 public final int nextTag(boolean currentTagRead) throws XMLStreamException { 609 int eventType = getEventType(); 610 if (!currentTagRead) { 611 eventType = next(); 612 } 613 while((eventType == CHARACTERS && isWhiteSpace()) || (eventType == CDATA && isWhiteSpace()) 615 || eventType == SPACE 616 || eventType == PROCESSING_INSTRUCTION 617 || eventType == COMMENT) { 618 eventType = next(); 619 } 620 if (eventType != START_ELEMENT && eventType != END_ELEMENT) { 621 throw new XMLStreamException(CommonResourceBundle.getInstance().getString("message.expectedStartOrEnd"), getLocation()); 622 } 623 return eventType; 624 } 625 626 public final boolean hasNext() throws XMLStreamException { 627 return (_eventType != END_DOCUMENT); 628 } 629 630 public void close() throws XMLStreamException { 631 } 632 633 public final String getNamespaceURI(String prefix) { 634 String namespace = getNamespaceDecl(prefix); 635 if (namespace == null) { 636 if (prefix == null) { 637 throw new IllegalArgumentException (CommonResourceBundle.getInstance().getString("message.nullPrefix")); 638 } 639 return null; } 641 return namespace; 642 } 643 644 public final boolean isStartElement() { 645 return (_eventType == START_ELEMENT); 646 } 647 648 public final boolean isEndElement() { 649 return (_eventType == END_ELEMENT); 650 } 651 652 public final boolean isCharacters() { 653 return (_eventType == CHARACTERS); 654 } 655 656 662 public final boolean isWhiteSpace() { 663 if(isCharacters() || (_eventType == CDATA)){ 664 char [] ch = this.getTextCharacters(); 665 int start = this.getTextStart(); 666 int length = this.getTextLength(); 667 for (int i=start; i< length;i++){ 668 if(!XMLChar.isSpace(ch[i])){ 669 return false; 670 } 671 } 672 return true; 673 } 674 return false; 675 } 676 677 public final String getAttributeValue(String namespaceURI, String localName) { 678 if (_eventType != START_ELEMENT) { 679 throw new IllegalStateException (CommonResourceBundle.getInstance().getString("message.invalidCallingGetAttributeValue")); 680 } 681 682 |