1 29 30 package com.caucho.xml2; 31 32 import com.caucho.util.CharBuffer; 33 import com.caucho.vfs.*; 34 import com.caucho.xml2.readers.MacroReader; 35 import com.caucho.xml2.readers.Utf16Reader; 36 import com.caucho.xml2.readers.Utf8Reader; 37 import com.caucho.xml2.readers.XmlReader; 38 39 import org.w3c.dom.Document ; 40 import org.w3c.dom.Node ; 41 import org.xml.sax.InputSource ; 42 import org.xml.sax.Locator ; 43 import org.xml.sax.SAXException ; 44 import org.xml.sax.SAXParseException ; 45 46 import javax.xml.namespace.QName ; 47 import java.io.FileNotFoundException ; 48 import java.io.IOException ; 49 import java.io.InputStream ; 50 import java.util.ArrayList ; 51 import java.util.Arrays ; 52 import java.util.logging.Level ; 53 54 60 public class XmlParser extends AbstractParser { 61 public static final String XMLNS = "http://www.w3.org/2000/xmlns/"; 63 public static final String XML = "http://www.w3.org/XML/1998/namespace"; 64 65 static final QName DOC_NAME = new QName ("#document"); 66 static final QName TEXT_NAME = new QName ("#text"); 67 static final QName WHITESPACE_NAME = new QName ("#whitespace"); 68 69 private static final boolean []XML_NAME_CHAR; 70 71 QAttributes _attributes; 72 QAttributes _nullAttributes; 73 74 CharBuffer _text; 75 CharBuffer _eltName; 76 CharBuffer _cb; 77 CharBuffer _buf = new CharBuffer(); 78 String _textFilename; 79 int _textLine; 80 81 TempCharBuffer _tempInputBuffer; 82 char []_inputBuffer; 83 int _inputOffset; 84 int _inputLength; 85 86 char []_textBuffer = new char[1024]; 87 int _textLength; 88 int _textCapacity = _textBuffer.length; 89 boolean _isIgnorableWhitespace; 90 91 char []_valueBuffer = _textBuffer; 92 93 CharBuffer _name = new CharBuffer(); 94 CharBuffer _nameBuffer = new CharBuffer(); 95 96 MacroReader _macro = new MacroReader(); 97 int _macroIndex = 0; 98 int _macroLength = 0; 99 char []_macroBuffer; 100 101 int []_elementLines = new int[64]; 102 int _elementTop; 103 104 ArrayList <SaxIntern.Entry> _attrNames = new ArrayList <SaxIntern.Entry>(); 105 ArrayList <String > _attrValues = new ArrayList <String >(); 106 107 ReadStream _is; 108 XmlReader _reader; 109 110 String _extPublicId; 111 String _extSystemId; 112 113 NamespaceContextImpl _namespace = new NamespaceContextImpl(); 114 SaxIntern _intern = new SaxIntern(_namespace);; 115 116 QName _activeNode; 117 QName _topNamespaceNode; 118 boolean _isTagStart; 119 boolean _stopOnIncludeEnd; 120 boolean _hasTopElement; 121 boolean _hasDoctype; 122 Locator _locator = new LocatorImpl(this); 123 124 public XmlParser() 125 { 126 } 127 128 134 XmlParser(QDocumentType dtd) 135 { 136 super(dtd); 137 } 138 139 142 void init() 143 { 144 super.init(); 145 146 _attributes = new QAttributes(); 147 _nullAttributes = new QAttributes(); 148 _eltName = new CharBuffer(); 149 _text = new CharBuffer(); 150 151 _textLength = 0; 152 _isIgnorableWhitespace = true; 153 _elementTop = 0; 154 _elementLines[0] = 1; 155 156 _line = 1; 157 158 _dtd = null; 159 _isTagStart = false; 160 _stopOnIncludeEnd = false; 161 162 _extPublicId = null; 163 _extSystemId = null; 164 165 _filename = null; 166 _publicId = null; 167 _systemId = null; 168 169 _hasTopElement = false; 170 _hasDoctype = false; 171 172 _macroIndex = 0; 173 _macroLength = 0; 174 175 _reader = null; 176 177 } 179 180 187 Document parseInt(ReadStream is) 188 throws IOException , SAXException  189 { 190 _tempInputBuffer = TempCharBuffer.allocate(); 191 _inputBuffer = _tempInputBuffer.getBuffer(); 192 _inputLength = _inputOffset = 0; 193 194 _is = is; 195 196 if (_filename == null && _systemId != null) 197 _filename = _systemId; 198 else if (_filename == null) 199 _filename = _is.getUserPath(); 200 201 if (_systemId == null) { 202 _systemId = _is.getPath().getURL(); 203 if ("null:".equals(_systemId) || "string:".equals(_systemId)) 204 _systemId = "stream"; 205 } 206 207 if (_filename == null) 208 _filename = _systemId; 209 210 if (_filename == null) 211 _filename = "stream"; 212 213 if (_dtd != null) 214 _dtd.setSystemId(_systemId); 215 216 if (_builder != null) { 217 if (! "string:".equals(_systemId) && ! "stream".equals(_systemId)) 218 _builder.setSystemId(_systemId); 219 _builder.setFilename(_is.getPath().getURL()); 220 } 221 222 if (_contentHandler == null) 223 _contentHandler = new org.xml.sax.helpers.DefaultHandler (); 224 225 _contentHandler.setDocumentLocator(_locator); 226 227 if (_owner == null) 228 _owner = new QDocument(); 229 if (_defaultEncoding != null) 230 _owner.setAttribute("encoding", _defaultEncoding); 231 _owner.addDepend(is.getPath()); 232 233 _activeNode = DOC_NAME; 234 235 _contentHandler.startDocument(); 236 237 parseXMLDeclaration(null); 238 239 parseNode(); 240 241 247 248 if (! _hasTopElement) 249 throw error(L.l("XML file has no top-element. All well-formed XML files have a single top-level element.")); 250 251 _contentHandler.endDocument(); 252 253 QDocument owner = _owner; 254 _owner = null; 255 256 return owner; 257 } 258 259 265 private void parseNode() 266 throws IOException , SAXException  267 { 268 char []valueBuffer = _valueBuffer; 269 int valueLength = valueBuffer.length; 270 int valueOffset = 0; 271 boolean isWhitespace = true; 272 273 char []inputBuffer = _inputBuffer; 274 int inputLength = _inputLength; 275 int inputOffset = _inputOffset; 276 277 loop: 278 while (true) { 279 int ch; 280 281 if (inputOffset < inputLength) 282 ch = inputBuffer[inputOffset++]; 283 else if (fillBuffer()) { 284 inputBuffer = _inputBuffer; 285 inputOffset = _inputOffset; 286 inputLength = _inputLength; 287 288 ch = inputBuffer[inputOffset++]; 289 } 290 else { 291 if (valueOffset > 0) 292 addText(valueBuffer, 0, valueOffset, isWhitespace); 293 294 _inputOffset = inputOffset; 295 _inputLength = inputLength; 296 297 close(); 298 return; 299 } 300 301 switch (ch) { 302 case '\n': 303 _line++; 304 valueBuffer[valueOffset++] = (char) ch; 305 break; 306 307 case ' ': case '\t': case '\r': 308 valueBuffer[valueOffset++] = (char) ch; 309 break; 310 311 case 0xffff: 312 if (valueOffset > 0) 314 addText(valueBuffer, 0, valueOffset, isWhitespace); 315 316 _inputOffset = inputOffset; 317 _inputLength = inputLength; 318 return; 319 320 case '&': 321 if (valueOffset > 0) 322 addText(valueBuffer, 0, valueOffset, isWhitespace); 323 324 _inputOffset = inputOffset; 325 _inputLength = inputLength; 326 327 parseEntityReference(); 328 329 inputOffset = _inputOffset; 330 inputLength = _inputOffset; 331 break; 332 333 case '<': 334 if (valueOffset > 0) 335 addText(valueBuffer, 0, valueOffset, isWhitespace); 336 337 _inputOffset = inputOffset; 338 _inputLength = inputLength; 339 340 ch = read(); 341 342 if (ch == '/') { 343 SaxIntern.Entry entry = parseName(0, false); 344 345 ch = read(); 346 347 if (ch != '>') { 348 throw error(L.l("'</{0}>' expected '>' at {1}. Closing tags must close immediately after the tag name.", 349 entry.getName(), badChar(ch))); 350 } 351 352 _namespace.pop(entry); 353 } 354 else if (XmlChar.isNameStart(ch)) { 356 parseElement(ch); 357 ch = read(); 358 } 359 else if (ch == '!') { 361 if ((ch = read()) == '[') { 363 parseCdata(); 364 ch = read(); 365 } 366 else if (ch == '-') { 368 parseComment(); 369 370 ch = read(); 371 } 372 else if (XmlChar.isNameStart(ch)) { 373 unread(ch); 374 375 SaxIntern.Entry entry = parseName(0, false); 376 377 String declName = entry.getName(); 378 if (declName.equals("DOCTYPE")) { 379 parseDoctype(); 380 if (_contentHandler instanceof DOMBuilder) 381 ((DOMBuilder) _contentHandler).dtd(_dtd); 382 } 383 else 384 throw error(L.l("expected '<!DOCTYPE' declaration at {0}", declName)); 385 } 386 else 387 throw error(L.l("expected '<!DOCTYPE' declaration at {0}", badChar(ch))); 388 } 389 else if (ch == '?') { 391 parsePI(); 392 } 393 else { 394 throw error(L.l("expected tag name after '<' at {0}. Open tag names must immediately follow the open brace like '<foo ...>'", badChar(ch))); 395 } 396 397 inputOffset = _inputOffset; 398 inputLength = _inputLength; 399 break; 400 401 default: 402 isWhitespace = false; 403 valueBuffer[valueOffset++] = (char) ch; 404 break; 405 } 406 407 if (valueOffset == valueLength) { 408 addText(valueBuffer, 0, valueOffset, isWhitespace); 409 410 valueOffset = 0; 411 } 412 } 413 } 414 415 418 private void parseDoctype() 419 throws IOException , SAXException  420 { 421 if (_activeNode != DOC_NAME) 422 throw error(L.l("<!DOCTYPE immediately follow the <?xml ...?> declaration.")); 423 424 int ch = skipWhitespace(read()); 425 ch = _reader.parseName(_nameBuffer, ch); 426 String name = _nameBuffer.toString(); 427 ch = skipWhitespace(ch); 428 429 if (_dtd == null) 430 _dtd = new QDocumentType(name); 431 432 _dtd.setName(name); 433 434 if (XmlChar.isNameStart(ch)) { 435 ch = parseExternalID(ch); 436 ch = skipWhitespace(ch); 437 438 _dtd._publicId = _extPublicId; 439 _dtd._systemId = _extSystemId; 440 } 441 442 if (_dtd._systemId != null && ! _dtd._systemId.equals("")) { 443 InputStream is = null; 444 445 unread(ch); 446 447 XmlReader oldReader = _reader; 448 boolean hasInclude = false; 449 450 try { 451 pushInclude(_extPublicId, _extSystemId); 452 hasInclude = true; 453 } catch (Exception e) { 454 if (log.isLoggable(Level.FINEST)) 455 log.log(Level.FINER, e.toString(), e); 456 else 457 log.finer(e.toString()); 458 } 459 460 if (hasInclude) { 461 _stopOnIncludeEnd = true; 462 try { 463 DtdParser dtdParser = new DtdParser(this, _dtd); 464 ch = dtdParser.parseDoctypeDecl(_dtd); 465 } catch (XmlParseException e) { 466 if (_extSystemId != null && 467 _extSystemId.startsWith("http")) { 468 log.log(Level.FINE, e.toString(), e); 469 } 470 else 471 throw e; 472 } 473 _stopOnIncludeEnd = false; 474 475 while (_reader != null && _reader != oldReader) 476 popInclude(); 477 } 478 479 if (_reader != null) 480 ch = skipWhitespace(read()); 481 } 482 483 if (ch == '[') { 484 DtdParser dtdParser = new DtdParser(this, _dtd); 485 ch = dtdParser.parseDoctypeDecl(_dtd); 486 } 487 488 ch = skipWhitespace(ch); 489 490 if (ch != '>') 491 throw error(L.l("expected '>' in <!DOCTYPE at {0}", 492 badChar(ch))); 493 } 494 495 500 private void parseElement(int ch) 501 throws IOException , SAXException  502 { 503 unread(ch); 504 505 SaxIntern.Entry entry = parseName(0, false); 506 507 _namespace.push(entry); 508 509 ch = read(); 510 511 if (ch != '>' && ch != '/') { 512 ch = parseAttributes(ch, true); 513 } 514 else 515 _attributes.clear(); 516 517 QName qName = entry.getQName(); 518 519 if (_isValidating && _dtd != null) { 520 QElementDef elementDef = _dtd.getElement(qName.getLocalPart()); 521 522 if (elementDef != null) 523 elementDef.fillDefaults(_attributes); 524 } 525 526 _contentHandler.startElement(entry.getUri(), 527 entry.getLocalName(), 528 entry.getName(), 529 _attributes); 530 531 _hasTopElement = true; 532 533 if (ch == '/') { 534 if ((ch = read()) == '>') { 536 _contentHandler.endElement(entry.getUri(), 537 entry.getLocalName(), 538 entry.getName()); 539 540 _namespace.pop(entry); 541 } 542 else { 544 throw error(L.l("unexpected character {0} after '/', expected '/>'", 545 badChar(ch), entry.getName())); 546 } 547 } 548 else if (ch != '>') { 549 throw error(L.l("unexpected character {0} while parsing '{1}' attributes. Expected an attribute name or '>' or '/>'. XML element syntax is:\n <name attr-1=\"value-1\" ... attr-n=\"value-n\">", 550 badChar(ch), entry.getName())); 551 } 552 } 553 554 561 private int parseAttributes(int ch, boolean isElement) 562 throws IOException , SAXException  563 { 564 _attributes.clear(); 565 566 _attrNames.clear(); 567 _attrValues.clear(); 568 569 while (ch != -1) { 570 boolean hasWhitespace = false; 571 572 while (ch <= 0x20 573 && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) { 574 hasWhitespace = true; 575 ch = read(); 576 } 577 578 if (! XmlChar.isNameStart(ch)) { 579 break; 580 } 581 582 if (! hasWhitespace) 583 throw error(L.l("attributes must be separated by whitespace")); 584 585 hasWhitespace = false; 586 587 unread(ch); 588 589 SaxIntern.Entry entry = parseName(0, true); 590 591 ch = read(); 592 593 594 while (ch <= 0x20 595 && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) { 596 ch = read(); 597 } 598 599 String value = null; 600 601 if (ch != '=') { 602 throw error(L.l("attribute '{0}' expects value at {1}. XML requires attributes to have explicit values.", 603 entry.getName(), badChar(ch))); 604 } 605 606 ch = read(); 607 608 while (ch <= 0x20 609 && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) { 610 ch = read(); 611 } 612 613 value = parseValue(ch); 614 615 ch = read(); 616 617 if (entry.isXmlns()) { 618 String prefix; 619 620 if (entry.getPrefix() != null) 621 prefix = entry.getLocalName(); 622 else 623 prefix = ""; 624 625 String uri = value; 626 627 if (_isXmlnsPrefix) { 628 _contentHandler.startPrefixMapping(prefix, uri); 629 } 630 631 if (isElement && _isXmlnsAttribute) { 633 _attributes.add(entry.getQName(), uri); 634 } 635 } 636 else { 637 _attrNames.add(entry); 638 _attrValues.add(value); 639 } 640 } 641 642 int len = _attrNames.size(); 643 for (int i = 0; i < len; i++) { 644 SaxIntern.Entry attrEntry = _attrNames.get(i); 645 String value = _attrValues.get(i); 646 647 QName name = attrEntry.getQName(); 648 649 _attributes.add(name, value); 650 } 651 652 return ch; 653 } 654 655 663 private int parseEntityReference() 664 throws IOException , SAXException  665 { 666 int ch; 667 668 ch = read(); 669 670 if (ch == '#') { 672 addText((char) parseCharacterReference()); 673 674 return read(); 675 } 676 else if (XmlChar.isNameStart(ch)) { 678 ch = _reader.parseName(_buf, ch); 679 680 if (ch != ';' && _strictXml) 681 throw error(L.l("'&{0};' expected ';' at {0}. Entity references have a '&name;' syntax.", _buf, badChar(ch))); 682 else if (ch != ';') { 683 addText('&'); 684 addText(_buf.toString()); 685 return ch; 686 } 687 688 addEntityReference(_buf.toString()); 689 690 ch = read(); 691 692 return ch; 693 } else if (_strictXml) { 694 throw error(L.l("expected name at {0}", badChar(ch))); 695 } else { 696 addText('&'); 697 return ch; 698 } 699 } 700 701 private int parseCharacterReference() 702 throws IOException , SAXException  703 { 704 int ch = read(); 705 706 int radix = 10; 707 if (ch == 'x') { 708 radix = 16; 709 ch = read(); 710 } 711 712 int value = 0; 713 for (; ch != ';'; ch = read()) { 714 if (ch >= '0' && ch <= '9') 715 value = radix * value + ch - '0'; 716 else if (radix == 16 && ch >= 'a' && ch <= 'f') 717 value = radix * value + ch - 'a' + 10; 718 else if (radix == 16 && ch >= 'A' && ch <= 'F') 719 value = radix * value + ch - 'A' + 10; 720 else 721 throw error(L.l("malformed entity ref at {0}", badChar(ch))); 722 } 723 724 if (value > 0xffff) 725 throw error(L.l("malformed entity ref at {0}", "" + value)); 726 727 if (_strictCharacters && ! isChar(value)) 729 throw error(L.l("illegal character ref at {0}", badChar(value))); 730 731 return value; 732 } 733 734 737 private void addEntityReference(String name) 738 throws IOException , SAXException  739 { 740 boolean expand = ! _entitiesAsText || _hasDoctype; 741 743 if (! expand) { 744 addText("&" + name + ";"); 745 return; 746 } 747 748 int ch = _entities.getEntity(name); 749 if (ch >= 0 && ch <= 0xffff) { 750 addText((char) ch); 751 return; 752 } 753 754 QEntity entity = _dtd == null ? null : _dtd.getEntity(name); 755 756 if (! _expandEntities) { 757 addText("&" + name + ";"); 758 return; 759 } 760 761 if (entity == null && (_dtd == null || _dtd.getName() == null || 762 ! _dtd.isExternal())) { 763 throw error(L.l("'&{0};' is an unknown entity. XML predefines only '<', '&', '>', ''' and '"'. All other entities must be defined in an <!ENTITY> definition in the DTD.", name)); 764 } 765 else if (entity != null) { 766 if (entity._isSpecial && entity._value != null) 767 addText(entity._value); 768 else if (entity.getSystemId() != null) { 769 if (pushSystemEntity(entity)) { 770 } 771 776 else if (_contentHandler instanceof DOMBuilder) { 777 ((DOMBuilder) _contentHandler).entityReference(name); 778 } 779 else 780 addText("&" + name + ";"); 781 } 782 else if (expand && entity._value != null) 783 setMacro(entity._value); 784 else 785 addText("&" + name + ";"); 786 } 787 else { 788 if (_contentHandler instanceof DOMBuilder) { 789 ((DOMBuilder) _contentHandler).entityReference(name); 790 } 791 else addText("&" + name + ";"); 793 } 794 } 795 796 private boolean pushSystemEntity(QEntity entity) 797 throws IOException , SAXException  798 { 799 String publicId = entity.getPublicId(); 800 String systemId = entity.getSystemId(); 801 String value = null; 802 InputSource source = null; 803 ReadStream is = null; 804 805 if (_entityResolver != null) 806 source = _entityResolver.resolveEntity(publicId, systemId); 807 808 if (source != null && source.getByteStream() != null) 809 is = Vfs.openRead(source.getByteStream()); 810 else if (source != null && source.getCharacterStream() != null) 811 is = Vfs.openRead(source.getCharacterStream()); 812 else if (source != null && source.getSystemId() != null && 813 _searchPath.lookup(source.getSystemId()).isFile()) { 814 _owner.addDepend(_searchPath.lookup(source.getSystemId())); 815 is = _searchPath.lookup(source.getSystemId()).openRead(); 816 } 817 else if (systemId != null && ! systemId.equals("")) { 818 String path = systemId; 819 if (path.startsWith("file:")) 820 path = path.substring(5); 821 if (_searchPath.lookup(path).isFile()) { 822 _owner.addDepend(_searchPath.lookup(path)); 823 is = _searchPath.lookup(path).openRead(); 824 } 825 } 826 827 if (is == null) 828 return false; 829 830 _filename = systemId; 831 _systemId = systemId; 832 833 Path oldSearchPath = _searchPath; 834 Path path = is.getPath(); 835 if (path != null) { 836 _owner.addDepend(path); 837 838 if (_searchPath != null) { 839 _searchPath = path.getParent(); 840 _reader.setSearchPath(oldSearchPath); 841 } 842 } 843 844 _is = is; 845 _line = 1; 846 847 XmlReader oldReader = _reader; 848 _reader = null; 849 850 parseXMLDeclaration(oldReader); 851 852 return true; 853 } 854 855 private boolean isAttributeChar(int ch) 856 { 857 switch (ch) { 858 case ' ': case '\t': case '\n': case '\r': 859 return false; 860 case '<': case '>': case '\'':case '"': case '=': 861 return false; 862 default: 863 return true; 864 } 865 } 866 867 private int parsePI() 868 throws IOException , SAXException  869 { 870 int ch; 871 872 ch = read(); 873 if (! XmlChar.isNameStart(ch)) 874 throw error(L.l("expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>", badChar(ch))); 875 ch = _reader.parseName(_text, ch); 876 877 String piName = _text.toString(); 878 if (! piName.equals("xml")) 879 return parsePITail(piName, ch); 880 else { 881 throw error(L.l("<?xml ... ?> occurs after content. The <?xml ... ?> prolog must be at the document start.")); 882 883 } 884 } 885 886 private int parsePITail(String piName, int ch) 887 throws IOException , SAXException  888 { 889 ch = skipWhitespace(ch); 890 891 _text.clear(); 892 while (ch != -1) { 893 if (ch == '?') { 894 if ((ch = read()) == '>') 895 break; 896 else 897 _text.append('?'); 898 } else { 899 _text.append((char) ch); 900 ch = read(); 901 } 902 } 903 904 _contentHandler.processingInstruction(piName, _text.toString()); 905 906 return read(); 907 } 908 909 912 private void parseComment() 913 throws IOException , SAXException  914 { 915 int ch = read(); 916 917 if (ch != '-') 918 throw error(L.l("expected comment at {0}", badChar(ch))); 919 920 ch = read(); 921 922 if (! _skipComments) 923 _buf.clear(); 924 925 comment: 926 while (ch != -1) { 927 if (ch == '-') { 928 ch = read(); 929 930 while (ch == '-') { 931 if ((ch = read()) == '>') 932 break comment; 933 else if (_strictComments) 934 throw error(L.l("XML forbids '--' in comments")); 935 else if (ch == '-') { 936 if (! _skipComments) 937 _buf.append('-'); 938 } 939 else { 940 if (! _skipComments) 941 _buf.append("--"); 942 break; 943 } 944 } 945 946 _buf.append('-'); 947 } else if (! XmlChar.isChar(ch)) { 948 throw error(L.l("bad character {0}", hex(ch))); 949 } else { 950 _buf.append((char) ch); 951 ch = read(); 952 } 953 } 954 955 if (_skipComments) { 956 } 957 else if (_contentHandler instanceof XMLWriter && ! _skipComments) { 958 ((XMLWriter) _contentHandler).comment(_buf.toString()); 959 _isIgnorableWhitespace = true; 960 } 961 else if (_lexicalHandler != null) { 962 _lexicalHandler.comment(_buf.getBuffer(), 0, _buf.getLength()); 963 _isIgnorableWhitespace = true; 964 } 965 } 966 967 974 private void parseCdata() 975 throws IOException , SAXException  976 { 977 int ch; 978 979 if ((ch = read()) != 'C' || 980 (ch = read()) != 'D' || 981 (ch = read()) != 'A' || 982 (ch = read()) != 'T' || 983 (ch = read()) != 'A' || 984 (ch = read()) != '[') { 985 throw error(L.l("expected '<![CDATA[' at {0}", badChar(ch))); 986 } 987 988 ch = read(); 989 990 if (_lexicalHandler != null) { 991 _lexicalHandler.startCDATA(); 992 } 993 994 cdata: 995 while (ch != -1) { 996 if (ch == ']') { 997 ch = read(); 998 999 while (ch == ']') { 1000 if ((ch = read()) == '>') 1001 break cdata; 1002 else if (ch == ']') 1003 addText(']'); 1004 else { 1005 addText(']'); 1006 break; 1007 } 1008 } 1009 1010 addText(']'); 1011 } else if (_strictCharacters && ! isChar(ch)) { 1012 throw error(L.l("expected character in cdata at {0}", badChar(ch))); 1013 } else { 1014 addText((char) ch); 1015 ch = read(); 1016 } 1017 } 1018 1019 if (_lexicalHandler != null) { 1020 _lexicalHandler.endCDATA(); 1021 } 1022 } 1023 1024 1027 private void addPEReference(CharBuffer value, String name) 1028 throws IOException , SAXException  1029 { 1030 QEntity entity = _dtd.getParameterEntity(name); 1031 1032 if (entity == null && ! _dtd.isExternal()) 1033 throw error(L.l("'%{0};' is an unknown parameter entity. Parameter entities must be defined in an <!ENTITY> declaration before use.", name)); 1034 else if (entity != null && entity._value != null) { 1035 setMacro(entity._value); 1036 } 1037 else if (entity != null && entity.getSystemId() != null) { 1038 pushInclude(entity.getPublicId(), entity.getSystemId()); 1039 } 1040 else { 1041 value.append("%"); 1042 value.append(name); 1043 value.append(";"); 1044 } 1045 } 1046 1047 private static String toAttrDefault(CharBuffer text) 1048 { 1049 for (int i = 0; i < text.length(); i++) { 1050 int ch = text.charAt(i); 1051 1052 if (ch == '"') { 1053 text.delete(i, i + 1); 1054 text.insert(i, """); 1055 i--; 1056 } else if (ch == '\'') { 1057 text.delete(i, i + 1); 1058 text.insert(i, "'"); 1059 i--; 1060 } 1061 } 1062 1063 return text.toString(); 1064 } 1065 1066 1070 private int parseExternalID(int ch) 1071 throws IOException , SAXException  1072 { 1073 ch = _reader.parseName(_text, ch); 1074 String key = _text.toString(); 1075 ch = skipWhitespace(ch); 1076 1077 _extSystemId = null; 1078 _extPublicId = null; 1079 if (key.equals("PUBLIC")) { 1080 _extPublicId = parseValue(ch); 1081 ch = skipWhitespace(read()); 1082 1083 if (_extPublicId.indexOf('&') > 0) 1084 throw error(L.l("Illegal character '&' in PUBLIC identifier '{0}'", 1085 _extPublicId)); 1086 1087 _extSystemId = parseValue(ch); 1088 ch = skipWhitespace(read()); 1089 } 1090 else if (key.equals("SYSTEM")) { 1091 _extSystemId = parseValue(ch); 1092 ch = read(); 1093 } 1094 else 1095 throw error(L.l("expected PUBLIC or SYSTEM at '{0}'", key)); 1096 1097 return ch; 1098 } 1099 1100 1115 private String parseValue(int ch) 1116 throws IOException , SAXException  1117 { 1118 int end = ch; 1119 1120 char []valueBuffer = _valueBuffer; 1121 int valueLength = 0; 1122 1123 if (end != '\'' && end != '"') { 1124 valueBuffer[valueLength++] = (char) end; 1125 for (ch = read(); 1126 ch >= 0 && XmlChar.isNameChar(ch); 1127 ch = read()) { 1128 valueBuffer[valueLength++] = (char) ch; 1129 } 1130 1131 String value = new String (valueBuffer, 0, valueLength); 1132 1133 throw error(L.l("XML attribute value must be quoted at '{0}'. XML attribute syntax is either attr=\"value\" or attr='value'.", 1134 value)); 1135 } 1136 1137 ch = read(); 1138 1139 while (ch >= 0 && ch != end) { 1140 if (ch == '&') { 1141 if ((ch = read()) == '#') { 1142 valueBuffer[valueLength++] = (char) parseCharacterReference(); 1143 } 1144 else if (XmlChar.isNameStart(ch)) { 1145 ch = _reader.parseName(_buf, ch); 1146 String name = _buf.toString(); 1147 1148 if (ch != ';') 1149 throw error(L.l("expected '{0}' at {1}", ";", badChar(ch))); 1150 else { 1151 int lookup = _entities.getEntity(name); 1152 1153 if (lookup >= 0 && lookup <= 0xffff) { 1154 ch = read(); 1155 valueBuffer[valueLength++] = (char) lookup; 1156 continue; 1157 } 1158 1159 QEntity entity = _dtd == null ? null : _dtd.getEntity(name); 1160 if (entity != null && entity._value != null) 1161 setMacroAttr(entity._value); 1162 else 1163 throw error(L.l("expected local reference at '&{0};'", name)); 1164 } 1165 } 1166 } 1167 else { 1168 if (ch == '\r') { 1169 ch = read(); 1170 if (ch != '\n') { 1171 valueBuffer[valueLength++] = '\n'; 1172 continue; 1173 } 1174 } 1175 1176 valueBuffer[valueLength++] = (char) ch; 1177 } 1178 1179 ch = read(); 1180 } 1181 1182 return new String (valueBuffer, 0, valueLength); 1183 } 1184 1185 private boolean isWhitespace(int ch) 1186 { 1187 return ch <= 0x20 && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd); 1188 } 1189 1190 private boolean isChar(int ch) 1191 { 1192 return (ch >= 0x20 && ch <= 0xd7ff || 1193 ch == 0x9 || 1194 ch == 0xa || 1195 ch == 0xd || 1196 ch >= 0xe000 && ch <= 0xfffd); 1197 } 1198 1199 1202 private static String hex(int value) 1203 { 1204 CharBuffer cb = CharBuffer.allocate(); 1205 1206 for (int b = 3; b >= 0; b--) { 1207 int v = (value >> (4 * b)) & 0xf; 1208 if (v < 10) 1209 cb.append((char) (v + '0')); 1210 else 1211 cb.append((char) (v - 10 + 'a')); 1212 } 1213 1214 return cb.close(); 1215 } 1216 1217 1220 public String getFilename() 1221 { 1222 return _filename; 1223 } 1224 1225 1228 public int getLine() 1229 { 1230 return _line; 1231 } 1232 1233 1236 int getColumn() 1237 { 1238 return -1; 1239 } 1240 1241 1244 int getNodeLine() 1245 { 1246 if (_elementTop > 0) 1247 return _elementLines[_elementTop - 1]; 1248 else 1249 return 1; 1250 } 1251 1252 1255 public String getPublicId() 1256 { 1257 if (_reader != null) 1258 return _reader.getPublicId(); 1259 else 1260 return _publicId; 1261 } 1262 1263 1266 public String getSystemId() 1267 { 1268 if (_reader != null) 1269 return _reader.getSystemId(); 1270 else if (_systemId != null) 1271 return _systemId; 1272 else 1273 return _filename; 1274 } 1275 1276 public void setLine(int line) 1277 { 1278 _line = line; 1279 } 1280 1281 public int getLineNumber() { return getLine(); } 1282 public int getColumnNumber() { return getColumn(); } 1283 1284 1287 private void addText(String s) 1288 throws IOException , SAXException  1289 { 1290 int len = s.length(); 1291 1292 for (int i = 0; i < len; i++) 1293 addText(s.charAt(i)); 1294 } 1295 1296 1299 private void addText(char ch) 1300 throws IOException , SAXException  1301 { 1302 if (_textLength > 0 && _textBuffer[_textLength - 1] == '\r') { 1303 _textBuffer[_textLength - 1] = '\n'; 1304 if (ch == '\n') 1305 return; 1306 } 1307 1308 if (_isIgnorableWhitespace && ! XmlChar.isWhitespace(ch)) 1309 _isIgnorableWhitespace = false; 1310 1311 _textBuffer[_textLength++] = ch; 1312 } 1313 1314 1317 private void addText(char []buffer, int offset, int length, 1318 boolean isWhitespace) 1319 throws IOException , SAXException  1320 { 1321 if (length <= 0) 1322 return; 1323 1324 if (_namespace.getDepth() == 1) { 1325 if (! isWhitespace) { 1326 throw error(L.l("expected top element at '{0}'", 1327 new String (buffer, offset, length))); 1328 } 1329 else { 1330 _contentHandler.ignorableWhitespace(buffer, offset, length); 1331 } 1332 } 1333 else 1334 _contentHandler.characters(buffer, offset, length); 1335 } 1336 1337 1340 private SaxIntern.Entry parseName(int offset, boolean isAttribute) 1341 throws IOException  1342 { 1343 char []inputBuf = _inputBuffer; 1344 int inputLength = _inputLength; 1345 int inputOffset = _inputOffset; 1346 1347 char []valueBuf = _valueBuffer; 1348 int valueLength = offset; 1349 1350 int colon = 0; 1351 1352 while (true) { 1353 if (inputOffset < inputLength) { 1354 char ch = inputBuf[inputOffset++]; 1355 1356 if (XML_NAME_CHAR[ch]) { 1357 valueBuf[valueLength++] = ch; 1358 } 1359 else if (ch == ':') { 1360 if (colon <= 0) 1361 colon = valueLength; 1362 1363 valueBuf[valueLength++] = ch; 1364 } 1365 else { 1366 _inputOffset = inputOffset - 1; 1367 1368 return _intern.add(valueBuf, offset, valueLength - offset, 1369 colon, isAttribute); 1370 } 1371 } 1372 else if (fillBuffer()) { 1373 inputLength = _inputLength; 1374 inputOffset = 0; 1375 } 1376 else { 1377 return _intern.add(valueBuf, offset, valueLength - offset, 1378 colon, isAttribute); 1379 } 1380 } 1381 } 1382 1383 final int skipWhitespace(int ch) 1384 throws IOException , SAXException  1385 { 1386 while (ch <= 0x20 && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd)) { 1387 ch = read(); 1388 } 1389 1390 return ch; 1391 } 1392 1393 1394 public void setReader(XmlReader reader) 1395 { 1396 _reader = reader; 1397 } 1398 1399 1402 void setMacroAttr(String text) 1403 throws IOException , SAXException  1404 { 1405 if (_reader != _macro) { 1406 _macro.init(this, _reader); 1407 _reader = _macro; 1408 } 1409 1410 int j = _macroIndex; 1411 for (int i = 0; i < text.length(); i++) { 1412 int ch = text.charAt(i); 1413 1414 if (ch == '\'') 1415 _macro.add("'"); 1416 else if (ch == '"') 1417 _macro.add("""); 1418 else 1419 _macro.add((char) ch); 1420 } 1421 } 1422 1423 void pushInclude(String systemId) 1424 throws IOException , SAXException  1425 { 1426 pushInclude(null, systemId); 1427 } 1428 1433 void pushInclude(String publicId, String systemId) 1434 throws IOException , SAXException  1435 { 1436 InputStream stream = openStream(systemId, publicId); 1437 if (stream == null) 1438 throw new FileNotFoundException (systemId); 1439 _is = Vfs.openRead(stream); 1440 Path oldSearchPath = _searchPath; 1441 Path path = _is.getPath(); 1442 if (path != null) { 1443 _owner.addDepend(path); 1444 1445 if (_searchPath != null) { 1446 _searchPath = path.getParent(); 1447 _reader.setSearchPath(oldSearchPath); 1448 } 1449 } 1450 1451 _filename = systemId; 1452 1462 1463 XmlReader oldReader = _reader; 1464 _reader = null; 1465 1466 _line = 1; 1467 parseXMLDeclaration(oldReader); 1468 int ch = read(); 1469 1470 XmlReader reader = _reader; 1471 1472 if (reader instanceof MacroReader) 1473 reader = reader.getNext(); 1474 1475 reader.setSystemId(systemId); 1476 reader.setFilename(systemId); 1477 reader.setPublicId(publicId); 1478 reader.setNext(oldReader); 1479 1480 unread(ch); 1481 } 1482 1483 private void popInclude() 1484 throws IOException , SAXException  1485 { 1486 XmlReader oldReader = _reader; 1487 _reader = _reader.getNext(); 1488 oldReader.setNext(null); 1489 _filename = _reader.getFilename(); 1490 _line = _reader.getLine(); 1491 _is = _reader.getReadStream(); 1492 if (_reader.getSearchPath() != null) 1493 _searchPath = _reader.getSearchPath(); 1494 } 1495 1496 void setMacro(String text) 1497 throws IOException , SAXException  1498 { 1499 if (_reader == _macro) { 1500 } 1501 else if (_macro.getNext() == null) { 1502 _macro.init(this, _reader); 1503 _reader = _macro; 1504 } 1505 else { 1506 _macro = new MacroReader(); 1507 _macro.init(this, _reader); 1508 _reader = _macro; 1509 } 1510 1511 _macro.add(text); 1512 } 1513 1514 protected final int read() 1515 throws IOException , SAXException  1516 { 1517 int inputOffset = _inputOffset; 1518 1519 if (inputOffset < _inputLength) { 1520 char ch = _inputBuffer[inputOffset]; 1521 1522 _inputOffset = inputOffset + 1; 1523 1524 return ch; 1525 } 1526 else if (fillBuffer()) { 1527 return _inputBuffer[_inputOffset++]; 1528 } 1529 else 1530 return -1; 1531 } 1532 1533 public final void unread(int ch) 1534 { 1535 if (ch < 0 || _inputOffset <= 0) 1536 return; 1537 1538 _inputOffset--; 1539 } 1540 1541 protected boolean fillBuffer() 1542 throws IOException  1543 { 1544 int len = _is.read(_inputBuffer, 0, _inputBuffer.length); 1545 1546 if (len >= 0) { 1547 _inputLength = len; 1548 _inputOffset = 0; 1549 1550 return true; 1551 } 1552 else { 1553 _inputLength = 0; 1554 _inputOffset = 0; 1555 1556 return false; 1557 } 1558 } 1559 1560 private void parseXMLDeclaration(XmlReader oldReader) 1561 throws IOException , SAXException  1562 { 1563 int startOffset = _is.getOffset(); 1564 boolean isEBCDIC = false; 1565 int ch = _is.read(); 1566 1567 XmlReader reader = null; 1568 1569 if (ch == 0xfe) { 1571 ch = _is.read(); 1572 if (ch == 0xff) { 1573 _owner.setAttribute("encoding", "UTF-16"); 1574 _is.setEncoding("utf-16"); 1575 1576 reader = new Utf16Reader(this, _is); 1577 1578 ch = reader.read(); 1579 } 1580 } 1581 else if (ch == 0xff) { 1583 ch = _is.read(); 1584 if (ch == 0xfe) { 1585 _owner.setAttribute("encoding", "UTF-16"); 1586 _is.setEncoding("utf-16"); 1587 1588 reader = new Utf16Reader(this, _is); 1589 ((Utf16Reader) reader).setReverse(true); 1590 1591 ch = reader.read(); 1592 } 1593 } 1594 else if (ch == 0x00) { 1596 ch = _is.read(); 1597 _owner.setAttribute("encoding", "UTF-16"); 1598 _is.setEncoding("utf-16"); 1599 1600 reader = new Utf16Reader(this, _is); 1601 } 1602 else if (ch == 0xef) { 1604 ch = _is.read(); 1605 if (ch == 0xbb) { 1606 ch = _is.read(); 1607 1608 if (ch == 0xbf) { 1609 ch = _is.read(); 1610 1611 _owner.setAttribute("encoding", "UTF-8"); 1612 _is.setEncoding("utf-8"); 1613 1614 reader = new Utf8Reader(this, _is); 1615 } 1616 } 1617 } 1618 else if (ch == 0x4c) { 1619 _is.unread(); 1622 _is.setEncoding("cp500"); 1624 1625 isEBCDIC = true; 1626 1627 reader = new XmlReader(this, _is); 1628 1629 ch = reader.read(); 1630 } 1631 else { 1632 int ch2 = _is.read(); 1633 1634 if (ch2 == 0x00) { 1635 _owner.setAttribute("encoding", "UTF-16LE"); 1636 _is.setEncoding("utf-16le"); 1637 1638 reader = new Utf16Reader(this, _is); 1639 ((Utf16Reader) reader).setReverse(true); 1640 } 1641 else if (ch2 > 0) 1642 _is.unread(); 1643 } 1644 1645 if (reader != null && reader != oldReader) { 1646 } 1647 else if (_is.getSource() instanceof ReaderWriterStream) { 1648 reader = new XmlReader(this, _is); 1649 } 1650 else { 1651 reader = new Utf8Reader(this, _is); 1652 } 1653 1654 if (ch == '\n') 1655 reader.setLine(2); 1656 1657 reader.setSystemId(_systemId); 1658 if (_systemId == null) 1659 reader.setSystemId(_filename); 1660 reader.setFilename(_filename); 1661 reader.setPublicId(_publicId); 1662 1663 reader.setNext(oldReader); 1664 1665 _reader = reader; 1666 1667 1668 1674 1675 if (ch != '<') { 1676 unreadByte(ch); 1677 return; 1678 } 1679 1680 if (parseXMLDecl(_reader) && isEBCDIC) { 1681 _is.setOffset(startOffset); 1683 1684 ch = read(); 1685 if (ch != '<') 1686 throw new IllegalStateException (); 1687 1688 parseXMLDecl(_reader); 1689 } 1690 } 1691 1692 private boolean parseXMLDecl(XmlReader reader) 1693 throws IOException , SAXException  1694 { 1695 int ch = readByte(); 1696 if (ch != '?') { 1697 unreadByte((char) ch); 1698 unreadByte('<'); 1699 return false; 1700 } 1701 1702 ch = read(); 1703 if (! XmlChar.isNameStart(ch)) 1704 throw error(L.l("expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>", badChar(ch))); 1705 ch = _reader.parseName(_text, ch); 1706 1707 String piName = _text.toString(); 1708 if (! piName.equals("xml")) { 1709 ch = parsePITail(piName, ch); 1710 unreadByte(ch); 1711 return false; 1712 } 1713 1714 ch = parseAttributes(ch, false); 1715 1716 if (ch != '?') 1717 throw error(L.l("expected '?' at {0}. Processing instructions end with '?>' like <?foo ... ?>", badChar(ch))); 1718 if ((ch = read()) != '>') 1719 throw error(L.l("expected '>' at {0}. Processing instructions end with '?>' like <?foo ... ?>", ">", badChar(ch))); 1720 1721 for (int i = 0; i < _attributes.getLength(); i++) { 1722 QName name = _attributes.getName(i); 1723 String value = _attributes.getValue(i); 1724 1725 if (_owner != null) 1726 _owner.setAttribute(name.getLocalPart(), value); 1727 1728 if (name.getLocalPart().equals("encoding")) { String encoding = value; 1730 1731 if (! _isStaticEncoding && 1732 ! encoding.equalsIgnoreCase("UTF-8") && 1733 ! encoding.equalsIgnoreCase("UTF-16") && 1734 ! (_is.getSource() instanceof ReaderWriterStream)) { 1735 _is.setEncoding(encoding); 1736 1737 XmlReader oldReader = _reader; 1738 1739 _reader = new XmlReader(this, _is); 1740 1742 _reader.setLine(oldReader.getLine()); 1743 1744 _reader.setSystemId(_filename); 1745 _reader.setPublicId(null); 1746 } 1747 } 1748 } 1749 1750 return true; 1751 } 1752 1753 protected int readByte() 1754 throws IOException  1755 { 1756 return _is.read(); 1757 } 1758 1759 protected void unreadByte(int ch) 1760 { 1761 _is.unread(); 1762 } 1763 1764 1769 XmlParseException error(String text) 1770 { 1771 if (_errorHandler != null) { 1772 SAXParseException e = new SAXParseException (text, _locator); 1773 1774 try { 1775 _errorHandler.fatalError(e); 1776 } catch (SAXException e1) { 1777 } 1778 } 1779 1780 return new XmlParseException(_filename + ":" + _line + ": " + text); 1781 } 1782 1783 public void free() 1784 { 1785 } 1786 1787 int parseName(CharBuffer cb, int ch) 1788 throws IOException , SAXException  1789 { 1790 return _reader.parseName(cb, ch); 1791 } 1792 1793 1796 static String badChar(int ch) 1797 { 1798 if (ch < 0 || ch == 0xffff) 1799 return L.l("end of file"); 1800 else if (ch == '\n' || ch == '\r') 1801 return L.l("end of line"); 1802 else if (ch >= 0x20 && ch <= 0x7f) 1803 return "'" + (char) ch + "'"; 1804 else 1805 return "'" + (char) ch + "' (\\u" + hex(ch) + ")"; 1806 } 1807 1808 private void printDebugNode(WriteStream s, Node node, int depth) 1809 throws IOException  1810 { 1811 if (node == null) 1812 return; 1813 1814 for (int i = 0; i < depth; i++) 1815 s.print(' '); 1816 1817 if (node.getFirstChild() != null) { 1818 s.println("<" + node.getNodeName() + ">"); 1819 for (Node child = node.getFirstChild(); 1820 child != null; 1821 child = child.getNextSibling()) { 1822 printDebugNode(s, child, depth + 2); 1823 } 1824 for (int i = 0; i < depth; i++) 1825 s.print(' '); 1826 s.println("</" + node.getNodeName() + ">"); 1827 } 1828 else 1829 s.println("<" + node.getNodeName() + "/>"); 1830 } 1831 1832 public void close() 1833 { 1834 TempCharBuffer tempInputBuffer = _tempInputBuffer; 1835 _tempInputBuffer = null; 1836 1837 _inputBuffer = null; 1838 1839 if (tempInputBuffer != null) 1840 TempCharBuffer.free(tempInputBuffer); 1841 } 1842 1843 public static class LocatorImpl implements ExtendedLocator { 1844 XmlParser _parser; 1845 1846 LocatorImpl(XmlParser parser) 1847 { 1848 _parser = parser; 1849 } 1850 1851 public String getSystemId() 1852 { 1853 if (_parser._reader != null && _parser._reader.getSystemId() != null) 1854 return _parser._reader.getSystemId(); 1855 else if (_parser.getSystemId() != null) 1856 return _parser.getSystemId(); 1857 else if (_parser._reader != null && _parser._reader.getFilename() != null) 1858 return _parser._reader.getFilename(); 1859 else if (_parser.getFilename() != null) 1860 return _parser.getFilename(); 1861 else 1862 return null; 1863 } 1864 1865 public String getFilename() 1866 { 1867 if (_parser._reader != null && _parser._reader.getFilename() != null) 1868 return _parser._reader.getFilename(); 1869 else if (_parser.getFilename() != null) 1870 return _parser.getFilename(); 1871 else if (_parser._reader != null && _parser._reader.getSystemId() != null) 1872 return _parser._reader.getSystemId(); 1873 else if (_parser.getSystemId() != null) 1874 return _parser.getSystemId(); 1875 else 1876 return null; 1877 } 1878 1879 public String getPublicId() 1880 { 1881 if (_parser._reader != null) 1882 return _parser._reader.getPublicId(); 1883 else 1884 return _parser.getPublicId(); 1885 } 1886 1887 public int getLineNumber() 1888 { 1889 if (_parser._reader != null) 1890 return _parser._reader.getLine(); 1891 else 1892 return _parser.getLineNumber(); 1893 } 1894 1895 public int getColumnNumber() 1896 { 1897 return _parser.getColumnNumber(); 1898 } 1899 } 1900 1901 static { 1902 XML_NAME_CHAR = new boolean[65536]; 1903 1904 for (int i = 0; i < 65536; i++) { 1905 XML_NAME_CHAR[i] = XmlChar.isNameChar(i) && i != ':'; 1906 } 1907 } 1908} 1909 | Popular Tags |