1 28 29 package com.caucho.xml; 30 31 import com.caucho.server.util.CauchoSystem; 32 import com.caucho.util.L10N; 33 import com.caucho.vfs.Path; 34 import com.caucho.vfs.ReadStream; 35 import com.caucho.vfs.Vfs; 36 import com.caucho.vfs.VfsStream; 37 38 import org.w3c.dom.Document ; 39 import org.xml.sax.*; 40 import org.xml.sax.ext.LexicalHandler ; 41 42 import javax.xml.parsers.DocumentBuilderFactory ; 43 import java.io.FileNotFoundException ; 44 import java.io.IOException ; 45 import java.io.InputStream ; 46 import java.util.Hashtable ; 47 import java.util.Locale ; 48 import java.util.logging.Logger ; 49 50 abstract public class AbstractParser implements XMLReader, Parser 51 { 52 static final Logger log = Logger.getLogger(AbstractParser.class.getName()); 53 static final L10N L = new L10N(AbstractParser.class); 54 55 static Hashtable <String ,String > _attrTypes = new Hashtable <String ,String >(); 56 static Entities _xmlEntities = new XmlEntities(); 57 58 Policy _policy; 59 60 boolean _isCoalescing = true; 61 62 boolean _optionalTags = true; 63 boolean _skipWhitespace; 64 boolean _skipComments; 65 boolean _strictComments; 66 boolean _strictAttributes; 67 boolean _entitiesAsText = false; 68 boolean _expandEntities = true; 69 boolean _strictCharacters; 70 boolean _strictXml; 71 boolean _singleTopElement; 72 boolean _normalizeWhitespace = false; 73 boolean _forgiving; 74 boolean _extraForgiving; 75 boolean _switchToXml = false; 76 boolean _doResinInclude = false; 77 78 boolean _isNamespaceAware = true; 79 boolean _isNamespacePrefixes = true; 80 boolean _isSAXNamespaces = false; 81 82 boolean _isXmlnsPrefix; 83 boolean _isXmlnsAttribute; 84 85 boolean _isValidating = false; 86 87 boolean _isJsp; 88 89 boolean _isStaticEncoding = false; 90 String _defaultEncoding = "UTF-8"; 91 92 ContentHandler _contentHandler; 94 EntityResolver _entityResolver; 95 DTDHandler _dtdHandler; 96 LexicalHandler _lexicalHandler; 97 ErrorHandler _errorHandler; 98 Locale _locale; 99 100 Entities _entities; 101 QDocument _owner; 102 QDocumentType _dtd; 103 104 DOMBuilder _builder; 105 106 Path _searchPath; 107 108 String _publicId; 109 String _systemId; 110 String _filename; 111 int _line = 1; 112 113 116 AbstractParser() 117 { 118 this(new XmlPolicy(), null); 119 120 _policy.strictComments = true; 121 _policy.strictAttributes = true; 122 _policy.strictCharacters = true; 123 _policy.strictXml = true; 124 _policy.singleTopElement = true; 125 _policy.optionalTags = false; 126 } 127 128 134 AbstractParser(Policy policy, QDocumentType dtd) 135 { 136 _policy = policy; 137 138 if (dtd == null) 139 dtd = new QDocumentType(null); 140 _dtd = dtd; 141 142 _entities = _xmlEntities; 143 if (policy instanceof HtmlPolicy) 144 _entities = HtmlEntities.create(4.0); 145 } 146 147 void clear() 148 { 149 _isCoalescing = true; 150 151 _isNamespaceAware = true; 152 _isSAXNamespaces = false; 153 _isNamespacePrefixes = false; 154 _optionalTags = true; 155 _skipWhitespace = false; 156 _skipComments = false; 157 _strictComments = false; 158 _strictAttributes = false; 159 _entitiesAsText = false; 160 _expandEntities = true; 161 _strictCharacters = false; 162 _strictXml = false; 163 _singleTopElement = false; 164 _normalizeWhitespace = false; 165 _forgiving = false; 166 _extraForgiving = false; 167 _switchToXml = false; 168 _doResinInclude = false; 169 170 _isJsp = false; 171 172 _defaultEncoding = "UTF-8"; 173 _isStaticEncoding = false; 174 } 175 176 void init() 177 { 178 183 _isXmlnsPrefix = _isNamespaceAware || _isNamespacePrefixes; 184 _isXmlnsAttribute = _isNamespacePrefixes || ! _isNamespaceAware; 185 } 186 187 190 public void setOwner(QDocument doc) 191 { 192 _owner = doc; 193 } 194 195 198 public void setConfig(DocumentBuilderFactory factory) 199 { 200 if (_builder == null) 201 _builder = new DOMBuilder(); 202 203 _isCoalescing = factory.isCoalescing(); 204 setExpandEntities(factory.isExpandEntityReferences()); 205 setSkipComments(factory.isIgnoringComments()); 206 setSkipWhitespace(factory.isIgnoringElementContentWhitespace()); 207 setNamespaceAware(factory.isNamespaceAware()); 208 setNamespacePrefixes(false); 209 setValidating(factory.isValidating()); 210 } 211 212 public void setEntitiesAsText(boolean entitiesAsText) 213 { 214 _entitiesAsText = entitiesAsText; 215 } 216 217 public boolean getEntitiesAsText() 218 { 219 return _entitiesAsText; 220 } 221 222 public void setExpandEntities(boolean expandEntities) 223 { 224 _expandEntities = expandEntities; 225 _policy.expandEntities = expandEntities; 226 } 227 228 232 public void setSkipComments(boolean skipComments) 233 { 234 _skipComments = skipComments; 235 } 236 237 240 public void setSkipWhitespace(boolean skipWhitespace) 241 { 242 _skipWhitespace = skipWhitespace; 243 } 244 245 248 public boolean isCoalescing() 249 { 250 return _isCoalescing; 251 } 252 253 256 public void setCoalescing(boolean isCoalescing) 257 { 258 _isCoalescing = isCoalescing; 259 } 260 261 264 public boolean isValidating() 265 { 266 return _isValidating; 267 } 268 269 272 public void setValidating(boolean isValidating) 273 { 274 _isValidating = isValidating; 275 } 276 277 280 public boolean isNamespaceAware() 281 { 282 return _isNamespaceAware; 283 } 284 285 288 public void setNamespaceAware(boolean isNamespaceAware) 289 { 290 _isNamespaceAware = isNamespaceAware; 291 } 292 293 296 public boolean isSAXNamespaces() 297 { 298 return _isSAXNamespaces; 299 } 300 301 304 public void setSAXNamespaces(boolean isNamespaces) 305 { 306 _isSAXNamespaces = isNamespaces; 307 } 308 309 312 public boolean isNamespacePrefixes() 313 { 314 return _isNamespacePrefixes; 315 } 316 317 320 public void setNamespacePrefixes(boolean isNamespaces) 321 { 322 _isNamespacePrefixes = isNamespaces; 323 } 324 325 328 public void setToLower(boolean toLower) 329 { 330 if (_policy instanceof HtmlPolicy) 331 ((HtmlPolicy) _policy).setToLower(toLower); 332 } 333 334 public boolean getSkipComments() 335 { 336 return _skipComments; 337 } 338 339 345 public void setForgiving(boolean forgiving) 346 { 347 _forgiving = forgiving; 348 } 349 350 355 public boolean getForgiving() 356 { 357 return _forgiving; 358 } 359 360 364 public void setAutodetectXml(boolean autodetectXml) 365 { 366 _switchToXml = autodetectXml; 367 } 368 369 374 public void setJsp(boolean isJsp) 375 { 376 _isJsp = isJsp; 377 378 if (_policy instanceof HtmlPolicy) 379 ((HtmlPolicy) _policy).setJsp(isJsp); 380 } 381 382 387 public boolean getJsp() 388 { 389 return _isJsp; 390 } 391 392 398 public void setSearchPath(Path path) 399 { 400 _searchPath = path; 401 } 402 403 409 public Path getSearchPath() 410 { 411 return _searchPath; 412 } 413 414 419 public void setDefaultEncoding(String encoding) 420 { 421 _defaultEncoding = encoding; 422 } 423 424 427 public String getDefaultEncoding() 428 { 429 return _defaultEncoding; 430 } 431 432 437 public void setResinInclude(boolean doResinInclude) 438 { 439 _doResinInclude = doResinInclude; 440 } 441 442 447 public boolean getResinInclude() 448 { 449 return _doResinInclude; 450 } 451 452 public Object getProperty(String name) 453 throws SAXNotRecognizedException 454 { 455 if (name.equals("http://xml.org/sax/properties/lexical-handler")) 456 return _lexicalHandler; 457 else if (name.equals("http://xml.org/sax/properties/dom-node")) 458 return null; 459 else if (name.equals("http://xml.org/sax/properties/xml-string")) 460 return null; 461 else 462 throw new SAXNotRecognizedException(name); 463 } 464 465 public void setProperty(String name, Object obj) 466 throws SAXNotSupportedException 467 { 468 if (name.equals("http://xml.org/sax/properties/lexical-handler")) 469 _lexicalHandler = (LexicalHandler ) obj; 470 else if (name.equals("http://xml.org/sax/handlers/LexicalHandler")) 471 _lexicalHandler = (LexicalHandler ) obj; 472 else 473 throw new SAXNotSupportedException(name); 474 } 475 476 public boolean getFeature(String name) 477 throws SAXNotRecognizedException 478 { 479 if (name.equals("http://xml.org/sax/features/namespaces")) 480 return _isSAXNamespaces; 481 else if (name.equals("http://xml.org/sax/features/namespace-prefixes")) 482 return _isNamespacePrefixes; 483 else if (name.equals("http://xml.org/sax/features/string-interning")) 484 return true; 485 else if (name.equals("http://xml.org/sax/features/validation")) 486 return _isValidating; 487 else if (name.equals("http://xml.org/sax/features/external-general-entities")) 488 return true; 489 else if (name.equals("http://xml.org/sax/features/external-parameter-entities")) 490 return false; 491 else if (name.equals("http://caucho.com/xml/features/skip-comments")) 492 return _skipComments; 493 else if (name.equals("http://caucho.com/xml/features/resin-include")) 494 return _doResinInclude; 495 else 496 throw new SAXNotRecognizedException(name); 497 } 498 499 public void setFeature(String name, boolean value) 500 throws SAXNotSupportedException 501 { 502 if (name.equals("http://xml.org/sax/features/namespaces")) { 503 _isNamespaceAware = value; 504 } 505 else if (name.equals("http://xml.org/sax/features/namespace-prefixes")) { 506 _isNamespacePrefixes = value; 509 _isNamespaceAware = true; 510 } 511 else if (name.equals("http://caucho.com/xml/features/skip-comments")) { 512 _skipComments = value; 513 } 514 else if (name.equals("http://caucho.com/xml/features/resin-include")) 515 _doResinInclude = value; 516 else if (name.equals("http://xml.org/sax/features/validation")) 517 _isValidating = value; 518 else 519 throw new SAXNotSupportedException(name); 520 } 521 522 public void setLexicalHandler(LexicalHandler handler) 523 { 524 _lexicalHandler = handler; 525 } 526 527 532 public void setEntityResolver(EntityResolver resolver) 533 { 534 _entityResolver = resolver; 535 } 536 537 542 public EntityResolver getEntityResolver() 543 { 544 return _entityResolver; 545 } 546 547 public void setDTDHandler(DTDHandler handler) 548 { 549 _dtdHandler = handler; 550 } 551 552 public DTDHandler getDTDHandler() 553 { 554 return _dtdHandler; 555 } 556 557 public void setContentHandler(ContentHandler handler) 558 { 559 _contentHandler = handler; 560 } 561 562 public ContentHandler getContentHandler() 563 { 564 return _contentHandler; 565 } 566 567 572 public void setDocumentHandler(DocumentHandler handler) 573 { 574 if (handler == null) 575 _contentHandler = null; 576 else 577 _contentHandler = new ContentHandlerAdapter(handler); 578 } 579 580 public void setErrorHandler(ErrorHandler handler) 581 { 582 _errorHandler = handler; 583 } 584 585 public ErrorHandler getErrorHandler() 586 { 587 return _errorHandler; 588 } 589 590 public void setLocale(Locale locale) 591 { 592 _locale = locale; 593 } 594 595 600 public void parse(InputSource source) 601 throws IOException , SAXException 602 { 603 init(); 604 605 if (_searchPath == null) { 606 if (source.getSystemId() != null) 607 _searchPath = Vfs.lookup(source.getSystemId()).getParent(); 608 } 609 610 _systemId = source.getSystemId(); 611 _publicId = source.getPublicId(); 612 ReadStream stream; 613 String encoding = null; 614 615 if (source.getByteStream() != null) { 616 stream = Vfs.openRead(source.getByteStream()); 617 encoding = source.getEncoding(); 618 } 619 else if (source.getCharacterStream() != null) { 620 encoding = "UTF-8"; 621 _isStaticEncoding = true; 622 stream = Vfs.openRead(source.getCharacterStream()); 623 } 624 else if (source.getSystemId() != null) { 625 InputStream is = openStream(source.getSystemId(), 626 source.getPublicId(), 627 null, 628 true); 629 stream = Vfs.openRead(is); 630 encoding = source.getEncoding(); 631 } 632 else 633 throw new FileNotFoundException (L.l("invalid InputSource")); 634 635 if (encoding != null) 636 stream.setEncoding(encoding); 637 638 try { 639 parseInt(stream); 640 } finally { 641 stream.close(); 642 } 643 } 644 645 650 public void parse(InputStream is) 651 throws IOException , SAXException 652 { 653 init(); 654 655 _systemId = "stream"; 656 657 if (is instanceof ReadStream) { 658 Path path = ((ReadStream) is).getPath(); 659 _systemId = path.getURL(); 660 _filename = path.getUserPath(); 661 662 if (_searchPath != null) { 663 } 664 else if (path != null) 665 _searchPath = path.getParent(); 666 667 parseInt((ReadStream) is); 668 } 669 else { 670 ReadStream rs = VfsStream.openRead(is); 671 try { 672 parseInt(rs); 673 } finally { 674 if (rs != is) 675 rs.close(); 676 } 677 } 678 } 679 680 685 public void parse(InputStream is, String systemId) 686 throws IOException , SAXException 687 { 688 init(); 689 690 parseImpl(is, systemId); 691 } 692 693 698 public void parseImpl(InputStream is, String systemId) 699 throws IOException , SAXException 700 { 701 if (is instanceof ReadStream) { 702 Path path = ((ReadStream) is).getPath(); 703 704 if (_searchPath != null) { 705 } 706 else if (path != null) { 707 _searchPath = path.getParent(); 708 if (systemId != null) 709 _searchPath = _searchPath.lookup(systemId).getParent(); 710 } 711 else if (systemId != null) 712 _searchPath = Vfs.lookup(systemId).getParent(); 713 714 if (systemId == null) { 715 systemId = path.getURL(); 716 _filename = ((ReadStream) is).getUserPath(); 717 } 718 else 719 _filename = systemId; 720 721 _systemId = systemId; 722 723 parseInt((ReadStream) is); 724 } 725 else { 726 if (systemId == null) { 727 _systemId = "anonymous.xml"; 728 } 729 else { 730 _searchPath = Vfs.lookup(systemId).getParent(); 731 _systemId = systemId; 732 } 733 734 ReadStream rs = VfsStream.openRead(is); 735 try { 736 parseInt(rs); 737 } finally { 738 if (rs != is) 739 rs.close(); 740 } 741 } 742 } 743 744 749 public void parse(String systemId) 750 throws IOException , SAXException 751 { 752 InputStream is = openTopStream(systemId, null); 753 try { 754 parse(is); 755 } finally { 756 is.close(); 757 } 758 } 759 760 763 public void parse(Path path) 764 throws IOException , SAXException 765 { 766 init(); 767 768 if (_searchPath == null) 769 _searchPath = path.getParent(); 770 771 ReadStream is = path.openRead(); 772 try { 773 parseInt(is); 774 } finally { 775 is.close(); 776 } 777 } 778 779 784 public void parseString(String string) 785 throws IOException , SAXException 786 { 787 init(); 788 789 ReadStream is = Vfs.openString(string); 790 791 try { 792 parseInt(is); 793 } finally { 794 is.close(); 795 } 796 } 797 798 803 public Document parseDocument(InputSource source) 804 throws IOException , SAXException 805 { 806 init(); 807 808 QDocument doc = new QDocument(); 809 810 if (_builder == null) 811 _builder = new DOMBuilder(); 812 813 _builder.init(doc); 814 setOwner(doc); 815 816 doc.setSystemId(source.getSystemId()); 817 _builder.setSystemId(source.getSystemId()); 818 _builder.setStrictXML(_strictXml); 819 _builder.setCoalescing(_isCoalescing); 820 _builder.setSkipWhitespace(_skipWhitespace); 821 _contentHandler = _builder; 822 823 parse(source); 824 825 return doc; 826 } 827 828 833 public Document parseDocument(String systemId) 834 throws IOException , SAXException 835 { 836 InputStream is = openTopStream(systemId, null); 837 try { 838 return parseDocument(is); 839 } finally { 840 is.close(); 841 } 842 } 843 844 849 public Document parseDocument(Path path) 850 throws IOException , SAXException 851 { 852 if (_searchPath == null) 853 _searchPath = path.getParent(); 854 855 ReadStream is = path.openRead(); 856 try { 857 return parseDocument(is); 858 } finally { 859 is.close(); 860 } 861 } 862 863 870 public Document parseDocument(InputStream is) 871 throws IOException , SAXException 872 { 873 return parseDocument(is, null); 874 } 875 876 884 public Document parseDocument(InputStream is, String systemId) 885 throws IOException , SAXException 886 { 887 init(); 888 889 QDocument doc = new QDocument(); 890 parseDocument(doc, is, systemId); 891 892 return doc; 893 } 894 895 public void parseDocument(QDocument doc, InputStream is, String systemId) 896 throws IOException , SAXException 897 { 898 _owner = doc; 899 900 if (_builder == null) 901 _builder = new DOMBuilder(); 902 903 _builder.init(_owner); 904 _builder.setSystemId(systemId); 905 _builder.setCoalescing(_isCoalescing); 906 _builder.setSkipWhitespace(_skipWhitespace); 907 _contentHandler = _builder; 908 909 parseImpl(is, systemId); 910 } 911 912 917 public Document parseDocumentString(String string) 918 throws IOException , SAXException 919 { 920 ReadStream is = Vfs.openString(string); 921 922 try { 923 _isStaticEncoding = true; 924 return parseDocument(is); 925 } finally { 926 is.close(); 927 } 928 } 929 930 933 public InputStream openStream(String systemId, String publicId) 934 throws IOException , SAXException 935 { 936 return openStream(systemId, publicId, _entityResolver, false); 937 } 938 939 942 public InputStream openTopStream(String systemId, String publicId) 943 throws IOException , SAXException 944 { 945 return openStream(systemId, publicId, _entityResolver, true); 946 } 947 948 951 public InputStream openStream(String systemId, String publicId, 952 EntityResolver entityResolver) 953 throws IOException , SAXException 954 { 955 return openStream(systemId, publicId, entityResolver, false); 956 } 957 958 961 protected InputStream openStream(String systemId, String publicId, 962 EntityResolver entityResolver, 963 boolean isTop) 964 throws IOException , SAXException 965 { 966 int colon = systemId.indexOf(':'); 967 int slash = systemId.indexOf('/'); 968 969 boolean isAbsolute = colon > 0 && (colon < slash || slash < 0); 970 971 if (slash == 0 || ! isAbsolute) { 972 Path pwd; 973 974 if (_searchPath != null) 975 pwd = _searchPath; 976 else 977 pwd = Vfs.lookup(systemId).getParent(); 978 979 String newId = pwd.lookup(systemId).getURL(); 980 if (! newId.startsWith("error:")) 981 systemId = newId; 982 else { 983 int tail = _systemId.lastIndexOf('/'); 984 if (tail >= 0) 985 systemId = _systemId.substring(0, tail + 1) + systemId; 986 } 987 } 988 989 if (entityResolver != null) { 991 InputSource source = entityResolver.resolveEntity(publicId, systemId); 992 993 if (source != null) { 994 _filename = systemId; 995 _systemId = systemId; 996 997 return openSource(source); 998 } 999 } 1000 1001 int ch; 1002 if (CauchoSystem.isWindows() && systemId.startsWith("file:") && 1003 systemId.length() > 7 && systemId.charAt(6) == ':' && 1004 (((ch = systemId.charAt(5)) >= 'a' && ch <= 'z') || 1005 ch >= 'A' && ch <= 'Z')) { 1006 colon = 1; 1007 isAbsolute = false; 1008 systemId = "/" + systemId.substring(5); 1009 } 1010 1011 if (! isTop && 1012 isAbsolute && ! systemId.startsWith("file:") && 1013 ! systemId.startsWith("jar:") && 1014 ! (colon == 1 && CauchoSystem.isWindows())) { 1015 throw new RemoteURLException(L.l("URL `{0}' was not opened because it is a remote URL. Any URL scheme other than file: must be handled by a custom entity resolver.", 1016 systemId)); 1017 } 1018 else if (_searchPath != null) { 1019 return _searchPath.lookup(systemId).openRead(); 1020 } 1021 else 1022 return Vfs.lookup(systemId).openRead(); 1023 } 1024 1025 1028 protected InputStream openSource(InputSource source) 1029 throws IOException , SAXException 1030 { 1031 if (source.getByteStream() != null) { 1032 return source.getByteStream(); 1033 } 1034 else if (source.getCharacterStream() != null) { 1035 return Vfs.openRead(source.getCharacterStream()); 1036 } 1037 else if (source.getSystemId() != null) { 1038 return Vfs.openRead(source.getSystemId()); 1039 } 1040 else 1041 throw new FileNotFoundException (L.l("invalid InputSource {0}", source)); 1042 } 1043 1044 1051 abstract Document parseInt(ReadStream is) 1052 throws IOException , SAXException; 1053 1054 static { 1055 _attrTypes.put("CDATA", "CDATA"); 1056 _attrTypes.put("ID", "ID"); 1057 _attrTypes.put("IDREF", "IDREF"); 1058 _attrTypes.put("IDREFS", "IDREFS"); 1059 _attrTypes.put("ENTITY", "ENTITY"); 1060 _attrTypes.put("ENTITIES", "ENTITIES"); 1061 _attrTypes.put("NMTOKEN", "NMTOKEN"); 1062 _attrTypes.put("NMTOKENS", "NMTOKENS"); 1063 } 1064} 1065 | Popular Tags |