1 21 22 package nu.xom; 23 24 import java.io.CharConversionException ; 25 import java.io.File ; 26 import java.io.FileInputStream ; 27 import java.io.IOException ; 28 import java.io.InputStream ; 29 import java.io.Reader ; 30 import java.io.StringReader ; 31 import java.io.UTFDataFormatException ; 32 import java.net.MalformedURLException ; 33 import java.net.URL ; 34 35 import org.xml.sax.ErrorHandler ; 36 import org.xml.sax.InputSource ; 37 import org.xml.sax.SAXException ; 38 import org.xml.sax.SAXNotRecognizedException ; 39 import org.xml.sax.SAXNotSupportedException ; 40 import org.xml.sax.SAXParseException ; 41 import org.xml.sax.XMLFilter ; 42 import org.xml.sax.XMLReader ; 43 import org.xml.sax.helpers.XMLReaderFactory ; 44 45 import org.apache.xerces.impl.Version; 46 47 59 public class Builder { 60 61 62 private XMLReader parser; 63 private NodeFactory factory; 64 65 private static double xercesVersion = 2.6; 66 67 static { 68 69 try { 70 String versionString = Version.getVersion(); 71 versionString = versionString.substring(9, 12); 72 xercesVersion = Double.valueOf(versionString).doubleValue(); 73 } 74 catch (Exception ex) { 75 } 78 catch (Error err) { 79 } 81 82 } 83 84 85 117 public Builder() { 118 this(false); 119 } 120 121 122 137 public Builder(boolean validate) { 138 this(findParser(validate), validate, null); 139 } 140 141 142 159 public Builder(boolean validate, NodeFactory factory) { 160 this(findParser(validate), validate, factory); 161 } 162 163 164 private static String [] parsers = { 166 "nu.xom.XML1_0Parser", 167 "nu.xom.JDK15XML1_0Parser", 168 "org.apache.xerces.parsers.SAXParser", 169 "com.sun.org.apache.xerces.internal.parsers.SAXParser", 170 "gnu.xml.aelfred2.XmlReader", 171 "org.apache.crimson.parser.XMLReaderImpl", 172 "com.bluecast.xml.Piccolo", 173 "oracle.xml.parser.v2.SAXParser", 174 "com.jclark.xml.sax.SAX2Driver", 175 "net.sf.saxon.aelfred.SAXDriver", 176 "com.icl.saxon.aelfred.SAXDriver", 177 "org.dom4j.io.aelfred2.SAXDriver", 178 "org.dom4j.io.aelfred.SAXDriver" 179 }; 180 181 182 private static XMLReader findParser(boolean validate) { 183 184 XMLReader parser; 188 try { 189 parser = new XML1_0Parser(); 190 setupParser(parser, validate); 191 return parser; 192 } 193 catch (SAXException ex) { 194 } 196 catch (NoClassDefFoundError err) { 197 } 199 200 try { 201 parser = (XMLReader ) Class.forName( 202 "nu.xom.JDK15XML1_0Parser").newInstance(); 203 setupParser(parser, validate); 204 return parser; 205 } 206 catch (SAXException ex) { 207 } 209 catch (InstantiationException ex) { 210 } 212 catch (ClassNotFoundException ex) { 213 } 215 catch (IllegalAccessException ex) { 216 } 218 catch (NoClassDefFoundError err) { 219 } 221 222 for (int i = 2; i < parsers.length; i++) { 226 try { 227 parser = XMLReaderFactory.createXMLReader(parsers[i]); 228 setupParser(parser, validate); 229 return parser; 230 } 231 catch (SAXException ex) { 232 } 234 catch (NoClassDefFoundError err) { 235 } 237 } 238 239 try { parser = XMLReaderFactory.createXMLReader(); 241 setupParser(parser, validate); 242 return parser; 243 } 244 catch (SAXException ex) { 245 throw new XMLException( 246 "Could not find a suitable SAX2 parser", ex); 247 } 248 249 } 250 251 252 private static void setupParser(XMLReader parser, boolean validate) 253 throws SAXNotRecognizedException , SAXNotSupportedException { 254 255 XMLReader baseParser = parser; 256 while (baseParser instanceof XMLFilter ) { 257 baseParser = ((XMLFilter ) baseParser).getParent(); 258 } 259 String parserName = baseParser.getClass().getName(); 260 if (!validate) { 261 parser.setFeature( 262 "http://xml.org/sax/features/namespace-prefixes", true); 263 if (parserName.equals( "org.apache.crimson.parser.XMLReaderImpl")) { 265 parser.setErrorHandler( 266 new NamespaceWellformednessRequired() 267 ); 268 } 269 else { 270 parser.setFeature( 271 "http://xml.org/sax/features/external-general-entities", 272 true 273 ); 274 parser.setFeature( 275 "http://xml.org/sax/features/external-parameter-entities", 276 true 277 ); 278 } 279 } 280 else { 281 parser.setFeature( 282 "http://xml.org/sax/features/namespace-prefixes", true); 283 parser.setFeature( 284 "http://xml.org/sax/features/validation", true); 285 parser.setErrorHandler(new ValidityRequired()); 286 } 287 288 try { 289 parser.setFeature( 290 "http://xml.org/sax/features/string-interning", true); 291 } 292 catch (SAXException ex) { 293 } 296 297 if (parserName.equals("nu.xom.XML1_0Parser") 299 || parserName.equals("nu.xom.JDK15XML1_0Parser") 300 || parserName.equals("org.apache.xerces.parsers.SAXParser") 301 || parserName.equals("com.sun.org.apache.xerces.internal.parsers.SAXParser")) { 302 try { 303 parser.setFeature( 304 "http://apache.org/xml/features/allow-java-encodings", true); 305 } 306 catch (SAXException ex) { 307 } 310 try { 314 parser.setFeature( 315 "http://apache.org/xml/features/standard-uri-conformant", 316 true); 317 } 318 catch (SAXException ex) { 319 } 323 } 324 325 } 326 327 328 343 public Builder(XMLReader parser) { 344 this(parser, false); 345 } 346 347 348 361 public Builder(NodeFactory factory) { 362 this(findParser(false), false, factory); 363 } 364 365 366 386 public Builder(XMLReader parser, boolean validate) { 387 this(parser, validate, null); 388 } 389 390 391 416 public Builder( 417 XMLReader parser, boolean validate, NodeFactory factory) { 418 419 try { 420 setupParser(parser, validate); 421 } 422 catch (SAXException ex) { 423 if (validate) { 424 throw new XMLException(parser.getClass().getName() 425 + " does not support validation.", ex); 426 } 427 else { 428 throw new XMLException(parser.getClass().getName() 429 + " does not support the entity resolution" 430 + " features XOM requires.", ex); 431 } 432 } 433 434 this.parser = parser; 436 this.factory = factory; 437 setHandlers(); 438 439 } 440 441 442 private static boolean knownGoodParser(XMLReader parser) { 443 444 String parserName = parser.getClass().getName(); 445 446 if (parserName.equals("org.apache.xml.resolver.tools.ResolvingXMLReader") 450 || parserName.equals("org.apache.xml.resolver.tools.ResolvingXMLFilter")) { 451 XMLFilter filter = (XMLFilter ) parser; 452 parserName = filter.getParent().getClass().getName(); 453 } 454 455 if (parserName.equals("gnu.xml.aelfred2.XmlReader")) return false; 458 if (parserName.equals("net.sf.saxon.aelfred.SAXDriver")) return false; 459 if (parserName.equals("com.icl.saxon.aelfred.SAXDriver")) return false; 460 461 if (parserName.equals("org.apache.xerces.parsers.SAXParser") 462 && xercesVersion >= 2.4) { 463 return false; 464 } 465 466 for (int i = 0; i < parsers.length; i++) { 467 if (parserName.equals(parsers[i])) return true; 468 } 469 return false; 470 471 } 472 473 474 private void setHandlers() { 475 476 XOMHandler handler; 477 if ((factory == null 478 || factory.getClass().getName().equals("nu.xom.NodeFactory")) 479 && knownGoodParser(parser)) { 480 NodeFactory tempFactory = factory; 483 if (tempFactory == null) tempFactory = new NodeFactory(); 484 handler = new NonVerifyingHandler(tempFactory); 485 } 486 else { 487 if (factory == null) factory = new NodeFactory(); 488 handler = new XOMHandler(factory); 489 } 490 parser.setContentHandler(handler); 491 parser.setDTDHandler(handler); 492 493 try { 494 parser.setProperty( 495 "http://xml.org/sax/properties/lexical-handler", 496 handler); 497 } 498 catch (SAXException ex) { 499 } 504 505 try { 506 parser.setProperty( 507 "http://xml.org/sax/properties/declaration-handler", 508 handler); 509 } 510 catch (SAXException ex) { 511 } 515 516 } 517 518 519 548 public Document build(String systemID) 549 throws ParsingException, ValidityException, IOException { 550 551 systemID = canonicalizeURL(systemID); 552 InputSource source = new InputSource (systemID); 553 return build(source); 554 555 } 556 557 558 573 public Document build(InputStream in) 574 throws ParsingException, ValidityException, IOException { 575 576 InputSource source = new InputSource (in); 577 return build(source); 578 579 } 580 581 582 599 public Document build(InputStream in, String baseURI) 600 throws ParsingException, ValidityException, IOException { 601 602 baseURI = canonicalizeURL(baseURI); 603 InputSource source = new InputSource (in); 604 source.setSystemId(baseURI); 605 return build(source); 606 607 } 608 609 610 private static String fileURLPrefix = "file://"; 613 private static boolean isWindows = false; 614 615 static { 616 String os = System.getProperty("os.name", "Unix"); 617 if (os.indexOf("Windows") >= 0) { 621 fileURLPrefix = "file:/"; 622 isWindows = true; 623 } 624 } 625 626 627 644 public Document build(File in) 645 throws ParsingException, ValidityException, IOException { 646 647 InputStream fin = new FileInputStream (in); 648 String absolute = in.getAbsolutePath(); 651 StringBuffer url = new StringBuffer (fileURLPrefix); 652 int length = absolute.length(); 653 char separatorChar = File.separatorChar; 654 for (int i = 0; i < length; i++) { 655 char c = absolute.charAt(i); 656 if (c == separatorChar) url.append('/'); 657 else { 658 switch(c) { 659 case ' ': 660 url.append("%20"); 661 break; 662 case '!': 663 url.append(c); 664 break; 665 case '"': 666 url.append("%22"); 667 break; 668 case '#': 669 url.append("%23"); 670 break; 671 case '$': 672 url.append(c); 673 break; 674 case '%': 675 url.append("%25"); 676 break; 677 case '&': 678 url.append('&'); 681 break; 682 case '\'': 683 url.append(c); 684 break; 685 case '(': 686 url.append(c); 687 break; 688 case ')': 689 url.append(c); 690 break; 691 case '*': 692 url.append(c); 693 break; 694 case '+': 695 url.append("%2B"); 696 break; 697 case ',': 698 url.append(c); 699 break; 700 case '-': 701 url.append(c); 702 break; 703 case '.': 704 url.append(c); 705 break; 706 case '/': 707 url.append("%2F"); 708 break; 709 case '0': 710 url.append(c); 711 break; 712 case '1': 713 url.append(c); 714 break; 715 case '2': 716 url.append(c); 717 break; 718 case '3': 719 url.append(c); 720 break; 721 case '4': 722 url.append(c); 723 break; 724 case '5': 725 url.append(c); 726 break; 727 case '6': 728 url.append(c); 729 break; 730 case '7': 731 url.append(c); 732 break; 733 case '8': 734 url.append(c); 735 break; 736 case '9': 737 url.append(c); 738 break; 739 case ':': 740 url.append(c); 741 break; 742 case ';': 743 url.append(c); 744 break; 745 case '<': 746 url.append("%3C"); 747 break; 748 case '=': 749 url.append(c); 750 break; 751 case '>': 752 url.append("%3E"); 753 break; 754 case '?': 755 url.append("%3F"); 756 break; 757 case '@': 758 url.append("%40"); 759 break; 760 case 'A': 761 url.append(c); 762 break; 763 case 'B': 764 url.append(c); 765 break; 766 case 'C': 767 url.append(c); 768 break; 769 case 'D': 770 url.append(c); 771 break; 772 case 'E': 773 url.append(c); 774 break; 775 case 'F': 776 url.append(c); 777 break; 778 case 'G': 779 url.append(c); 780 break; 781 case 'H': 782 url.append(c); 783 break; 784 case 'I': 785 url.append(c); 786 break; 787 case 'J': 788 url.append(c); 789 break; 790 case 'K': 791 url.append(c); 792 break; 793 case 'L': 794 url.append(c); 795 break; 796 case 'M': 797 url.append(c); 798 break; 799 case 'N': 800 url.append(c); 801 break; 802 case 'O': 803 url.append(c); 804 break; 805 case 'P': 806 url.append(c); 807 break; 808 case 'Q': 809 url.append(c); 810 break; 811 case 'R': 812 url.append(c); 813 break; 814 case 'S': 815 url.append(c); 816 break; 817 case 'T': 818 url.append(c); 819 break; 820 case 'U': 821 url.append(c); 822 break; 823 case 'V': 824 url.append(c); 825 break; 826 case 'W': 827 url.append(c); 828 break; 829 case 'X': 830 url.append(c); 831 break; 832 case 'Y': 833 url.append(c); 834 break; 835 case 'Z': 836 url.append(c); 837 break; 838 case '[': 839 url.append("%5B"); 840 break; 841 case '\\': 842 url.append("%5C"); 843 break; 844 case ']': 845 url.append("%5D"); 846 break; 847 case '^': 848 url.append("%5E"); 849 break; 850 case '_': 851 url.append(c); 852 break; 853 case '`': 854 url.append("%60"); 855 break; 856 case 'a': 857 url.append(c); 858 break; 859 case 'b': 860 url.append(c); 861 break; 862 case 'c': 863 url.append(c); 864 break; 865 case 'd': 866 url.append(c); 867 break; 868 case 'e': 869 url.append(c); 870 break; 871 case 'f': 872 url.append(c); 873 break; 874 case 'g': 875 url.append(c); 876 break; 877 case 'h': 878 url.append(c); 879 break; 880 case 'i': 881 url.append(c); 882 break; 883 case 'j': 884 url.append(c); 885 break; 886 case 'k': 887 url.append(c); 888 break; 889 case 'l': 890 url.append(c); 891 break; 892 case 'm': 893 url.append(c); 894 break; 895 case 'n': 896 url.append(c); 897 break; 898 case 'o': 899 url.append(c); 900 break; 901 case 'p': 902 url.append(c); 903 break; 904 case 'q': 905 url.append(c); 906 break; 907 case 'r': 908 url.append(c); 909 break; 910 case 's': 911 url.append(c); 912 break; 913 case 't': 914 url.append(c); 915 break; 916 case 'u': 917 url.append(c); 918 break; 919 case 'v': 920 url.append(c); 921 break; 922 case 'w': 923 url.append(c); 924 break; 925 case 'x': 926 url.append(c); 927 break; 928 case 'y': 929 url.append(c); 930 break; 931 case 'z': 932 url.append(c); 933 break; 934 case '{': 935 url.append("%7B"); 936 break; 937 case '|': 938 url.append("%7C"); 939 break; 940 case '}': 941 url.append("%7D"); 942 break; 943 case '~': 944 url.append(c); 945 break; 946 default: 947 if (c < 0xD800 || c > 0xDFFF) { 948 url.append(URIUtil.percentEscape(c)); 949 } 950 else if (c <= 0xDBFF) { 951 i++; 954 try { 955 char low = absolute.charAt(i); 956 String character = String.valueOf(c)+String.valueOf(low); 957 byte[] data = character.getBytes("UTF8"); 958 for (int j=0; j < 4; j++) { 960 url.append('%'); 961 String hex = Integer.toHexString(data[j]).toUpperCase(); 962 url.append(hex.substring(hex.length()-2)); 963 } 964 } 965 catch (IndexOutOfBoundsException ex) { 966 url = new StringBuffer (); 968 break; 969 } 970 } 971 else { 972 url = new StringBuffer (); 975 break; 976 } 977 } 978 } 979 } 980 981 String base = url.toString(); 982 try { 983 Document doc = build(fin, base); 984 return doc; 985 } 986 finally { 987 fin.close(); 988 } 989 990 } 991 992 993 1008 public Document build(Reader in) 1009 throws ParsingException, ValidityException, IOException { 1010 1011 InputSource source = new InputSource (in); 1012 return build(source); 1013 1014 } 1015 1016 1017 1036 public Document build(Reader in, String baseURI) 1037 throws ParsingException, ValidityException, IOException { 1038 1039 baseURI = canonicalizeURL(baseURI); 1040 InputSource source = new InputSource (in); 1041 source.setSystemId(baseURI); 1042 return build(source); 1043 1044 } 1045 1046 1047 1065 public Document build(String document, String baseURI) 1066 throws ParsingException, ValidityException, IOException { 1067 1068 Reader reader = new StringReader (document); 1069 return build(reader, baseURI); 1070 1071 } 1072 1073 private String canonicalizeURL(String uri) { 1077 1078 try { 1079 URL u = new URL (uri); 1080 String path = u.getFile(); 1081 if (path == null || path.length() == 0 1082 || ("/".equals(path) && !(uri.endsWith("/")))) { 1083 uri += '/'; 1084 } 1085 return uri; 1086 } 1087 catch (MalformedURLException ex) { 1088 return uri; 1089 } 1090 } 1091 1092 1093 1110 private Document build(InputSource in) 1111 throws ParsingException, ValidityException, IOException { 1112 1113 try { 1114 parser.parse(in); 1115 } 1116 catch (SAXParseException ex) { 1117 ParsingException pex = new ParsingException( 1118 ex.getMessage(), 1119 ex.getSystemId(), 1120 ex.getLineNumber(), 1121 ex.getColumnNumber(), 1122 ex.getException()); 1123 throw pex; 1124 } 1125 catch (SAXException ex) { 1126 ParsingException pex 1127 = new ParsingException(ex.getMessage(), in.getSystemId(), ex); 1128 throw pex; 1129 } 1130 catch (XMLException ex) { 1131 throw new ParsingException(ex.getMessage(), ex); 1132 } 1133 catch (RuntimeException ex) { 1134 ParsingException pex 1136 = new ParsingException(ex.getMessage(), in.getSystemId(), ex); 1137 throw pex; 1138 } 1139 catch (UTFDataFormatException ex) { 1140 ParsingException pex 1143 = new ParsingException(ex.getMessage(), in.getSystemId(), ex); 1144 throw pex; 1145 } 1146 catch (CharConversionException ex) { 1147 ParsingException pex 1150 = new ParsingException(ex.getMessage(), in.getSystemId(), ex); 1151 throw pex; 1152 } 1153 catch (IOException ex) { 1154 if (ex.getClass().getName().equals( 1158 "org.apache.xerces.util.URI$MalformedURIException")) { 1159 throw new ParsingException(ex.getMessage(), in.getSystemId(), ex); 1160 } 1161 else { 1162 throw ex; 1163 } 1164 } 1165 1166 XOMHandler handler = (XOMHandler) parser.getContentHandler(); 1167 ErrorHandler errorHandler = parser.getErrorHandler(); 1168 Document result = handler.getDocument(); 1169 if (result != null && "".equals(result.getBaseURI())) { 1170 result.setBaseURI(in.getSystemId()); 1171 } 1172 1173 if (errorHandler instanceof ValidityRequired) { 1174 ValidityRequired validityHandler 1175 = (ValidityRequired) errorHandler; 1176 if (!validityHandler.isValid()) { 1177 ValidityException vex = validityHandler.vexception; 1178 vex.setDocument(result); 1179 validityHandler.reset(); 1180 throw vex; 1181 } 1182 } 1183 return result; 1184 1185 } 1186 1187 1188 private static class ValidityRequired implements ErrorHandler { 1189 1190 ValidityException vexception = null; 1191 1192 void reset() { 1193 vexception = null; 1194 } 1195 1196 public void warning(SAXParseException exception) { 1197 } 1199 1200 public void error(SAXParseException exception) { 1201 1202 if (vexception == null) { 1203 vexception = new ValidityException( 1204 exception.getMessage(), 1205 exception.getSystemId(), 1206 exception.getLineNumber(), 1207 exception.getColumnNumber(), 1208 exception); 1209 } 1210 vexception.addError(exception); 1211 } 1212 1213 public void fatalError(SAXParseException exception) 1214 throws SAXParseException { 1215 throw exception; 1216 } 1217 1218 boolean isValid() { 1219 return vexception == null; 1220 } 1221 1222 } 1223 1224 1225 private static class NamespaceWellformednessRequired 1227 implements ErrorHandler { 1228 1229 public void warning(SAXParseException exception) { 1230 } 1232 1233 public void error(SAXParseException exception) 1234 throws SAXParseException { 1235 throw exception; 1236 } 1237 1238 public void fatalError(SAXParseException exception) 1239 throws SAXParseException { 1240 throw exception; 1241 } 1242 1243 } 1244 1245 1246 1255 public NodeFactory getNodeFactory() { 1256 return factory; 1257 } 1258 1259 1260} | Popular Tags |