1 21 22 package nu.xom.xinclude; 23 24 import java.io.BufferedInputStream ; 25 import java.io.BufferedReader ; 26 import java.io.IOException ; 27 import java.io.InputStream ; 28 import java.io.InputStreamReader ; 29 import java.io.Reader ; 30 import java.io.UnsupportedEncodingException ; 31 import java.net.MalformedURLException ; 32 import java.net.URL ; 33 import java.net.URLConnection ; 34 import java.util.Locale ; 35 import java.util.Stack ; 36 37 import nu.xom.Attribute; 38 import nu.xom.Builder; 39 import nu.xom.DocType; 40 import nu.xom.Document; 41 import nu.xom.Element; 42 import nu.xom.Elements; 43 import nu.xom.MalformedURIException; 44 import nu.xom.Node; 45 import nu.xom.NodeFactory; 46 import nu.xom.Nodes; 47 import nu.xom.ParentNode; 48 import nu.xom.ParsingException; 49 import nu.xom.Text; 50 51 68 public class XIncluder { 69 70 private static String version = System.getProperty("java.version"); 71 72 76 private XIncluder() {} 78 79 84 public final static String XINCLUDE_NS 85 = "http://www.w3.org/2001/XInclude"; 86 87 127 public static Document resolve(Document in) 128 throws BadParseAttributeException, InclusionLoopException, 129 IOException , NoIncludeLocationException, ParsingException, 130 UnsupportedEncodingException , XIncludeException { 131 132 Builder builder = new Builder(); 133 return resolve(in, builder); 134 135 } 136 137 178 public static Document resolve(Document in, Builder builder) 179 throws BadParseAttributeException, InclusionLoopException, 180 IOException , NoIncludeLocationException, ParsingException, 181 UnsupportedEncodingException , XIncludeException { 182 183 Document copy = new Document(in); 184 resolveInPlace(copy, builder); 185 return copy; 186 187 } 188 189 231 public static void resolveInPlace(Document in) 232 throws BadParseAttributeException, InclusionLoopException, 233 IOException , NoIncludeLocationException, ParsingException, 234 UnsupportedEncodingException , XIncludeException { 235 resolveInPlace(in, new Builder()); 236 } 237 238 281 public static void resolveInPlace(Document in, Builder builder) 282 throws BadParseAttributeException, InclusionLoopException, 283 IOException , NoIncludeLocationException, ParsingException, 284 UnsupportedEncodingException , XIncludeException { 285 286 Stack stack = new Stack (); 287 resolveInPlace(in, builder, stack); 288 289 } 290 291 292 private static void resolveInPlace( 293 Document in, Builder builder, Stack baseURLs) 294 throws IOException , ParsingException, XIncludeException { 295 296 String base = in.getBaseURI(); 297 if (base != null && base.startsWith("file:///")) { 299 base = "file:/" + base.substring(8); 300 } 301 302 baseURLs.push(base); 303 Element root = in.getRootElement(); 304 resolve(root, builder, baseURLs); 305 baseURLs.pop(); 306 307 } 308 309 310 private static void resolve( 311 Element element, Builder builder, Stack baseURLs) 312 throws IOException , ParsingException, XIncludeException { 313 314 resolve(element, builder, baseURLs, null); 315 316 } 317 318 319 private static void resolve( 320 Element element, Builder builder, Stack baseURLs, Document originalDoc) 321 throws IOException , ParsingException, XIncludeException { 322 323 if (isIncludeElement(element)) { 324 verifyIncludeElement(element); 325 326 String parse = element.getAttributeValue("parse"); 327 if (parse == null) parse = "xml"; 328 String xpointer = element.getAttributeValue("xpointer"); 329 String encoding = element.getAttributeValue("encoding"); 330 String href = element.getAttributeValue("href"); 331 if ("".equals(href)) href = null; 333 334 ParentNode parent = element.getParent(); 335 String base = element.getBaseURI(); 336 URL baseURL = null; 337 try { 338 baseURL = new URL (base); 339 } 340 catch (MalformedURLException ex) { 341 } 343 URL url = null; 344 try { 345 348 if (baseURL != null && href != null) { 349 url = absolutize(baseURL, href); 350 } 351 else if (href != null) { 352 testURISyntax(href); 353 url = new URL (href); 354 } 355 356 String accept = element.getAttributeValue("accept"); 357 checkHeader(accept); 358 String acceptLanguage = element.getAttributeValue("accept-language"); 359 checkHeader(acceptLanguage); 360 361 if (parse.equals("xml")) { 362 363 String parentLanguage = ""; 364 if (parent instanceof Element) { 365 parentLanguage = getXMLLangValue((Element) parent); 366 } 367 368 Nodes replacements; 369 if (url != null) { 370 replacements = downloadXMLDocument(url, 371 xpointer, builder, baseURLs, accept, acceptLanguage, parentLanguage); 372 for (int i = 0; i < replacements.size(); i++) { 380 Node child = replacements.get(i); 381 if (child instanceof Element) { 382 String noFragment = child.getBaseURI(); 383 if (noFragment.indexOf('#') >= 0) { 384 noFragment = noFragment.substring( 385 0, noFragment.indexOf('#')); 386 } 387 Element baseless = (Element) child; 388 Attribute baseAttribute = new Attribute( 389 "xml:base", 390 "http://www.w3.org/XML/1998/namespace", 391 noFragment 392 ); 393 baseless.addAttribute(baseAttribute); 394 395 } 396 } 397 } 398 else { 399 Document parentDoc = element.getDocument(); 400 if (parentDoc == null) { 401 parentDoc = originalDoc; 402 } 403 Nodes originals = XPointer.query(parentDoc, xpointer); 404 replacements = new Nodes(); 405 for (int i = 0; i < originals.size(); i++) { 406 Node original = originals.get(i); 407 if (contains((Element) original, element)) { 409 throw new InclusionLoopException( 410 "Element tried to include itself" 411 ); 412 } 413 Node copy = original.copy(); 414 replacements.append(copy); 415 } 416 replacements = resolveXPointerSelection( 417 replacements, builder, baseURLs, parentDoc); 418 419 } 420 421 if (parent instanceof Element) { 429 int position = parent.indexOf(element); 430 for (int i = 0; i < replacements.size(); i++) { 431 Node child = replacements.get(i); 432 parent.insertChild(child, position+i); 433 } 434 element.detach(); 435 } 436 else { Document doc = (Document) parent; 443 int i = 0; 444 while (true) { 446 Node child = replacements.get(i); 447 i++; 448 if (child instanceof Element) { 449 doc.setRootElement((Element) child); 450 break; 451 } 452 else { 453 doc.insertChild( 454 child, doc.indexOf(element) 455 ); 456 } 457 458 } 459 Element root = doc.getRootElement(); 461 int position = doc.indexOf(root); 462 for (int j=i; j < replacements.size(); j++) { 463 doc.insertChild( 464 replacements.get(j), position+1+j-i 465 ); 466 } 467 } 468 } 469 else if (parse.equals("text")) { 470 Nodes replacements 471 = downloadTextDocument(url, encoding, builder, accept, acceptLanguage); 472 for (int j = 0; j < replacements.size(); j++) { 473 Node replacement = replacements.get(j); 474 if (replacement instanceof Attribute) { 475 ((Element) parent).addAttribute((Attribute) replacement); 476 } 477 else { 478 parent.insertChild(replacement, parent.indexOf(element)); 479 } 480 } 481 parent.removeChild(element); 482 } 483 else { 484 throw new BadParseAttributeException( 485 "Bad value for parse attribute: " + parse, 486 element.getDocument().getBaseURI()); 487 } 488 489 } 490 catch (IOException ex) { 491 processFallback(element, builder, baseURLs, parent, ex); 492 } 493 catch (XPointerSyntaxException ex) { 494 processFallback(element, builder, baseURLs, parent, ex); 495 } 496 catch (XPointerResourceException ex) { 497 processFallback(element, builder, baseURLs, parent, ex); 503 } 504 505 } 506 else if (isFallbackElement(element)) { 507 throw new MisplacedFallbackException( 508 "Fallback element outside include element", 509 element.getDocument().getBaseURI() 510 ); 511 } 512 else { 513 Elements children = element.getChildElements(); 514 for (int i = 0; i < children.size(); i++) { 515 resolve(children.get(i), builder, baseURLs); 516 } 517 } 518 519 } 520 521 522 private static void verifyIncludeElement(Element element) 523 throws XIncludeException { 524 525 testHref(element); 526 testForFragmentIdentifier(element); 527 verifyEncoding(element); 528 testForForbiddenChildElements(element); 529 } 530 531 532 private static void testHref(Element include) throws NoIncludeLocationException { 533 534 String href = include.getAttributeValue("href"); 535 String xpointer = include.getAttributeValue("xpointer"); 536 if (href == null && xpointer == null) { 537 throw new NoIncludeLocationException( 538 "Missing href attribute", 539 include.getDocument().getBaseURI() 540 ); 541 } 542 } 543 544 545 private static void testForFragmentIdentifier(Element include) 546 throws BadHrefAttributeException { 547 548 String href = include.getAttributeValue("href"); 549 if (href != null) { 550 if (href.indexOf('#') > -1) { 551 throw new BadHrefAttributeException( 552 "fragment identifier in URI " + href, include.getBaseURI() 553 ); 554 } 555 } 556 557 } 558 559 560 private static void verifyEncoding(Element include) 561 throws BadEncodingAttributeException { 562 563 String encoding = include.getAttributeValue("encoding"); 564 if (encoding == null) return; 565 char[] text = encoding.toCharArray(); 568 if (text.length == 0) { 569 throw new BadEncodingAttributeException( 570 "Empty encoding attribute", include.getBaseURI()); 571 } 572 char c = text[0]; 573 if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { 574 throw new BadEncodingAttributeException( 575 "Illegal value for encoding attribute: " + encoding, include.getBaseURI() 576 ); 577 } 578 for (int i = 1; i < text.length; i++) { 579 c = text[i]; 580 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') 581 || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.') { 582 continue; 583 } 584 throw new BadEncodingAttributeException( 585 "Illegal value for encoding attribute: " + encoding, include.getBaseURI() 586 ); 587 } 588 589 } 590 591 592 private static URL absolutize(URL baseURL, String href) 594 throws MalformedURLException , BadHrefAttributeException { 595 596 Element parent = new Element("c"); 597 parent.setBaseURI(baseURL.toExternalForm()); 598 Element child = new Element("c"); 599 parent.appendChild(child); 600 child.addAttribute(new Attribute( 601 "xml:base", "http://www.w3.org/XML/1998/namespace", href)); 602 URL result = new URL (child.getBaseURI()); 603 if (!"".equals(href) && result.equals(baseURL)) { 604 if (! baseURL.toExternalForm().endsWith(href)) { 605 throw new BadHrefAttributeException(href 606 + " is not a syntactically correct IRI"); 607 } 608 } 609 return result; 610 611 } 612 613 614 private static void testURISyntax(String href) 615 throws BadHrefAttributeException { 616 617 try { 618 Element e = new Element("e"); 619 e.setNamespaceURI(href); 620 } 621 catch (MalformedURIException ex) { 622 throw new BadHrefAttributeException("Illegal IRI in href attribute", href); 623 } 624 625 } 626 627 628 private static String getXMLLangValue(Element element) { 629 630 while (true) { 631 Attribute lang = element.getAttribute( 632 "lang", "http://www.w3.org/XML/1998/namespace"); 633 if (lang != null) return lang.getValue(); 634 ParentNode parent = element.getParent(); 635 if (parent == null) return ""; 636 else if (parent instanceof Document) return ""; 637 else element = (Element) parent; 638 } 639 640 } 641 642 643 private static Nodes resolveXPointerSelection(Nodes in, 646 Builder builder, Stack baseURLs, Document original) 647 throws IOException , ParsingException, XIncludeException { 648 649 Element preinclude = (Element) in.get(0); 650 return resolveSilently(preinclude, builder, baseURLs, original); 651 652 } 653 654 655 private static boolean contains(ParentNode ancestor, Node descendant) { 656 657 for (Node parent = descendant; 658 parent != null; 659 parent=parent.getParent()) { 660 if (parent == ancestor) return true; 661 } 662 663 return false; 664 665 } 666 667 668 private static Nodes resolveSilently( 669 Element element, Builder builder, Stack baseURLs, Document originalDoc) 670 throws IOException , ParsingException, XIncludeException { 671 672 if (isIncludeElement(element) || isFallbackElement(element) ) { 675 throw new RuntimeException ( 676 "XOM BUG: include or fallback element passed to resolveSilently;" 677 + " please report with a test case"); 678 } 679 680 Elements children = element.getChildElements(); 681 for (int i = 0; i < children.size(); i++) { 682 resolve(children.get(i), builder, baseURLs, originalDoc); 683 } 684 return new Nodes(element); 685 686 } 687 688 689 private static void testForForbiddenChildElements(Element element) 690 throws XIncludeException { 691 692 int fallbacks = 0; 693 Elements children = element.getChildElements(); 694 int size = children.size(); 695 for (int i = 0; i < size; i++) { 696 Element child = children.get(i); 697 if (XINCLUDE_NS.equals(child.getNamespaceURI())) { 698 if ("fallback".equals(child.getLocalName())) { 699 fallbacks++; 700 if (fallbacks > 1) { 701 throw new XIncludeException("Multiple fallback elements", 702 element.getDocument().getBaseURI()); 703 } 704 } 705 else { 706 throw new XIncludeException( 707 "Include element contains an include child", 708 element.getDocument().getBaseURI()); 709 } 710 } 711 } 712 713 } 714 715 716 private static void processFallback(Element includeElement, 717 Builder builder, Stack baseURLs, ParentNode parent, Exception ex) 718 throws XIncludeException, IOException , ParsingException { 719 720 Element fallback 721 = includeElement.getFirstChildElement("fallback", XINCLUDE_NS); 722 if (fallback == null) { 723 if (ex instanceof IOException ) throw (IOException ) ex; 724 XIncludeException ex2 = new XIncludeException( 725 ex.getMessage(), includeElement.getDocument().getBaseURI()); 726 ex2.initCause(ex); 727 throw ex2; 728 } 729 730 while (fallback.getChildCount() > 0) { 731 Node child = fallback.getChild(0); 732 if (child instanceof Element) { 733 resolve((Element) child, builder, baseURLs); 734 } 735 child = fallback.getChild(0); 736 child.detach(); 737 parent.insertChild(child, parent.indexOf(includeElement)); 738 } 739 includeElement.detach(); 740 741 } 742 743 744 private static Nodes downloadXMLDocument( 746 URL source, String xpointer, Builder builder, Stack baseURLs, 747 String accept, String acceptLanguage, String parentLanguage) 748 throws IOException , ParsingException, XIncludeException, 749 XPointerSyntaxException, XPointerResourceException { 750 751 String base = source.toExternalForm(); 752 if (xpointer == null && baseURLs.indexOf(base) != -1) { 753 throw new InclusionLoopException( 754 "Tried to include the already included document " + base + 755 " from " + baseURLs.peek(), (String ) baseURLs.peek()); 756 } 757 758 URLConnection uc = source.openConnection(); 759 setHeaders(uc, accept, acceptLanguage); 760 InputStream in = new BufferedInputStream (uc.getInputStream()); 761 Document doc; 762 try { 763 doc = builder.build(in, source.toExternalForm()); 764 } 765 finally { 766 in.close(); 767 } 768 769 resolveInPlace(doc, builder, baseURLs); 770 Nodes included; 771 if (xpointer != null && xpointer.length() != 0) { 772 included = XPointer.query(doc, xpointer); 773 for (int i = 0; i < included.size(); i++) { 775 Node node = included.get(i); 776 Element top = (Element) node; 778 Attribute lang = top.getAttribute("lang", 779 "http://www.w3.org/XML/1998/namespace"); 780 if (lang == null) { 781 String childLanguage = getXMLLangValue(top); 782 if (!parentLanguage.equals(childLanguage)) { 783 top.addAttribute(new Attribute("xml:lang", 784 "http://www.w3.org/XML/1998/namespace", 785 childLanguage)); 786 } 787 } 788 } 789 } 790 else { 791 included = new Nodes(); 792 for (int i = 0; i < doc.getChildCount(); i++) { 793 Node child = doc.getChild(i); 794 if (!(child instanceof DocType)) { 795 included.append(child); 796 } 797 } 798 } 799 doc.setRootElement(new Element("f")); 801 for (int i = 0; i < included.size(); i++) { 802 Node node = included.get(i); 803 String noFragment = node.getBaseURI(); 806 if (noFragment.indexOf('#') >= 0) { 807 noFragment = noFragment.substring(0, noFragment.indexOf('#')); 808 } 809 node.detach(); 810 if (node instanceof Element) { 811 ((Element) node).setBaseURI(noFragment); 812 } 813 } 814 815 return included; 816 817 } 818 819 820 838 private static Nodes downloadTextDocument( 839 URL source, String encoding, Builder builder, 840 String accept, String language) 841 throws IOException , XIncludeException { 842 843 if (encoding == null || encoding.length() == 0) { 844 encoding = "UTF-8"; 845 } 846 847 URLConnection uc = source.openConnection(); 848 setHeaders(uc, accept, language); 849 850 String encodingFromHeader = uc.getContentEncoding(); 851 String contentType = uc.getContentType(); 852 int contentLength = uc.getContentLength(); 853 if (contentLength < 0) contentLength = 1024; 854 InputStream in = new BufferedInputStream (uc.getInputStream()); 855 try { 856 if (encodingFromHeader != null) encoding = encodingFromHeader; 857 else { 858 if (contentType != null) { 859 contentType = contentType.toLowerCase(Locale.ENGLISH); 860 if (contentType.equals("text/xml") 861 || contentType.equals("application/xml") 862 || (contentType.startsWith("text/") 863 && contentType.endsWith("+xml") ) 864 || (contentType.startsWith("application/") 865 && contentType.endsWith("+xml"))) { 866 encoding 867 = EncodingHeuristics.readEncodingFromStream(in); 868 } 869 } 870 } 871 if (version.startsWith("1.2") || version.startsWith("1.1")) { 873 if (encoding.equalsIgnoreCase("UTF-16")) { 874 in.mark(2); 876 int first = in.read(); 877 if (first == 0xFF) encoding = "UnicodeLittle"; 878 else encoding="UnicodeBig"; 879 in.reset(); 880 } 881 else if (encoding.equalsIgnoreCase("UnicodeBigUnmarked")) { 882 encoding = "UnicodeBig"; 883 } 884 else if (encoding.equalsIgnoreCase("UnicodeLittleUnmarked")) { 885 encoding = "UnicodeLittle"; 886 } 887 } 888 Reader reader = new BufferedReader ( 889 new InputStreamReader (in, encoding) 890 ); 891 StringBuffer sb = new StringBuffer (contentLength); 892 for (int c = reader.read(); c != -1; c = reader.read()) { 893 sb.append((char) c); 894 } 895 896 NodeFactory factory = builder.getNodeFactory(); 897 if (factory != null) { 898 return factory.makeText(sb.toString()); 899 } 900 else return new Nodes(new Text(sb.toString())); 901 } 902 finally { 903 in.close(); 904 } 905 906 } 907 908 909 private static void setHeaders(URLConnection uc, String accept, 910 String language) throws BadHTTPHeaderException { 911 912 if (accept != null) { 913 checkHeader(accept); 914 uc.setRequestProperty("accept", accept); 915 } 916 if (language != null) { 917 checkHeader(language); 918 uc.setRequestProperty("accept-language", language); 919 } 920 921 } 922 923 924 private static void checkHeader(String header) 925 throws BadHTTPHeaderException { 926 927 if (header == null) return; 928 int length = header.length(); 929 for (int i = 0; i < length; i++) { 930 char c = header.charAt(i); 931 if (c < 0x20 || c > 0x7E) { 932 throw new BadHTTPHeaderException( 933 "Header contains illegal character 0x" 934 + Integer.toHexString(c).toUpperCase()); 935 } 936 } 937 938 } 939 940 941 private static boolean isIncludeElement(Element element) { 942 943 return element.getLocalName().equals("include") 944 && element.getNamespaceURI().equals(XINCLUDE_NS); 945 946 } 947 948 949 private static boolean isFallbackElement(Element element) { 950 951 return element.getLocalName().equals("fallback") 952 && element.getNamespaceURI().equals(XINCLUDE_NS); 953 954 } 955 956 957 } | Popular Tags |