package gnu.xml;

import gnu.lists.*;
import java.io.*;
import gnu.text.Char;
import gnu.math.RealNum;
import gnu.text.PrettyWriter;
import gnu.mapping.OutPort;
import gnu.mapping.ThreadLocation;
import gnu.mapping.Symbol;
import java.math.BigDecimal;
import gnu.expr.Keyword;

/**
 * An output port that renders consumer events (documents, groups/elements,
 * attributes, comments, CDATA, processing instructions and atomic values)
 * as XML, XHTML, HTML or plain text, depending on the configured style.
 *
 * <p>Start tags are written lazily: {@link #beginGroup} emits
 * {@code <name} and leaves the tag open so that attributes can follow;
 * the closing {@code >} is produced by {@link #closeTag} as soon as any
 * content is written, or by {@link #endGroup} for an empty element.
 */
public class XMLPrinter extends OutPort
    implements PositionConsumer, XConsumer {

  /**
   * Indentation mode. Negative disables pretty-printing; zero requests
   * {@code NEWLINE_LINEAR} breaks between elements; a positive value
   * requests {@code NEWLINE_MANDATORY} breaks. Set by
   * {@link #setIndentMode()} from {@link #indentLoc}.
   */
  public int printIndent = -1;

  /**
   * When pretty-printing, wrap the attributes of a start tag in their own
   * logical block.
   */
  public boolean indentAttributes;

  /** Whether {@link #beginDocument()} should request an XML declaration. */
  boolean printXMLdecl = false;

  /** Sets whether an XML declaration is requested by {@link #beginDocument()}. */
  public void setPrintXMLdecl(boolean value) {
    printXMLdecl = value;
  }

  /** True between {@link #beginDocument()} and {@link #endDocument()}. */
  boolean inDocument;

  /** True while an attribute value is open (its closing quote is pending). */
  boolean inAttribute = false;

  /** True while a start tag is open (its closing {@code >} is pending). */
  boolean inStartTag = false;

  /**
   * Comment state: 0 when not in a comment, 1 inside a comment, 2 inside a
   * comment when the previous character written was {@code '-'}.
   */
  int inComment;

  /** True while an XML declaration still has to be written by {@link #closeTag()}. */
  boolean needXMLdecl = false;

  /**
   * Selects canonical-style output: namespace declarations are sorted and
   * {@link #setStyle} disables empty-element tags.
   */
  boolean canonicalize = true;

  /** When true, CDATA sections are written as ordinary escaped text. */
  public boolean canonicalizeCDATA;

  /**
   * How an element without content is closed: 0 always writes a separate
   * end tag; 2 writes {@code " />"}; any other value writes {@code "/>"}.
   */
  public int useEmptyElementTag = 2;

  /** When false, characters are written through without any escaping. */
  public boolean escapeText = true;

  /** When true, characters with code 127 or above become numeric character references. */
  public boolean escapeNonAscii = true;

  /** True when the "html" style is active. */
  boolean isHtml = false;

  /**
   * When true, {@link #beginGroup} writes {@code xmlns...=""} for inherited
   * bindings that the new element does not resolve.
   */
  boolean undeclareNamespaces = false;

  /** The style object most recently passed to {@link #setStyle}. */
  Object style;

  /** Fluid holding the system identifier used for the DOCTYPE declaration. */
  public static final ThreadLocation doctypeSystem
      = new ThreadLocation("doctype-system");

  /** Fluid holding the public identifier used for the DOCTYPE declaration. */
  public static final ThreadLocation doctypePublic
      = new ThreadLocation("doctype-public");

  /** Fluid holding the indentation request; see {@link #setIndentMode()}. */
  public static final ThreadLocation indentLoc
      = new ThreadLocation("xml-indent");

  /** Namespace bindings in effect for the innermost open element. */
  NamespaceBinding namespaceBindings = NamespaceBinding.predefinedXML;

  /** For each open element, the bindings that were in effect outside it. */
  NamespaceBinding[] namespaceSaveStack = new NamespaceBinding[20];

  /** For each open element, the type (name) object given to {@link #beginGroup}. */
  Object[] groupNameStack = new Object[20];

  /** Number of currently open elements; index of the next free stack slot. */
  int groupNesting;

  // Pseudo-character codes stored in 'prev' to remember what kind of
  // construct was written last.
  private static final int WORD = -2;
  private static final int ELEMENT_START = -3;
  private static final int ELEMENT_END = -4;
  private static final int COMMENT = -5;
  private static final int KEYWORD = -6;

  /** The last character written, or one of the negative pseudo-codes above. */
  int prev = ' ';

  /** High surrogate seen by {@link #write(int)} and awaiting its low surrogate. */
  char savedHighSurrogate;

  public XMLPrinter(OutPort out, boolean autoFlush) {
    super(out, autoFlush);
  }

  public XMLPrinter(Writer out, boolean autoFlush) {
    super(out, autoFlush);
  }

  public XMLPrinter(OutputStream out, boolean autoFlush) {
    super(new OutputStreamWriter(out), true, autoFlush);
  }

  public XMLPrinter(Writer out) {
    super(out);
  }

  public XMLPrinter(OutputStream out) {
    super(new OutputStreamWriter(out), false, false);
  }

  public XMLPrinter(OutputStream out, String fname) {
    super(new OutputStreamWriter(out), true, false, fname);
  }

  /** Creates an auto-flushing printer on {@code out} configured with {@code style}. */
  public static XMLPrinter make(OutPort out, Object style) {
    XMLPrinter xout = new XMLPrinter(out, true);
    xout.setStyle(style);
    return xout;
  }

  /** Returns the text this printer produces for {@code value}. */
  public static String toString(Object value) {
    StringWriter stringWriter = new StringWriter();
    new XMLPrinter(stringWriter).writeObject(value);
    return stringWriter.toString();
  }

  /**
   * Selects the output style. The recognized values are the strings
   * {@code "html"}, {@code "xhtml"} and {@code "plain"}; anything else
   * only resets {@link #useEmptyElementTag} according to
   * {@link #canonicalize}.
   */
  public void setStyle(Object style) {
    this.style = style;
    useEmptyElementTag = canonicalize ? 0 : 1;
    if ("html".equals(style)) {
      isHtml = true;
      useEmptyElementTag = 2;
    }
    if ("xhtml".equals(style)) {
      useEmptyElementTag = 2;
    }
    if ("plain".equals(style)) {
      escapeText = false;
    }
  }

  /**
   * Writes one character, closing any pending start tag first and applying
   * the escaping rules of the current context (text, attribute or comment).
   */
  public void write(int v) {
    closeTag();
    if (printIndent >= 0) {
      if (v == '\r' || v == '\n') {
        // When pretty-printing, line ends become pretty-printer breaks;
        // the '\n' of a "\r\n" pair is not counted twice.
        if (v != '\n' || prev != '\r') {
          writeBreak(PrettyWriter.NEWLINE_MANDATORY);
        }
        if (inComment > 0) {
          inComment = 1;
        }
        return;
      }
    }
    if (!escapeText) {
      bout.write(v);
      prev = v;
    } else if (inComment > 0) {
      if (v == '-') {
        // Separate consecutive hyphens with a space so that "--" never
        // appears inside the comment.
        if (inComment == 1) {
          inComment = 2;
        } else {
          bout.write(' ');
        }
      } else {
        inComment = 1;
      }
      super.write(v);
    } else {
      prev = ';';
      if (v == '<' && !(isHtml && inAttribute)) {
        bout.write("&lt;");
      } else if (v == '>') {
        bout.write("&gt;");
      } else if (v == '&') {
        bout.write("&amp;");
      } else if (v == '\"' && inAttribute) {
        bout.write("&quot;");
      } else if ((escapeNonAscii && v >= 127)
                 // Control characters are always escaped inside attribute
                 // values; in text, tab and newline are written as-is.
                 || (v < ' ' && (inAttribute || (v != '\t' && v != '\n')))) {
        int i = v;
        if (v >= 0xD800) {
          if (v < 0xDC00) {
            // High surrogate: remember it and wait for the low surrogate.
            savedHighSurrogate = (char) v;
            return;
          } else if (v < 0xE000) {
            // Low surrogate: combine with the saved high surrogate into
            // a single supplementary code point.
            i = (savedHighSurrogate - 0xD800) * 0x400
                + (i - 0xDC00) + 0x10000;
            savedHighSurrogate = 0;
          }
        }
        bout.write("&#x" + Integer.toHexString(i).toUpperCase() + ";");
      } else {
        bout.write(v);
        prev = v;
      }
    }
  }

  /** Closes any pending start tag and marks the start of a word. */
  private void startWord() {
    closeTag();
    writeWordStart();
  }

  public void writeBoolean(boolean v) {
    startWord();
    super.print(v);
    writeWordEnd();
  }

  protected void startNumber() {
    startWord();
  }

  protected void endNumber() {
    writeWordEnd();
  }

  /**
   * Finishes a pending start tag by writing {@code >} (unless an attribute
   * value is still open); otherwise writes the XML declaration if one is
   * still owed.
   */
  public void closeTag() {
    if (inStartTag && !inAttribute) {
      if (printIndent >= 0 && indentAttributes) {
        endLogicalBlock("");
      }
      bout.write('>');
      inStartTag = false;
      prev = ELEMENT_START;
    } else if (needXMLdecl) {
      bout.write("<?xml version=\"1.0\"?>\n");
      if (printIndent >= 0) {
        // Opens the document-level block that beginDocument() deferred
        // while the declaration was pending.
        startLogicalBlock("", "", 2);
      }
      needXMLdecl = false;
    }
  }

  /**
   * Sets {@link #printIndent} from {@link #indentLoc}: {@code "pretty"}
   * gives 0, {@code "always"} or {@code "yes"} gives 1, anything else
   * (including no value) gives -1.
   */
  void setIndentMode() {
    Object xmlIndent = indentLoc.get(null);
    String indent = xmlIndent == null ? null : xmlIndent.toString();
    if (indent == null) {
      printIndent = -1;
    } else if (indent.equals("pretty")) {
      printIndent = 0;
    } else if (indent.equals("always") || indent.equals("yes")) {
      printIndent = 1;
    } else {
      printIndent = -1;
    }
  }

  public void beginDocument() {
    if (printXMLdecl) {
      needXMLdecl = true;
    }
    setIndentMode();
    inDocument = true;
    // If a declaration is pending, closeTag() opens this block instead,
    // after the declaration has been written.
    if (printIndent >= 0 && !needXMLdecl) {
      startLogicalBlock("", "", 2);
    }
  }

  public void endDocument() {
    inDocument = false;
    if (printIndent >= 0) {
      endLogicalBlock("");
    }
    freshLine();
  }

  public void beginEntity(Object base) {
  }

  public void endEntity() {
  }

  /**
   * Writes a qualified name: {@code prefix:local} for a {@link Symbol}
   * with a non-empty prefix, the local part alone otherwise. Any other
   * non-null name must be a {@code String}.
   */
  protected void writeQName(Object name) {
    if (name instanceof Symbol) {
      Symbol sname = (Symbol) name;
      String prefix = sname.getPrefix();
      if (prefix != null && prefix.length() > 0) {
        bout.write(prefix);
        bout.write(':');
      }
      bout.write(sname.getLocalPart());
    } else {
      bout.write(name == null ? "{null name}" : (String) name);
    }
  }

  /**
   * Starts an element: writes the DOCTYPE (for the outermost element, when
   * {@link #doctypeSystem} is set), then {@code <name} followed by any
   * namespace declarations that differ from the enclosing scope. The tag
   * is left open for attributes.
   */
  public void beginGroup(Object type) {
    closeTag();
    if (groupNesting == 0) {
      if (!inDocument) {
        setIndentMode();
      }
      Object systemIdentifier = doctypeSystem.get(null);
      if (systemIdentifier != null) {
        String systemId = systemIdentifier.toString();
        if (systemId.length() > 0) {
          Object publicIdentifier = doctypePublic.get(null);
          bout.write("<!DOCTYPE ");
          bout.write(type.toString());
          String publicId = publicIdentifier == null ? null
              : publicIdentifier.toString();
          if (publicId != null && publicId.length() > 0) {
            bout.write(" PUBLIC \"");
            bout.write(publicId);
            bout.write("\" \"");
          } else {
            bout.write(" SYSTEM \"");
          }
          bout.write(systemId);
          bout.write("\">");
          println();
        }
      }
    }
    if (printIndent >= 0) {
      if (prev == ELEMENT_START || prev == ELEMENT_END || prev == COMMENT) {
        writeBreak(printIndent > 0 ? PrettyWriter.NEWLINE_MANDATORY
                   : PrettyWriter.NEWLINE_LINEAR);
      }
      startLogicalBlock("", "", 2);
    }
    bout.write('<');
    writeQName(type);
    if (printIndent >= 0 && indentAttributes) {
      startLogicalBlock("", "", 2);
    }
    groupNameStack[groupNesting] = type;
    NamespaceBinding groupBindings = null;
    namespaceSaveStack[groupNesting++] = namespaceBindings;
    if (type instanceof XName) {
      groupBindings = ((XName) type).namespaceNodes;
      NamespaceBinding join
          = NamespaceBinding.commonAncestor(groupBindings, namespaceBindings);
      int numBindings = groupBindings == null ? 0
          : groupBindings.count(join);
      NamespaceBinding[] sortedBindings = new NamespaceBinding[numBindings];
      int i = 0;
      boolean sortNamespaces = canonicalize;
      // Collect the bindings that are new relative to the enclosing scope.
      // They are stored in the reverse of the order in which the loop
      // further down writes them.
      check_namespaces:
      for (NamespaceBinding ns = groupBindings; ns != join; ns = ns.next) {
        String prefix = ns.getPrefix();
        // A prefix already collected shadows this binding. This must be
        // decided before any shifting below, otherwise bailing out
        // half-way through the shift would leave the array corrupted.
        // Prefixes are compared by identity, as elsewhere in this class
        // (presumably they are interned -- TODO confirm).
        for (int k = i; --k >= 0; ) {
          if (prefix == sortedBindings[k].getPrefix()) {
            continue check_namespaces;
          }
        }
        int j = i;
        if (sortNamespaces) {
          // Insertion sort: shift entries up until the slot for 'ns' is
          // found. A null (default) prefix always goes to the top slot.
          while (--j >= 0) {
            NamespaceBinding ns_j = sortedBindings[j];
            String prefix_j = ns_j.getPrefix();
            if (prefix == null) {
              break;
            }
            if (prefix_j != null && prefix.compareTo(prefix_j) <= 0) {
              break;
            }
            sortedBindings[j + 1] = ns_j;
          }
          j++;
        }
        sortedBindings[j] = ns;
        i++;
      }
      numBindings = i;
      for (i = numBindings; --i >= 0; ) {
        NamespaceBinding ns = sortedBindings[i];
        String prefix = ns.prefix;
        String uri = ns.uri;
        // No declaration needed if the enclosing scope already maps the
        // prefix to the same URI.
        if (uri == namespaceBindings.resolve(prefix)) {
          continue;
        }
        bout.write(' ');
        if (prefix == null) {
          bout.write("xmlns");
        } else {
          bout.write("xmlns:");
          bout.write(prefix);
        }
        bout.write("=\"");
        // Write the URI with attribute-value escaping.
        inAttribute = true;
        if (uri != null) {
          write(uri);
        }
        inAttribute = false;
        bout.write('\"');
      }
      if (undeclareNamespaces) {
        for (NamespaceBinding ns = namespaceBindings;
             ns != join; ns = ns.next) {
          String prefix = ns.prefix;
          if (ns.uri != null && groupBindings.resolve(prefix) == null) {
            bout.write(' ');
            if (prefix == null) {
              bout.write("xmlns");
            } else {
              bout.write("xmlns:");
              bout.write(prefix);
            }
            bout.write("=\"\"");
          }
        }
      }
      namespaceBindings = groupBindings;
    }
    if (groupNesting >= namespaceSaveStack.length) {
      // Both stacks are full: double their capacity.
      NamespaceBinding[] nstmp = new NamespaceBinding[2 * groupNesting];
      System.arraycopy(namespaceSaveStack, 0, nstmp, 0, groupNesting);
      namespaceSaveStack = nstmp;
      Object[] nmtmp = new Object[2 * groupNesting];
      System.arraycopy(groupNameStack, 0, nmtmp, 0, groupNesting);
      groupNameStack = nmtmp;
    }

    inStartTag = true;
    if (isHtml) {
      String typeName = (type instanceof Symbol
                         ? ((Symbol) type).getLocalPart()
                         : type.toString());
      // In HTML the content of script and style elements is not escaped;
      // endGroup() switches escaping back on.
      if ("script".equals(typeName) || "style".equals(typeName)) {
        escapeText = false;
      }
    }
  }

  /** Slash-delimited list of HTML element names that are written without an end tag. */
  static final String HtmlEmptyTags
      = "/area/base/basefont/br/col/frame/hr/img/input/isindex/link/meta/para/";

  /** Returns true if {@code name} is one of the entries of {@link #HtmlEmptyTags}. */
  public static boolean isHtmlEmptyElementTag(String name) {
    int index = HtmlEmptyTags.indexOf(name);
    // The match must be a whole entry, i.e. delimited by '/' on both sides.
    return index > 0 && HtmlEmptyTags.charAt(index - 1) == '/'
        && HtmlEmptyTags.charAt(index + name.length()) == '/';
  }

  /**
   * Ends the innermost open element, writing either an empty-element form
   * (if the start tag is still open) or a separate end tag, and restores
   * the namespace bindings of the enclosing scope.
   */
  public void endGroup() {
    if (useEmptyElementTag == 0) {
      closeTag();
    }
    Object type = groupNameStack[groupNesting - 1];
    // The plain element name is only needed for the HTML special cases.
    String typeName = !isHtml ? null
        : type instanceof Symbol ? ((Symbol) type).getLocalPart()
        : type.toString();
    if (inStartTag) {
      if (printIndent >= 0 && indentAttributes) {
        endLogicalBlock("");
      }
      bout.write(isHtml
                 ? (isHtmlEmptyElementTag(typeName) ? ">" : "></" + typeName + ">")
                 : (useEmptyElementTag == 2 ? " />" : "/>"));
      inStartTag = false;
    } else {
      if (printIndent >= 0) {
        setIndentation(0, false);
        if (prev == ELEMENT_END) {
          writeBreak(printIndent > 0 ? PrettyWriter.NEWLINE_MANDATORY
                     : PrettyWriter.NEWLINE_LINEAR);
        }
      }
      bout.write("</");
      writeQName(type);
      bout.write(">");
    }
    if (printIndent >= 0) {
      endLogicalBlock("");
    }
    prev = ELEMENT_END;
    if (isHtml && !escapeText
        && ("script".equals(typeName) || "style".equals(typeName))) {
      escapeText = true;
    }

    namespaceBindings = namespaceSaveStack[--groupNesting];
    namespaceSaveStack[groupNesting] = null;
    groupNameStack[groupNesting] = null;
  }

  /**
   * Starts an attribute by writing {@code  name="}; if a previous
   * attribute value is still open, its closing quote is written first.
   */
  public void beginAttribute(Object attrType) {
    if (inAttribute) {
      bout.write('"');
    }
    inAttribute = true;
    bout.write(' ');
    if (printIndent >= 0) {
      writeBreakFill();
    }
    bout.write(attrType.toString());
    bout.write("=\"");
    prev = ' ';
  }

  /**
   * Ends the current attribute value. If the attribute was just opened by
   * a {@link Keyword} and no value has been written yet, the attribute
   * stays open so that the next value becomes its content.
   */
  public void endAttribute() {
    if (inAttribute) {
      if (prev != KEYWORD) {
        bout.write('"');
        inAttribute = false;
      }
      prev = ' ';
    }
  }

  public void writeDouble(double d) {
    startWord();
    bout.write(formatDouble(d));
  }

  public void writeFloat(float f) {
    startWord();
    bout.write(formatFloat(f));
  }

  /**
   * Formats a double: {@code NaN}, {@code INF} and {@code -INF} for the
   * special values; scientific notation when the magnitude is at least
   * 1000000 or below 0.000001 (zero excepted); decimal notation otherwise.
   */
  public static String formatDouble(double d) {
    if (Double.isNaN(d)) {
      return "NaN";
    }
    boolean neg = d < 0;
    if (Double.isInfinite(d)) {
      return neg ? "-INF" : "INF";
    }
    double dabs = neg ? -d : d;
    String dstr = Double.toString(d);
    if ((dabs >= 1000000 || dabs < 0.000001) && dabs != 0.0) {
      return RealNum.toStringScientific(dstr);
    } else {
      return formatDecimal(RealNum.toStringDecimal(dstr));
    }
  }

  /** Formats a float using the same rules as {@link #formatDouble(double)}. */
  public static String formatFloat(float f) {
    if (Float.isNaN(f)) {
      return "NaN";
    }
    boolean neg = f < 0;
    if (Float.isInfinite(f)) {
      return neg ? "-INF" : "INF";
    }
    float fabs = neg ? -f : f;
    String fstr = Float.toString(f);
    if ((fabs >= 1000000 || fabs < 0.000001) && fabs != 0.0) {
      return RealNum.toStringScientific(fstr);
    } else {
      return formatDecimal(RealNum.toStringDecimal(fstr));
    }
  }

  /**
   * Formats a decimal number with trailing fraction zeros removed.
   */
  public static String formatDecimal(BigDecimal dec) {
    // NOTE(review): on Java 5 and later BigDecimal.toString() may use
    // exponent notation; toPlainString() may be what is wanted here --
    // confirm the minimum supported Java version before changing.
    return formatDecimal(dec.toString());
  }

  /**
   * Strips trailing zeros after the decimal point of {@code str}, and the
   * point itself if no fraction digits remain. A string without a point is
   * returned unchanged.
   */
  static String formatDecimal(String str) {
    int dot = str.indexOf('.');
    if (dot >= 0) {
      int len = str.length();
      // Scan backwards; the '.' guarantees the loop terminates.
      for (int pos = len; ; ) {
        char ch = str.charAt(--pos);
        if (ch != '0') {
          if (ch != '.') {
            pos++;
          }
          return pos == len ? str : str.substring(0, pos);
        }
      }
    }
    return str;
  }

  /**
   * Writes the textual form of {@code v}, using the XML number formats for
   * decimal and floating-point values and {@code "(null)"} for null.
   */
  public void print(Object v) {
    if (v instanceof BigDecimal) {
      v = formatDecimal((BigDecimal) v);
    } else if (v instanceof Double || v instanceof gnu.math.DFloNum) {
      v = formatDouble(((Number) v).doubleValue());
    } else if (v instanceof Float) {
      v = formatFloat(((Float) v).floatValue());
    }
    write(v == null ? "(null)" : v.toString());
  }

  /**
   * Writes an arbitrary object: positions and consumables are replayed
   * into this printer, a {@link Keyword} opens an attribute, unescaped
   * data is copied verbatim, and anything else is printed as a word.
   */
  public void writeObject(Object v) {
    if (v instanceof SeqPosition) {
      bout.clearWordEnd();
      SeqPosition pos = (SeqPosition) v;
      pos.sequence.consumeNext(pos.ipos, this);
      if (pos.sequence instanceof NodeTree) {
        prev = '-';
      }
      return;
    }
    if (v instanceof Consumable && !(v instanceof UnescapedData)) {
      ((Consumable) v).consume(this);
      return;
    }
    if (v instanceof Keyword) {
      beginAttribute(((Keyword) v).getName());
      // Tells endAttribute() that the value is still to come.
      prev = KEYWORD;
      return;
    }
    closeTag();
    if (v instanceof UnescapedData) {
      bout.clearWordEnd();
      bout.write(((UnescapedData) v).getData());
      prev = '-';
    } else if (v instanceof Char) {
      Char.print(((Char) v).intValue(), this);
    } else {
      startWord();
      prev = ' ';
      print(v);
      writeWordEnd();
      prev = WORD;
    }
  }

  public boolean ignoring() {
    return false;
  }

  /**
   * Writes part of a string. Runs of characters that need no special
   * treatment are copied in bulk; every other character goes through
   * {@link #write(int)}.
   */
  public void write(String str, int start, int length) {
    if (length > 0) {
      closeTag();
      int limit = start + length;
      int count = 0; // length of the pending run of plain characters
      while (start < limit) {
        char c = str.charAt(start++);
        if (c >= 127 || c == '\n' || c == '\r'
            || (inComment > 0 ? (c == '-' || inComment == 2)
                : (c == '<' || c == '>' || c == '&'
                   || (inAttribute && (c == '"' || c < ' '))))) {
          if (count > 0) {
            bout.write(str, start - 1 - count, count);
          }
          write(c);
          count = 0;
        } else {
          count++;
        }
      }
      if (count > 0) {
        bout.write(str, limit - count, count);
      }
    }
    prev = '-';
  }

  /**
   * Writes part of a character array, with the same treatment as
   * {@link #write(String, int, int)}.
   */
  public void write(char[] buf, int off, int len) {
    if (len > 0) {
      closeTag();
      int limit = off + len;
      int count = 0; // length of the pending run of plain characters
      while (off < limit) {
        char c = buf[off++];
        if (c >= 127 || c == '\n' || c == '\r'
            || (inComment > 0 ? (c == '-' || inComment == 2)
                : (c == '<' || c == '>' || c == '&'
                   || (inAttribute && (c == '"' || c < ' '))))) {
          if (count > 0) {
            bout.write(buf, off - 1 - count, count);
          }
          write(c);
          count = 0;
        } else {
          count++;
        }
      }
      if (count > 0) {
        bout.write(buf, limit - count, count);
      }
    }
    prev = '-';
  }

  public void writePosition(AbstractSequence seq, int ipos) {
    seq.consumeNext(ipos, this);
  }

  public void writeBaseUri(Object uri) {
  }

  /** Opens a comment, writing {@code <!--}. */
  public void beginComment() {
    closeTag();
    if (printIndent >= 0) {
      if (prev == ELEMENT_START || prev == ELEMENT_END || prev == COMMENT) {
        writeBreak(printIndent > 0 ? PrettyWriter.NEWLINE_MANDATORY
                   : PrettyWriter.NEWLINE_LINEAR);
      }
    }
    bout.write("<!--");
    inComment = 1;
  }

  /** Closes the current comment, writing {@code -->}. */
  public void endComment() {
    bout.write("-->");
    prev = COMMENT;
    inComment = 0;
  }

  public void writeComment(String chars) {
    beginComment();
    write(chars);
    endComment();
  }

  public void writeComment(char[] chars, int offset, int length) {
    beginComment();
    write(chars, offset, length);
    endComment();
  }

  /**
   * Writes a CDATA section, or ordinary escaped text when
   * {@link #canonicalizeCDATA} is set. An embedded {@code ]]>} is split
   * across two sections so that it cannot end the section early.
   */
  public void writeCDATA(char[] chars, int offset, int length) {
    if (canonicalizeCDATA) {
      write(chars, offset, length);
      return;
    }
    closeTag();
    bout.write("<![CDATA[");
    int limit = offset + length;
    for (int i = offset; i < limit - 2; i++) {
      if (chars[i] == ']' && chars[i + 1] == ']' && chars[i + 2] == '>') {
        if (i > offset) {
          bout.write(chars, offset, i - offset);
        }
        // NOTE(review): print() may route through the escaping write()
        // overloads of this class, depending on the superclass -- confirm
        // that this markup reaches the output unescaped.
        print("]]]><![CDATA[]>");
        offset = i + 3;
        length = limit - offset;
        i = i + 2;
      }
    }
    bout.write(chars, offset, length);
    bout.write("]]>");
    prev = '>';
  }

  /**
   * Writes a processing instruction. A target of {@code "xml"} cancels
   * any pending automatic XML declaration.
   */
  public void writeProcessingInstruction(String target, char[] content,
                                         int offset, int length) {
    if ("xml".equals(target)) {
      needXMLdecl = false;
    }
    closeTag();
    bout.write("<?");
    print(target);
    print(' ');
    bout.write(content, offset, length);
    bout.write("?>");
    prev = '>';
  }

  public void consume(SeqPosition position) {
    position.sequence.consumeNext(position.ipos, this);
  }
}