package com.lowagie.text.pdf;

import java.io.*;
import java.util.HashMap;
import java.util.Locale;
import java.util.Stack;

/**
 * A small, non-validating, event-driven parser for XML and (optionally) loosely
 * formed HTML.
 * <p>
 * The parser is a character-at-a-time state machine. It reports start tags
 * (with their attributes), end tags, text runs and, when a comment handler is
 * supplied, comments. Character and named entity references are resolved in
 * text and in quoted attribute values; CDATA sections are reported as text;
 * processing instructions and the DOCTYPE declaration are skipped.
 * <p>
 * The class only has static methods and cannot be instantiated.
 */
public class SimpleXMLParser {
    /** Maps upper-cased IANA charset names to the Java encoding names used for decoding. */
    private static final HashMap fIANA2JavaMap = new HashMap();
    /** Maps entity names (without the ampersand and semicolon) to their Character value. */
    private static final HashMap entityMap = new HashMap();

    /**
     * Longest entity reference body that is accepted. It matches the longest
     * registered entity name ("thetasym") and the longest hexadecimal character
     * reference ("#x10FFFF"), both 8 characters.
     */
    private static final int MAX_ENTITY_LENGTH = 8;

    /**
     * Pops the previous parser mode from the mode stack.
     *
     * @param st the stack of saved modes (holds Integer objects)
     * @return the saved mode, or PRE when the stack is empty
     */
    private static int popMode(Stack st) {
        if (!st.empty())
            return ((Integer)st.pop()).intValue();
        else
            return PRE;
    }

    /** The states of the parser. */
    private final static int
    TEXT = 1,
    ENTITY = 2,
    OPEN_TAG = 3,
    CLOSE_TAG = 4,
    START_TAG = 5,
    ATTRIBUTE_LVALUE = 6,
    ATTRIBUTE_EQUAL = 9,
    ATTRIBUTE_RVALUE = 10,
    QUOTE = 7,
    IN_TAG = 8,
    SINGLE_TAG = 12,
    COMMENT = 13,
    DONE = 11,
    DOCTYPE = 14,
    PRE = 15,
    CDATA = 16;

    /** Not instantiable: the class only offers static methods. */
    private SimpleXMLParser() {
    }

    /**
     * Parses the XML document in a byte stream, detecting the character encoding
     * from the first four bytes and, when present, from the encoding
     * pseudo-attribute of the XML declaration.
     * <p>
     * NOTE: the bytes consumed while sniffing (the first four bytes and, for
     * UTF-8 and CP037 input, everything up to and including the first '&gt;')
     * are not replayed to the parser, so the stream is expected to start with an
     * XML declaration.
     *
     * @param doc the handler that receives the parsing events
     * @param in the byte stream holding the document
     * @throws IOException if fewer than four bytes are available, on read
     * errors, or on malformed XML
     */
    public static void parse(SimpleXMLDocHandler doc, InputStream in) throws IOException {
        byte b4[] = new byte[4];
        // A single read() may legitimately return fewer bytes than requested
        // (sockets, pipes, inflater streams); keep reading until 4 bytes or EOF.
        int count = 0;
        while (count < 4) {
            int n = in.read(b4, count, 4 - count);
            if (n < 0)
                break;
            count += n;
        }
        if (count != 4)
            throw new IOException("Insufficient length.");
        String encoding = getEncodingName(b4);
        String decl = null;
        if (encoding.equals("UTF-8")) {
            // ASCII-compatible: collect the rest of the declaration up to '>'
            StringBuffer sb = new StringBuffer();
            int c;
            while ((c = in.read()) != -1) {
                if (c == '>')
                    break;
                sb.append((char)c);
            }
            decl = sb.toString();
        }
        else if (encoding.equals("CP037")) {
            ByteArrayOutputStream bi = new ByteArrayOutputStream();
            int c;
            while ((c = in.read()) != -1) {
                if (c == 0x6e) // the byte CP037 uses for '>'
                    break;
                bi.write(c);
            }
            decl = new String(bi.toByteArray(), "CP037");
        }
        if (decl != null) {
            decl = getDeclaredEncoding(decl);
            if (decl != null)
                encoding = decl;
        }
        parse(doc, new InputStreamReader(in, getJavaEncoding(encoding)));
    }

    /**
     * Extracts the value of the encoding pseudo-attribute from the text of an
     * XML declaration.
     *
     * @param decl the declaration text; may be null
     * @return the quoted value that follows the word "encoding", or null when
     * there is none or the closing quote is missing
     */
    private static String getDeclaredEncoding(String decl) {
        if (decl == null)
            return null;
        int idx = decl.indexOf("encoding");
        if (idx < 0)
            return null;
        int idx1 = decl.indexOf('"', idx);
        int idx2 = decl.indexOf('\'', idx);
        // two different characters can only share an index when both are absent
        if (idx1 == idx2)
            return null;
        // the value is delimited by whichever quote character comes first
        if ((idx1 < 0 && idx2 > 0) || (idx2 > 0 && idx2 < idx1)) {
            int idx3 = decl.indexOf('\'', idx2 + 1);
            if (idx3 < 0)
                return null;
            return decl.substring(idx2 + 1, idx3);
        }
        if ((idx2 < 0 && idx1 > 0) || (idx1 > 0 && idx1 < idx2)) {
            int idx3 = decl.indexOf('"', idx1 + 1);
            if (idx3 < 0)
                return null;
            return decl.substring(idx1 + 1, idx3);
        }
        return null;
    }

    /**
     * Gets the Java encoding name that corresponds to an IANA charset name.
     * The lookup is case-insensitive and independent of the default locale.
     *
     * @param iana the IANA charset name
     * @return the mapped Java encoding name, or <CODE>iana</CODE> itself when
     * no mapping is registered
     */
    public static String getJavaEncoding(String iana) {
        // Locale.ENGLISH: a default-locale upper-casing would turn 'i' into a
        // dotted capital I under a Turkish locale and miss every "ISO-..." key.
        String IANA = iana.toUpperCase(Locale.ENGLISH);
        String jdec = (String)fIANA2JavaMap.get(IANA);
        if (jdec == null)
            jdec = iana;
        return jdec;
    }

    /**
     * Parses strict XML from a reader, without reporting comments.
     *
     * @param doc the handler that receives the parsing events
     * @param r the character source
     * @throws IOException on read errors or malformed XML
     */
    public static void parse(SimpleXMLDocHandler doc, Reader r) throws IOException {
        parse(doc, null, r, false);
    }

    /**
     * Parses the XML or HTML document read from <CODE>r</CODE> and reports its
     * content to the handlers.
     * <p>
     * In XML mode parsing stops as soon as the root element is closed and an
     * IOException is thrown when the input ends before that. In HTML mode tag
     * and attribute names are lower-cased, unquoted and value-less attributes
     * are tolerated, leading text is reported, and the end of input ends the
     * document.
     *
     * @param doc the handler that receives document, element and text events
     * @param comment the handler that receives comments; may be null
     * @param r the character source; it is wrapped in a BufferedReader unless it
     * already is one
     * @param html <CODE>true</CODE> to parse leniently as HTML,
     * <CODE>false</CODE> to parse as XML
     * @throws IOException on read errors or malformed XML
     */
    public static void parse(SimpleXMLDocHandler doc, SimpleXMLDocHandlerComment comment, Reader r, boolean html) throws IOException {
        BufferedReader reader;
        if (r instanceof BufferedReader)
            reader = (BufferedReader)r;
        else
            reader = new BufferedReader(r);
        Stack st = new Stack();         // modes to return to
        int depth = 0;                  // number of currently open elements
        int mode = html ? TEXT : PRE;   // HTML may start with text, XML may not
        int c;
        int quotec = '"';               // quote that opened the current value; ' ' = unquoted (HTML)
        StringBuffer sb = new StringBuffer();   // text, tag name or attribute being collected
        StringBuffer etag = new StringBuffer(); // body of the entity reference being collected
        String tagName = null;
        String lvalue = null;
        String rvalue = null;
        HashMap attrs = null;
        doc.startDocument();
        int line = 1, col = 0;          // position, used in error messages only
        boolean eol = false;            // true right after a CR
        int pushBack = -1;              // character to be processed again, or -1
        while (true) {
            if (pushBack != -1) {
                // The character was already normalised and counted when it was
                // first read; running it through the line-ending logic again
                // would swallow a newline that followed a CR and count lines twice.
                c = pushBack;
                pushBack = -1;
            }
            else {
                c = reader.read();
                if (c == -1)
                    break;

                // map CR, CR LF and LF to a single LF
                if (eol) {
                    eol = false;
                    if (c == '\n')
                        continue;       // LF of a CR LF pair, already reported
                }
                if (c == '\n') {
                    line++;
                    col = 0;
                } else if (c == '\r') {
                    eol = true;
                    c = '\n';
                    line++;
                    col = 0;
                } else {
                    col++;
                }
            }

            if (mode == DONE) {
                // the root element was closed; whatever follows is ignored
                doc.endDocument();
                return;

            } else if (mode == TEXT) {
                // collecting character data between tags
                if (c == '<') {
                    st.push(new Integer(mode));
                    mode = START_TAG;
                    if (sb.length() > 0) {
                        doc.text(sb.toString());
                        sb.setLength(0);
                    }
                } else if (c == '&') {
                    st.push(new Integer(mode));
                    mode = ENTITY;
                    etag.setLength(0);
                } else
                    sb.append((char)c);

            } else if (mode == CLOSE_TAG) {
                // inside an end tag, after the slash
                if (c == '>') {
                    mode = popMode(st);
                    tagName = sb.toString();
                    if (html)
                        tagName = tagName.toLowerCase(Locale.ENGLISH);
                    sb.setLength(0);
                    depth--;
                    if (!html && depth == 0)
                        mode = DONE;
                    doc.endElement(tagName);
                } else {
                    if (!Character.isWhitespace((char)c))
                        sb.append((char)c);
                }

            } else if (mode == CDATA) {
                // inside a CDATA section, looking for the closing "]]>"
                if (c == '>'
                && sb.toString().endsWith("]]")) {
                    sb.setLength(sb.length() - 2);
                    doc.text(sb.toString());
                    sb.setLength(0);
                    mode = popMode(st);
                } else
                    sb.append((char)c);

            } else if (mode == COMMENT) {
                // inside a comment, looking for the closing "-->"
                if (c == '>'
                && sb.toString().endsWith("--")) {
                    if (comment != null) {
                        sb.setLength(sb.length() - 2);
                        comment.comment(sb.toString());
                    }
                    sb.setLength(0);
                    mode = popMode(st);
                } else
                    sb.append((char)c);

            } else if (mode == PRE) {
                // before the root element: everything up to the first '<' is skipped
                if (c == '<') {
                    st.push(new Integer(TEXT));
                    mode = START_TAG;
                }

            } else if (mode == DOCTYPE) {
                // inside a processing instruction or DOCTYPE: skipped up to '>'
                if (c == '>') {
                    mode = popMode(st);
                    if (mode == TEXT) mode = PRE;
                }

            } else if (mode == START_TAG) {
                // just after '<': decide what kind of markup this is
                mode = popMode(st);
                if (c == '/') {
                    st.push(new Integer(mode));
                    mode = CLOSE_TAG;
                } else if (c == '?') {
                    mode = DOCTYPE;
                } else {
                    st.push(new Integer(mode));
                    mode = OPEN_TAG;
                    tagName = null;
                    attrs = new HashMap();
                    sb.append((char)c);
                }

            } else if (mode == ENTITY) {
                // collecting an entity reference body up to ';'
                if (c == ';') {
                    mode = popMode(st);
                    String cent = etag.toString();
                    etag.setLength(0);
                    if (cent.startsWith("#x")) {
                        appendCharacterReference(sb, cent, 2, 16);
                    }
                    else if (cent.startsWith("#")) {
                        appendCharacterReference(sb, cent, 1, 10);
                    }
                    else {
                        char ce = decodeEntity(cent);
                        if (ce == '\0')
                            sb.append('&').append(cent).append(';');
                        else
                            sb.append(ce);
                    }
                } else if ((c != '#' && (c < '0' || c > '9') && (c < 'a' || c > 'z')
                    && (c < 'A' || c > 'Z')) || etag.length() >= MAX_ENTITY_LENGTH) {
                    // not an entity reference after all: emit what was read as
                    // literal text and process the current character again
                    mode = popMode(st);
                    pushBack = c;
                    sb.append('&').append(etag.toString());
                    etag.setLength(0);
                }
                else {
                    etag.append((char)c);
                }

            } else if (mode == SINGLE_TAG) {
                // after the '/' of an empty-element tag: only '>' may follow
                if (tagName == null)
                    tagName = sb.toString();
                if (html)
                    tagName = tagName.toLowerCase(Locale.ENGLISH);
                if (c != '>')
                    exc("Expected > for tag: <"+tagName+"/>",line,col);
                doc.startElement(tagName,attrs);
                doc.endElement(tagName);
                if (!html && depth == 0) {
                    // the empty element was the root element
                    doc.endDocument();
                    return;
                }
                sb.setLength(0);
                attrs = new HashMap();
                tagName = null;
                mode = popMode(st);

            } else if (mode == OPEN_TAG) {
                // collecting the tag name; also detects comments, CDATA and DOCTYPE
                if (c == '>') {
                    if (tagName == null)
                        tagName = sb.toString();
                    if (html)
                        tagName = tagName.toLowerCase(Locale.ENGLISH);
                    sb.setLength(0);
                    depth++;
                    doc.startElement(tagName,attrs);
                    tagName = null;
                    attrs = new HashMap();
                    mode = popMode(st);
                } else if (c == '/') {
                    mode = SINGLE_TAG;
                } else if (c == '-' && sb.toString().equals("!-")) {
                    mode = COMMENT;
                    sb.setLength(0);
                } else if (c == '[' && sb.toString().equals("![CDATA")) {
                    mode = CDATA;
                    sb.setLength(0);
                } else if (c == 'E' && sb.toString().equals("!DOCTYP")) {
                    sb.setLength(0);
                    mode = DOCTYPE;
                } else if (Character.isWhitespace((char)c)) {
                    tagName = sb.toString();
                    if (html)
                        tagName = tagName.toLowerCase(Locale.ENGLISH);
                    sb.setLength(0);
                    mode = IN_TAG;
                } else {
                    sb.append((char)c);
                }

            } else if (mode == QUOTE) {
                // inside an attribute value
                if (html && quotec == ' ' && c == '>') {
                    // an unquoted HTML value is ended by the end of the tag
                    rvalue = sb.toString();
                    sb.setLength(0);
                    attrs.put(lvalue,rvalue);
                    mode = popMode(st);
                    doc.startElement(tagName,attrs);
                    depth++;
                    tagName = null;
                    attrs = new HashMap();
                }
                else if (html && quotec == ' ' && Character.isWhitespace((char)c)) {
                    // ... or by white space
                    rvalue = sb.toString();
                    sb.setLength(0);
                    attrs.put(lvalue,rvalue);
                    mode = IN_TAG;
                }
                else if (html && quotec == ' ') {
                    sb.append((char)c);
                }
                else if (c == quotec) {
                    rvalue = sb.toString();
                    sb.setLength(0);
                    attrs.put(lvalue,rvalue);
                    mode = IN_TAG;
                } else if (" \r\n\t".indexOf(c) >= 0) {
                    // white space inside a quoted value is normalised to a space
                    sb.append(' ');
                } else if (c == '&') {
                    st.push(new Integer(mode));
                    mode = ENTITY;
                    etag.setLength(0);
                } else {
                    sb.append((char)c);
                }

            } else if (mode == ATTRIBUTE_RVALUE) {
                // after '=': waiting for the opening quote of the value
                if (c == '"' || c == '\'') {
                    quotec = c;
                    mode = QUOTE;
                } else if (Character.isWhitespace((char)c)) {
                    // white space before the value is skipped
                } else if (html && c == '>') {
                    attrs.put(lvalue,sb.toString());
                    sb.setLength(0);
                    mode = popMode(st);
                    doc.startElement(tagName,attrs);
                    depth++;
                    tagName = null;
                    attrs = new HashMap();
                } else if (html) {
                    // start of an unquoted HTML value
                    sb.append((char)c);
                    quotec = ' ';
                    mode = QUOTE;
                } else {
                    exc("Error in attribute processing",line,col);
                }

            } else if (mode == ATTRIBUTE_LVALUE) {
                // collecting an attribute name
                if (Character.isWhitespace((char)c)) {
                    lvalue = sb.toString();
                    if (html)
                        lvalue = lvalue.toLowerCase(Locale.ENGLISH);
                    sb.setLength(0);
                    mode = ATTRIBUTE_EQUAL;
                } else if (c == '=') {
                    lvalue = sb.toString();
                    if (html)
                        lvalue = lvalue.toLowerCase(Locale.ENGLISH);
                    sb.setLength(0);
                    mode = ATTRIBUTE_RVALUE;
                } else if (html && c == '>') {
                    // a value-less attribute directly before '>' is dropped
                    sb.setLength(0);
                    mode = popMode(st);
                    doc.startElement(tagName,attrs);
                    depth++;
                    tagName = null;
                    attrs = new HashMap();
                } else {
                    sb.append((char)c);
                }

            } else if (mode == ATTRIBUTE_EQUAL) {
                // after an attribute name and white space: waiting for '='
                if (c == '=') {
                    mode = ATTRIBUTE_RVALUE;
                } else if (Character.isWhitespace((char)c)) {
                    // more white space is skipped
                } else if (html && c == '>') {
                    sb.setLength(0);
                    mode = popMode(st);
                    doc.startElement(tagName,attrs);
                    depth++;
                    tagName = null;
                    attrs = new HashMap();
                } else if (html && c == '/') {
                    sb.setLength(0);
                    mode = SINGLE_TAG;
                } else if (html) {
                    // the previous attribute had no value; a new name starts here
                    sb.setLength(0);
                    sb.append((char)c);
                    mode = ATTRIBUTE_LVALUE;
                } else {
                    exc("Error in attribute processing.",line,col);
                }

            } else if (mode == IN_TAG) {
                // inside a start tag, between attributes
                if (c == '>') {
                    mode = popMode(st);
                    doc.startElement(tagName,attrs);
                    depth++;
                    tagName = null;
                    attrs = new HashMap();
                } else if (c == '/') {
                    mode = SINGLE_TAG;
                } else if (Character.isWhitespace((char)c)) {
                    // white space between attributes is skipped
                } else {
                    mode = ATTRIBUTE_LVALUE;
                    sb.append((char)c);
                }
            }
        }
        if (html || mode == DONE) {
            if (html && mode == TEXT)
                doc.text(sb.toString());
            doc.endDocument();
        }
        else
            exc("missing end tag",line,col);
    }

    /**
     * Appends the character denoted by a numeric character reference. Code
     * points above the Basic Multilingual Plane are appended as a surrogate
     * pair; a reference that is not a number or lies outside the Unicode range
     * is appended literally, including the ampersand and the semicolon.
     *
     * @param sb the buffer to append to
     * @param cent the reference body, for instance "#x41" or "#65"
     * @param prefix the number of leading characters of cent to skip
     * @param radix 16 for hexadecimal references, 10 for decimal ones
     */
    private static void appendCharacterReference(StringBuffer sb, String cent, int prefix, int radix) {
        try {
            int cp = Integer.parseInt(cent.substring(prefix), radix);
            if (cp >= 0 && cp <= 0xFFFF) {
                sb.append((char)cp);
                return;
            }
            if (cp > 0xFFFF && cp <= 0x10FFFF) {
                int v = cp - 0x10000;
                sb.append((char)(0xD800 | (v >> 10)));
                sb.append((char)(0xDC00 | (v & 0x3FF)));
                return;
            }
        }
        catch (NumberFormatException ignored) {
            // not a number: fall through and keep the reference as literal text
        }
        sb.append('&').append(cent).append(';');
    }

    /**
     * Throws the exception that reports a parsing error.
     *
     * @param s the error message
     * @param line the current line number
     * @param col the current column number
     * @throws IOException always
     */
    private static void exc(String s, int line, int col) throws IOException {
        throw new IOException(s+" near line "+line+", column "+col);
    }

    /**
     * Escapes a string with the XML predefined entities.
     *
     * @param s the string to be escaped
     * @param onlyASCII when <CODE>true</CODE>, every char above 127 is written
     * as a decimal character reference
     * @return the escaped string
     */
    public static String escapeXML(String s, boolean onlyASCII) {
        char cc[] = s.toCharArray();
        int len = cc.length;
        StringBuffer sb = new StringBuffer();
        for (int k = 0; k < len; ++k) {
            int c = cc[k];
            switch (c) {
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '&':
                    sb.append("&amp;");
                    break;
                case '"':
                    sb.append("&quot;");
                    break;
                case '\'':
                    sb.append("&apos;");
                    break;
                default:
                    if (onlyASCII && c > 127)
                        sb.append("&#").append(c).append(";");
                    else
                        sb.append((char)c);
            }
        }
        return sb.toString();
    }

    /**
     * Looks up the character of a named entity.
     *
     * @param s the entity name, without the ampersand and the semicolon
     * @return the character, or the NUL character when the name is unknown
     */
    public static char decodeEntity(String s) {
        Character c = (Character)entityMap.get(s);
        if (c == null)
            return '\0';
        else
            return c.charValue();
    }

    /**
     * Guesses the encoding of a document from its first four bytes, using the
     * byte order mark or the byte pattern of a leading "&lt;?xm" / "&lt;".
     *
     * @param b4 the first four bytes of the document
     * @return the IANA name of the detected encoding; "UTF-8" when no pattern
     * matches
     */
    private static String getEncodingName(byte[] b4) {

        // UTF-16 with a byte order mark
        int b0 = b4[0] & 0xFF;
        int b1 = b4[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            return "UTF-16BE";
        }
        if (b0 == 0xFF && b1 == 0xFE) {
            return "UTF-16LE";
        }

        // UTF-8 with a byte order mark
        int b2 = b4[2] & 0xFF;
        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            return "UTF-8";
        }

        // other encodings, recognised by where the 0x3C of '<' sits
        int b3 = b4[3] & 0xFF;
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
            return "UTF-16BE";
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
            return "UTF-16LE";
        }
        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
            return "CP037";
        }

        // default encoding
        return "UTF-8";
    }

    /**
     * Registers a named entity.
     *
     * @param name the entity name, without the ampersand and the semicolon
     * @param value the character the entity stands for
     */
    private static void addEntity(String name, char value) {
        entityMap.put(name, new Character(value));
    }

    static {
        // IANA charset name (upper case, because lookups are upper-cased) to
        // Java encoding name
        fIANA2JavaMap.put("BIG5", "Big5");
        fIANA2JavaMap.put("CSBIG5", "Big5");
        fIANA2JavaMap.put("CP037", "CP037");
        fIANA2JavaMap.put("IBM037", "CP037");
        fIANA2JavaMap.put("CSIBM037", "CP037");
        fIANA2JavaMap.put("EBCDIC-CP-US", "CP037");
        fIANA2JavaMap.put("EBCDIC-CP-CA", "CP037");
        fIANA2JavaMap.put("EBCDIC-CP-NL", "CP037");
        fIANA2JavaMap.put("EBCDIC-CP-WT", "CP037");
        fIANA2JavaMap.put("IBM277", "CP277");
        fIANA2JavaMap.put("CP277", "CP277");
        fIANA2JavaMap.put("CSIBM277", "CP277");
        fIANA2JavaMap.put("EBCDIC-CP-DK", "CP277");
        fIANA2JavaMap.put("EBCDIC-CP-NO", "CP277");
        fIANA2JavaMap.put("IBM278", "CP278");
        fIANA2JavaMap.put("CP278", "CP278");
        fIANA2JavaMap.put("CSIBM278", "CP278");
        fIANA2JavaMap.put("EBCDIC-CP-FI", "CP278");
        fIANA2JavaMap.put("EBCDIC-CP-SE", "CP278");
        fIANA2JavaMap.put("IBM280", "CP280");
        fIANA2JavaMap.put("CP280", "CP280");
        fIANA2JavaMap.put("CSIBM280", "CP280");
        fIANA2JavaMap.put("EBCDIC-CP-IT", "CP280");
        fIANA2JavaMap.put("IBM284", "CP284");
        fIANA2JavaMap.put("CP284", "CP284");
        fIANA2JavaMap.put("CSIBM284", "CP284");
        fIANA2JavaMap.put("EBCDIC-CP-ES", "CP284");
        fIANA2JavaMap.put("EBCDIC-CP-GB", "CP285");
        fIANA2JavaMap.put("IBM285", "CP285");
        fIANA2JavaMap.put("CP285", "CP285");
        fIANA2JavaMap.put("CSIBM285", "CP285");
        fIANA2JavaMap.put("EBCDIC-CP-FR", "CP297");
        fIANA2JavaMap.put("IBM297", "CP297");
        fIANA2JavaMap.put("CP297", "CP297");
        fIANA2JavaMap.put("CSIBM297", "CP297");
        fIANA2JavaMap.put("EBCDIC-CP-AR1", "CP420");
        fIANA2JavaMap.put("IBM420", "CP420");
        fIANA2JavaMap.put("CP420", "CP420");
        fIANA2JavaMap.put("CSIBM420", "CP420");
        fIANA2JavaMap.put("EBCDIC-CP-HE", "CP424");
        fIANA2JavaMap.put("IBM424", "CP424");
        fIANA2JavaMap.put("CP424", "CP424");
        fIANA2JavaMap.put("CSIBM424", "CP424");
        fIANA2JavaMap.put("EBCDIC-CP-CH", "CP500");
        fIANA2JavaMap.put("IBM500", "CP500");
        fIANA2JavaMap.put("CP500", "CP500");
        fIANA2JavaMap.put("CSIBM500", "CP500");
        fIANA2JavaMap.put("EBCDIC-CP-BE", "CP500");
        fIANA2JavaMap.put("IBM868", "CP868");
        fIANA2JavaMap.put("CP868", "CP868");
        fIANA2JavaMap.put("CSIBM868", "CP868");
        fIANA2JavaMap.put("CP-AR", "CP868");
        fIANA2JavaMap.put("IBM869", "CP869");
        fIANA2JavaMap.put("CP869", "CP869");
        fIANA2JavaMap.put("CSIBM869", "CP869");
        fIANA2JavaMap.put("CP-GR", "CP869");
        fIANA2JavaMap.put("IBM870", "CP870");
        fIANA2JavaMap.put("CP870", "CP870");
        fIANA2JavaMap.put("CSIBM870", "CP870");
        fIANA2JavaMap.put("EBCDIC-CP-ROECE", "CP870");
        fIANA2JavaMap.put("EBCDIC-CP-YU", "CP870");
        fIANA2JavaMap.put("IBM871", "CP871");
        fIANA2JavaMap.put("CP871", "CP871");
        fIANA2JavaMap.put("CSIBM871", "CP871");
        fIANA2JavaMap.put("EBCDIC-CP-IS", "CP871");
        fIANA2JavaMap.put("IBM918", "CP918");
        fIANA2JavaMap.put("CP918", "CP918");
        fIANA2JavaMap.put("CSIBM918", "CP918");
        fIANA2JavaMap.put("EBCDIC-CP-AR2", "CP918");
        fIANA2JavaMap.put("EUC-JP", "EUCJIS");
        fIANA2JavaMap.put("CSEUCPKDFMTJAPANESE", "EUCJIS");
        fIANA2JavaMap.put("EUC-KR", "KSC5601");
        fIANA2JavaMap.put("GB2312", "GB2312");
        fIANA2JavaMap.put("CSGB2312", "GB2312");
        fIANA2JavaMap.put("ISO-2022-JP", "JIS");
        fIANA2JavaMap.put("CSISO2022JP", "JIS");
        fIANA2JavaMap.put("ISO-2022-KR", "ISO2022KR");
        fIANA2JavaMap.put("CSISO2022KR", "ISO2022KR");
        fIANA2JavaMap.put("ISO-2022-CN", "ISO2022CN");

        fIANA2JavaMap.put("X0201", "JIS0201");
        fIANA2JavaMap.put("CSISO13JISC6220JP", "JIS0201");
        fIANA2JavaMap.put("X0208", "JIS0208");
        fIANA2JavaMap.put("ISO-IR-87", "JIS0208");
        fIANA2JavaMap.put("X0208DBIJIS_X0208-1983", "JIS0208");
        fIANA2JavaMap.put("CSISO87JISX0208", "JIS0208");
        fIANA2JavaMap.put("X0212", "JIS0212");
        fIANA2JavaMap.put("ISO-IR-159", "JIS0212");
        fIANA2JavaMap.put("CSISO159JISX02121990", "JIS0212");
        fIANA2JavaMap.put("SHIFT_JIS", "SJIS");
        fIANA2JavaMap.put("CSSHIFT_JIS", "SJIS");
        fIANA2JavaMap.put("MS_KANJI", "SJIS");

        // Windows code pages
        fIANA2JavaMap.put("WINDOWS-1250", "Cp1250");
        fIANA2JavaMap.put("WINDOWS-1251", "Cp1251");
        fIANA2JavaMap.put("WINDOWS-1252", "Cp1252");
        fIANA2JavaMap.put("WINDOWS-1253", "Cp1253");
        fIANA2JavaMap.put("WINDOWS-1254", "Cp1254");
        fIANA2JavaMap.put("WINDOWS-1255", "Cp1255");
        fIANA2JavaMap.put("WINDOWS-1256", "Cp1256");
        fIANA2JavaMap.put("WINDOWS-1257", "Cp1257");
        fIANA2JavaMap.put("WINDOWS-1258", "Cp1258");
        fIANA2JavaMap.put("TIS-620", "TIS620");

        fIANA2JavaMap.put("ISO-8859-1", "ISO8859_1");
        fIANA2JavaMap.put("ISO-IR-100", "ISO8859_1");
        fIANA2JavaMap.put("ISO_8859-1", "ISO8859_1");
        fIANA2JavaMap.put("LATIN1", "ISO8859_1");
        fIANA2JavaMap.put("CSISOLATIN1", "ISO8859_1");
        fIANA2JavaMap.put("L1", "ISO8859_1");
        fIANA2JavaMap.put("IBM819", "ISO8859_1");
        fIANA2JavaMap.put("CP819", "ISO8859_1");

        fIANA2JavaMap.put("ISO-8859-2", "ISO8859_2");
        fIANA2JavaMap.put("ISO-IR-101", "ISO8859_2");
        fIANA2JavaMap.put("ISO_8859-2", "ISO8859_2");
        fIANA2JavaMap.put("LATIN2", "ISO8859_2");
        fIANA2JavaMap.put("CSISOLATIN2", "ISO8859_2");
        fIANA2JavaMap.put("L2", "ISO8859_2");

        fIANA2JavaMap.put("ISO-8859-3", "ISO8859_3");
        fIANA2JavaMap.put("ISO-IR-109", "ISO8859_3");
        fIANA2JavaMap.put("ISO_8859-3", "ISO8859_3");
        fIANA2JavaMap.put("LATIN3", "ISO8859_3");
        fIANA2JavaMap.put("CSISOLATIN3", "ISO8859_3");
        fIANA2JavaMap.put("L3", "ISO8859_3");

        fIANA2JavaMap.put("ISO-8859-4", "ISO8859_4");
        fIANA2JavaMap.put("ISO-IR-110", "ISO8859_4");
        fIANA2JavaMap.put("ISO_8859-4", "ISO8859_4");
        fIANA2JavaMap.put("LATIN4", "ISO8859_4");
        fIANA2JavaMap.put("CSISOLATIN4", "ISO8859_4");
        fIANA2JavaMap.put("L4", "ISO8859_4");

        fIANA2JavaMap.put("ISO-8859-5", "ISO8859_5");
        fIANA2JavaMap.put("ISO-IR-144", "ISO8859_5");
        fIANA2JavaMap.put("ISO_8859-5", "ISO8859_5");
        fIANA2JavaMap.put("CYRILLIC", "ISO8859_5");
        fIANA2JavaMap.put("CSISOLATINCYRILLIC", "ISO8859_5");

        fIANA2JavaMap.put("ISO-8859-6", "ISO8859_6");
        fIANA2JavaMap.put("ISO-IR-127", "ISO8859_6");
        fIANA2JavaMap.put("ISO_8859-6", "ISO8859_6");
        fIANA2JavaMap.put("ECMA-114", "ISO8859_6");
        fIANA2JavaMap.put("ASMO-708", "ISO8859_6");
        fIANA2JavaMap.put("ARABIC", "ISO8859_6");
        fIANA2JavaMap.put("CSISOLATINARABIC", "ISO8859_6");

        fIANA2JavaMap.put("ISO-8859-7", "ISO8859_7");
        fIANA2JavaMap.put("ISO-IR-126", "ISO8859_7");
        fIANA2JavaMap.put("ISO_8859-7", "ISO8859_7");
        fIANA2JavaMap.put("ELOT_928", "ISO8859_7");
        fIANA2JavaMap.put("ECMA-118", "ISO8859_7");
        fIANA2JavaMap.put("GREEK", "ISO8859_7");
        fIANA2JavaMap.put("CSISOLATINGREEK", "ISO8859_7");
        fIANA2JavaMap.put("GREEK8", "ISO8859_7");

        fIANA2JavaMap.put("ISO-8859-8", "ISO8859_8");
        fIANA2JavaMap.put("ISO-8859-8-I", "ISO8859_8");
        fIANA2JavaMap.put("ISO-IR-138", "ISO8859_8");
        fIANA2JavaMap.put("ISO_8859-8", "ISO8859_8");
        fIANA2JavaMap.put("HEBREW", "ISO8859_8");
        fIANA2JavaMap.put("CSISOLATINHEBREW", "ISO8859_8");

        fIANA2JavaMap.put("ISO-8859-9", "ISO8859_9");
        fIANA2JavaMap.put("ISO-IR-148", "ISO8859_9");
        fIANA2JavaMap.put("ISO_8859-9", "ISO8859_9");
        fIANA2JavaMap.put("LATIN5", "ISO8859_9");
        fIANA2JavaMap.put("CSISOLATIN5", "ISO8859_9");
        fIANA2JavaMap.put("L5", "ISO8859_9");

        fIANA2JavaMap.put("KOI8-R", "KOI8_R");
        fIANA2JavaMap.put("CSKOI8-R", "KOI8_R");
        fIANA2JavaMap.put("US-ASCII", "ASCII");
        fIANA2JavaMap.put("ISO-IR-6", "ASCII");
        fIANA2JavaMap.put("ANSI_X3.4-1986", "ASCII");
        fIANA2JavaMap.put("ISO_646.IRV:1991", "ASCII");
        fIANA2JavaMap.put("ASCII", "ASCII");
        fIANA2JavaMap.put("CSASCII", "ASCII");
        fIANA2JavaMap.put("ISO646-US", "ASCII");
        fIANA2JavaMap.put("US", "ASCII");
        fIANA2JavaMap.put("IBM367", "ASCII");
        fIANA2JavaMap.put("CP367", "ASCII");
        fIANA2JavaMap.put("UTF-8", "UTF8");
        fIANA2JavaMap.put("UTF-16", "Unicode");
        fIANA2JavaMap.put("UTF-16BE", "UnicodeBig");
        fIANA2JavaMap.put("UTF-16LE", "UnicodeLittle");

        // Latin-1 entities
        addEntity("nbsp", '\u00a0');
        addEntity("iexcl", '\u00a1');
        addEntity("cent", '\u00a2');
        addEntity("pound", '\u00a3');
        addEntity("curren", '\u00a4');
        addEntity("yen", '\u00a5');
        addEntity("brvbar", '\u00a6');
        addEntity("sect", '\u00a7');
        addEntity("uml", '\u00a8');
        addEntity("copy", '\u00a9');
        addEntity("ordf", '\u00aa');
        addEntity("laquo", '\u00ab');
        addEntity("not", '\u00ac');
        addEntity("shy", '\u00ad');
        addEntity("reg", '\u00ae');
        addEntity("macr", '\u00af');
        addEntity("deg", '\u00b0');
        addEntity("plusmn", '\u00b1');
        addEntity("sup2", '\u00b2');
        addEntity("sup3", '\u00b3');
        addEntity("acute", '\u00b4');
        addEntity("micro", '\u00b5');
        addEntity("para", '\u00b6');
        addEntity("middot", '\u00b7');
        addEntity("cedil", '\u00b8');
        addEntity("sup1", '\u00b9');
        addEntity("ordm", '\u00ba');
        addEntity("raquo", '\u00bb');
        addEntity("frac14", '\u00bc');
        addEntity("frac12", '\u00bd');
        addEntity("frac34", '\u00be');
        addEntity("iquest", '\u00bf');
        addEntity("Agrave", '\u00c0');
        addEntity("Aacute", '\u00c1');
        addEntity("Acirc", '\u00c2');
        addEntity("Atilde", '\u00c3');
        addEntity("Auml", '\u00c4');
        addEntity("Aring", '\u00c5');
        addEntity("AElig", '\u00c6');
        addEntity("Ccedil", '\u00c7');
        addEntity("Egrave", '\u00c8');
        addEntity("Eacute", '\u00c9');
        addEntity("Ecirc", '\u00ca');
        addEntity("Euml", '\u00cb');
        addEntity("Igrave", '\u00cc');
        addEntity("Iacute", '\u00cd');
        addEntity("Icirc", '\u00ce');
        addEntity("Iuml", '\u00cf');
        addEntity("ETH", '\u00d0');
        addEntity("Ntilde", '\u00d1');
        addEntity("Ograve", '\u00d2');
        addEntity("Oacute", '\u00d3');
        addEntity("Ocirc", '\u00d4');
        addEntity("Otilde", '\u00d5');
        addEntity("Ouml", '\u00d6');
        addEntity("times", '\u00d7');
        addEntity("Oslash", '\u00d8');
        addEntity("Ugrave", '\u00d9');
        addEntity("Uacute", '\u00da');
        addEntity("Ucirc", '\u00db');
        addEntity("Uuml", '\u00dc');
        addEntity("Yacute", '\u00dd');
        addEntity("THORN", '\u00de');
        addEntity("szlig", '\u00df');
        addEntity("agrave", '\u00e0');
        addEntity("aacute", '\u00e1');
        addEntity("acirc", '\u00e2');
        addEntity("atilde", '\u00e3');
        addEntity("auml", '\u00e4');
        addEntity("aring", '\u00e5');
        addEntity("aelig", '\u00e6');
        addEntity("ccedil", '\u00e7');
        addEntity("egrave", '\u00e8');
        addEntity("eacute", '\u00e9');
        addEntity("ecirc", '\u00ea');
        addEntity("euml", '\u00eb');
        addEntity("igrave", '\u00ec');
        addEntity("iacute", '\u00ed');
        addEntity("icirc", '\u00ee');
        addEntity("iuml", '\u00ef');
        addEntity("eth", '\u00f0');
        addEntity("ntilde", '\u00f1');
        addEntity("ograve", '\u00f2');
        addEntity("oacute", '\u00f3');
        addEntity("ocirc", '\u00f4');
        addEntity("otilde", '\u00f5');
        addEntity("ouml", '\u00f6');
        addEntity("divide", '\u00f7');
        addEntity("oslash", '\u00f8');
        addEntity("ugrave", '\u00f9');
        addEntity("uacute", '\u00fa');
        addEntity("ucirc", '\u00fb');
        addEntity("uuml", '\u00fc');
        addEntity("yacute", '\u00fd');
        addEntity("thorn", '\u00fe');
        addEntity("yuml", '\u00ff');
        // Latin Extended-B
        addEntity("fnof", '\u0192');
        // Greek
        addEntity("Alpha", '\u0391');
        addEntity("Beta", '\u0392');
        addEntity("Gamma", '\u0393');
        addEntity("Delta", '\u0394');
        addEntity("Epsilon", '\u0395');
        addEntity("Zeta", '\u0396');
        addEntity("Eta", '\u0397');
        addEntity("Theta", '\u0398');
        addEntity("Iota", '\u0399');
        addEntity("Kappa", '\u039a');
        addEntity("Lambda", '\u039b');
        addEntity("Mu", '\u039c');
        addEntity("Nu", '\u039d');
        addEntity("Xi", '\u039e');
        addEntity("Omicron", '\u039f');
        addEntity("Pi", '\u03a0');
        addEntity("Rho", '\u03a1');
        addEntity("Sigma", '\u03a3');
        addEntity("Tau", '\u03a4');
        addEntity("Upsilon", '\u03a5');
        addEntity("Phi", '\u03a6');
        addEntity("Chi", '\u03a7');
        addEntity("Psi", '\u03a8');
        addEntity("Omega", '\u03a9');
        addEntity("alpha", '\u03b1');
        addEntity("beta", '\u03b2');
        addEntity("gamma", '\u03b3');
        addEntity("delta", '\u03b4');
        addEntity("epsilon", '\u03b5');
        addEntity("zeta", '\u03b6');
        addEntity("eta", '\u03b7');
        addEntity("theta", '\u03b8');
        addEntity("iota", '\u03b9');
        addEntity("kappa", '\u03ba');
        addEntity("lambda", '\u03bb');
        addEntity("mu", '\u03bc');
        addEntity("nu", '\u03bd');
        addEntity("xi", '\u03be');
        addEntity("omicron", '\u03bf');
        addEntity("pi", '\u03c0');
        addEntity("rho", '\u03c1');
        addEntity("sigmaf", '\u03c2');
        addEntity("sigma", '\u03c3');
        addEntity("tau", '\u03c4');
        addEntity("upsilon", '\u03c5');
        addEntity("phi", '\u03c6');
        addEntity("chi", '\u03c7');
        addEntity("psi", '\u03c8');
        addEntity("omega", '\u03c9');
        addEntity("thetasym", '\u03d1');
        addEntity("upsih", '\u03d2');
        addEntity("piv", '\u03d6');
        // general punctuation
        addEntity("bull", '\u2022');
        addEntity("hellip", '\u2026');
        addEntity("prime", '\u2032');
        addEntity("Prime", '\u2033');
        addEntity("oline", '\u203e');
        addEntity("frasl", '\u2044');
        // letterlike symbols
        addEntity("weierp", '\u2118');
        addEntity("image", '\u2111');
        addEntity("real", '\u211c');
        addEntity("trade", '\u2122');
        addEntity("alefsym", '\u2135');
        // arrows
        addEntity("larr", '\u2190');
        addEntity("uarr", '\u2191');
        addEntity("rarr", '\u2192');
        addEntity("darr", '\u2193');
        addEntity("harr", '\u2194');
        addEntity("crarr", '\u21b5');
        addEntity("lArr", '\u21d0');
        addEntity("uArr", '\u21d1');
        addEntity("rArr", '\u21d2');
        addEntity("dArr", '\u21d3');
        addEntity("hArr", '\u21d4');
        // mathematical operators
        addEntity("forall", '\u2200');
        addEntity("part", '\u2202');
        addEntity("exist", '\u2203');
        addEntity("empty", '\u2205');
        addEntity("nabla", '\u2207');
        addEntity("isin", '\u2208');
        addEntity("notin", '\u2209');
        addEntity("ni", '\u220b');
        addEntity("prod", '\u220f');
        addEntity("sum", '\u2211');
        addEntity("minus", '\u2212');
        addEntity("lowast", '\u2217');
        addEntity("radic", '\u221a');
        addEntity("prop", '\u221d');
        addEntity("infin", '\u221e');
        addEntity("ang", '\u2220');
        addEntity("and", '\u2227');
        addEntity("or", '\u2228');
        addEntity("cap", '\u2229');
        addEntity("cup", '\u222a');
        addEntity("int", '\u222b');
        addEntity("there4", '\u2234');
        addEntity("sim", '\u223c');
        addEntity("cong", '\u2245');
        addEntity("asymp", '\u2248');
        addEntity("ne", '\u2260');
        addEntity("equiv", '\u2261');
        addEntity("le", '\u2264');
        addEntity("ge", '\u2265');
        addEntity("sub", '\u2282');
        addEntity("sup", '\u2283');
        addEntity("nsub", '\u2284');
        addEntity("sube", '\u2286');
        addEntity("supe", '\u2287');
        addEntity("oplus", '\u2295');
        addEntity("otimes", '\u2297');
        addEntity("perp", '\u22a5');
        addEntity("sdot", '\u22c5');
        // miscellaneous technical
        addEntity("lceil", '\u2308');
        addEntity("rceil", '\u2309');
        addEntity("lfloor", '\u230a');
        addEntity("rfloor", '\u230b');
        addEntity("lang", '\u2329');
        addEntity("rang", '\u232a');
        // geometric shapes and miscellaneous symbols
        addEntity("loz", '\u25ca');
        addEntity("spades", '\u2660');
        addEntity("clubs", '\u2663');
        addEntity("hearts", '\u2665');
        addEntity("diams", '\u2666');
        // XML predefined entities
        addEntity("quot", '\u0022');
        addEntity("amp", '\u0026');
        addEntity("apos", '\'');
        addEntity("lt", '\u003c');
        addEntity("gt", '\u003e');
        // Latin Extended-A
        addEntity("OElig", '\u0152');
        addEntity("oelig", '\u0153');
        addEntity("Scaron", '\u0160');
        addEntity("scaron", '\u0161');
        addEntity("Yuml", '\u0178');
        // spacing modifier letters
        addEntity("circ", '\u02c6');
        addEntity("tilde", '\u02dc');
        // general punctuation
        addEntity("ensp", '\u2002');
        addEntity("emsp", '\u2003');
        addEntity("thinsp", '\u2009');
        addEntity("zwnj", '\u200c');
        addEntity("zwj", '\u200d');
        addEntity("lrm", '\u200e');
        addEntity("rlm", '\u200f');
        addEntity("ndash", '\u2013');
        addEntity("mdash", '\u2014');
        addEntity("lsquo", '\u2018');
        addEntity("rsquo", '\u2019');
        addEntity("sbquo", '\u201a');
        addEntity("ldquo", '\u201c');
        addEntity("rdquo", '\u201d');
        addEntity("bdquo", '\u201e');
        addEntity("dagger", '\u2020');
        addEntity("Dagger", '\u2021');
        addEntity("permil", '\u2030');
        addEntity("lsaquo", '\u2039');
        addEntity("rsaquo", '\u203a');
        addEntity("euro", '\u20ac');
    }
}