1 7 25 26 42 43 package com.icl.saxon.aelfred; 44 45 import java.io.BufferedInputStream; 46 import java.io.CharConversionException; 47 import java.io.EOFException; 48 import java.io.InputStream; 49 import java.io.InputStreamReader; 50 import java.io.IOException; 51 import java.io.Reader; 52 import java.net.URL; 53 import java.net.URLConnection; 54 import java.util.Enumeration; 55 import java.util.Hashtable; 56 import java.util.Stack; 57 58 import org.xml.sax.SAXException; 59 60 61 63 74 final class XmlParser 75 { 76 private final static boolean USE_CHEATS = true; 78 79 private final static int DEFAULT_ATTR_COUNT = 23; 81 82 83 87 88 93 XmlParser () 95 { 96 cleanupVariables (); 97 } 98 99 100 105 void setHandler (SAXDriver handler) 107 { 108 this.handler = handler; 109 } 110 111 112 129 void doParse ( 131 String systemId, 132 String publicId, 133 Reader reader, 134 InputStream stream, 135 String encoding 136 ) throws Exception 137 { 138 if (handler == null) 139 throw new IllegalStateException ("no callback handler"); 140 141 basePublicId = publicId; 142 baseURI = systemId; 143 baseReader = reader; 144 baseInputStream = stream; 145 146 initializeVariables (); 147 148 setInternalEntity ("amp", "&"); 152 setInternalEntity ("lt", "<"); 153 setInternalEntity ("gt", ">"); 154 setInternalEntity ("apos", "'"); 155 setInternalEntity ("quot", """); 156 157 handler.startDocument (); 158 159 pushURL ("[document]", basePublicId, baseURI, 160 baseReader, baseInputStream, encoding, false); 161 162 try { 163 parseDocument (); 164 handler.endDocument (); 165 } finally { 166 if (baseReader != null) 167 try { baseReader.close (); 168 } catch (IOException e) { } 169 if (baseInputStream != null) 170 try { baseInputStream.close (); 171 } catch (IOException e) { } 172 if (is != null) 173 try { is.close (); 174 } catch (IOException e) { } 175 if (reader != null) 176 try { 177 reader.close (); 178 } catch (IOException e) { 179 } 180 cleanupVariables (); 181 } 182 } 183 184 185 189 193 197 public final static int CONTENT_UNDECLARED = 0; 198 199 203 public final static int CONTENT_ANY = 1; 204 205 209 public final static int CONTENT_EMPTY = 2; 210 211 215 public final static int CONTENT_MIXED = 3; 216 217 221 public final static int CONTENT_ELEMENTS = 4; 222 223 224 228 232 public final static int ENTITY_UNDECLARED = 0; 233 234 238 public final static int ENTITY_INTERNAL = 1; 239 240 244 public final static int ENTITY_NDATA = 2; 245 246 250 public final static int ENTITY_TEXT = 3; 251 252 253 257 261 public final static int ATTRIBUTE_UNDECLARED = 0; 262 263 267 public final static int ATTRIBUTE_CDATA = 1; 268 269 273 public final static int ATTRIBUTE_ID = 2; 274 275 279 public final static int ATTRIBUTE_IDREF = 3; 280 281 285 public final static int ATTRIBUTE_IDREFS = 4; 286 287 291 public final static int ATTRIBUTE_ENTITY = 5; 292 293 297 public final static int ATTRIBUTE_ENTITIES = 6; 298 299 303 public final static int ATTRIBUTE_NMTOKEN = 7; 304 305 309 public final static int ATTRIBUTE_NMTOKENS = 8; 310 311 315 public final static int ATTRIBUTE_ENUMERATED = 9; 316 317 321 public final static int ATTRIBUTE_NOTATION = 10; 322 323 324 329 332 private static Hashtable attributeTypeHash; 333 static { 334 attributeTypeHash = new Hashtable (13); 335 attributeTypeHash.put ("CDATA", new Integer (ATTRIBUTE_CDATA)); 336 attributeTypeHash.put ("ID", new Integer (ATTRIBUTE_ID)); 337 attributeTypeHash.put ("IDREF", new Integer (ATTRIBUTE_IDREF)); 338 attributeTypeHash.put ("IDREFS", new Integer (ATTRIBUTE_IDREFS)); 339 attributeTypeHash.put ("ENTITY", new Integer (ATTRIBUTE_ENTITY)); 340 attributeTypeHash.put ("ENTITIES", new Integer (ATTRIBUTE_ENTITIES)); 341 attributeTypeHash.put ("NMTOKEN", new Integer (ATTRIBUTE_NMTOKEN)); 342 attributeTypeHash.put ("NMTOKENS", new Integer (ATTRIBUTE_NMTOKENS)); 343 attributeTypeHash.put ("NOTATION", new Integer (ATTRIBUTE_NOTATION)); 344 } 345 346 347 private final static int ENCODING_EXTERNAL = 0; 351 private final static int ENCODING_UTF_8 = 1; 352 private final static int ENCODING_ISO_8859_1 = 2; 353 private final static int ENCODING_UCS_2_12 = 3; 354 private final static int ENCODING_UCS_2_21 = 4; 355 private final static int ENCODING_UCS_4_1234 = 5; 356 private final static int ENCODING_UCS_4_4321 = 6; 357 private final static int ENCODING_UCS_4_2143 = 7; 358 private final static int ENCODING_UCS_4_3412 = 8; 359 private final static int ENCODING_ASCII = 9; 360 361 362 366 370 public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; 371 372 377 public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; 378 379 383 public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; 384 385 389 public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; 390 391 396 public final static int ATTRIBUTE_DEFAULT_FIXED = 34; 397 398 399 private final static int INPUT_NONE = 0; 403 private final static int INPUT_INTERNAL = 1; 404 private final static int INPUT_STREAM = 3; 405 private final static int INPUT_BUFFER = 4; 406 private final static int INPUT_READER = 5; 407 408 409 private final static int LIT_ENTITY_REF = 2; 414 private final static int LIT_NORMALIZE = 4; 416 private final static int LIT_ATTRIBUTE = 8; 418 private final static int LIT_DISABLE_PE = 16; 420 private final static int LIT_DISABLE_CREF = 32; 422 private final static int LIT_DISABLE_EREF = 64; 424 private final static int LIT_ENTITY_CHECK = 128; 426 private final static int LIT_PUBID = 256; 428 429 private final static int CONTEXT_NORMAL = 0; 434 private final static int CONTEXT_LITERAL = 1; 435 436 437 441 442 449 private void error (String message, String textFound, String textExpected) 450 throws SAXException 451 { 452 if (textFound != null) { 453 message = message + " (found \"" + textFound + "\")"; 454 } 455 if (textExpected != null) { 456 message = message + " (expected \"" + textExpected + "\")"; 457 } 458 String uri = null; 459 460 if (externalEntity != null) { 461 uri = externalEntity.getURL ().toString (); 462 } 463 handler.error (message, uri, line, column); 464 465 throw new SAXException (message); 467 } 468 469 470 475 private void error (String message, char textFound, String textExpected) 476 throws SAXException 477 { 478 error (message, new Character (textFound).toString (), textExpected); 479 } 480 481 482 private void error (String message) 483 throws SAXException 484 { 485 error (message, null, null); 486 } 487 488 489 493 494 504 private void parseDocument () 505 throws Exception 506 { 507 try { parseProlog (); 509 require ('<', "document prolog"); 510 parseElement (); 511 } catch (EOFException ee) { error("premature end of file", "[EOF]", null); 513 } 514 515 try { 516 parseMisc (); char c = readCh (); error ("unexpected characters after document end", c, null); 519 } catch (EOFException e) { 520 return; 521 } 522 } 523 524 525 532 private void parseComment () 533 throws Exception 534 { 535 char c; 536 boolean saved = expandPE; 537 538 expandPE = false; 539 parseUntil ("--"); 540 require ('>', "-- in comment"); 541 expandPE = saved; 542 handler.comment (dataBuffer, 0, dataBufferPos); 543 dataBufferPos = 0; 544 } 545 546 547 557 private void parsePI () 558 throws SAXException, IOException 559 { 560 String name; 561 boolean saved = expandPE; 562 563 expandPE = false; 564 name = readNmtoken (true); 565 if ("xml".equalsIgnoreCase (name)) 566 error ("Illegal processing instruction target", name, null); 567 if (!tryRead ("?>")) { 568 requireWhitespace (); 569 parseUntil ("?>"); 570 } 571 expandPE = saved; 572 handler.processingInstruction (name, dataBufferToString ()); 573 } 574 575 576 586 private void parseCDSect () 587 throws Exception 588 { 589 parseUntil ("]]>"); 590 dataBufferFlush (); 591 } 592 593 594 608 private void parseProlog () 609 throws Exception 610 { 611 parseMisc (); 612 613 if (tryRead ("<!DOCTYPE")) { 614 parseDoctypedecl (); 615 parseMisc (); 616 } 617 } 618 619 620 638 private String parseXMLDecl (boolean ignoreEncoding) 639 throws SAXException, IOException 640 { 641 String version; 642 String encodingName = null; 643 String standalone = null; 644 int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; 645 646 require ("version", "XML declaration"); 648 parseEq (); 649 version = readLiteral (flags); 650 if (!version.equals ("1.0")) { 651 error ("unsupported XML version", version, "1.0"); 652 } 653 654 boolean white = tryWhitespace (); 656 if (tryRead ("encoding")) { 657 if (!white) 658 error ("whitespace required before 'encoding='"); 659 parseEq (); 660 encodingName = readLiteral (flags); 661 if (!ignoreEncoding) 662 setupDecoding (encodingName); 663 } 664 665 if (encodingName != null) 667 white = tryWhitespace (); 668 if (tryRead ("standalone")) { 669 if (!white) 670 error ("whitespace required before 'standalone='"); 671 parseEq (); 672 standalone = readLiteral (flags); 673 if (! ("yes".equals (standalone) || "no".equals (standalone))) 674 error ("standalone flag must be 'yes' or 'no'"); 675 } 676 677 skipWhitespace (); 678 require ("?>", "XML declaration"); 679 680 return encodingName; 681 } 682 683 684 697 private String parseTextDecl (boolean ignoreEncoding) 698 throws SAXException, IOException 699 { 700 String encodingName = null; 701 int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; 702 703 if (tryRead ("version")) { 705 String version; 706 parseEq (); 707 version = readLiteral (flags); 708 if (!version.equals ("1.0")) { 709 error ("unsupported XML version", version, "1.0"); 710 } 711 requireWhitespace (); 712 } 713 714 715 require ("encoding", "XML text declaration"); 717 parseEq (); 718 encodingName = readLiteral (flags); 719 if (!ignoreEncoding) 720 setupDecoding (encodingName); 721 722 skipWhitespace (); 723 require ("?>", "XML text declaration"); 724 725 return encodingName; 726 } 727 728 729 745 private void setupDecoding (String encodingName) 746 throws SAXException, IOException 747 { 748 encodingName = encodingName.toUpperCase (); 749 750 754 757 if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) { 759 if (encodingName.equals ("ISO-8859-1") 760 || encodingName.equals ("8859_1") 761 || encodingName.equals ("ISO8859_1") 762 ) { 763 encoding = ENCODING_ISO_8859_1; 764 return; 765 } else if (encodingName.equals ("US-ASCII") 766 || encodingName.equals ("ASCII")) { 767 encoding = ENCODING_ASCII; 768 return; 769 } else if (encodingName.equals ("UTF-8") 770 || encodingName.equals ("UTF8")) { 771 encoding = ENCODING_UTF_8; 772 return; 773 } else if (encoding != ENCODING_EXTERNAL) { 774 throw new EncodingException (encodingName); 776 } 777 } 780 781 if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) { 783 if (!(encodingName.equals ("ISO-10646-UCS-2") 784 || encodingName.equals ("UTF-16") 785 || encodingName.equals ("UTF-16BE") 786 || encodingName.equals ("UTF-16LE"))) 787 error ("unsupported Unicode encoding", 788 encodingName, 789 "UTF-16"); 790 return; 791 } 792 793 if (encoding == ENCODING_UCS_4_1234 795 || encoding == ENCODING_UCS_4_4321 796 || encoding == ENCODING_UCS_4_2143 797 || encoding == ENCODING_UCS_4_3412) { 798 if (!encodingName.equals ("ISO-10646-UCS-4")) 799 error ("unsupported 32-bit encoding", 800 encodingName, 801 "ISO-10646-UCS-4"); 802 return; 803 } 804 805 809 if (encodingName.equals ("UTF-16BE")) { 810 encoding = ENCODING_UCS_2_12; 811 return; 812 } 813 if (encodingName.equals ("UTF-16LE")) { 814 encoding = ENCODING_UCS_2_21; 815 return; 816 } 817 818 822 if (encodingName.equals ("UTF-16") 823 || encodingName.equals ("ISO-10646-UCS-2")) 824 encodingName = "Unicode"; 825 827 reader = new InputStreamReader (is, encodingName); 828 sourceType = INPUT_READER; 829 } 830 831 832 839 private void parseMisc () 840 throws Exception 841 { 842 while (true) { 843 skipWhitespace (); 844 if (tryRead ("<?")) { 845 parsePI (); 846 } else if (tryRead ("<!--")) { 847 parseComment (); 848 } else { 849 return; 850 } 851 } 852 } 853 854 855 863 private void parseDoctypedecl () 864 throws Exception 865 { 866 String doctypeName, ids[]; 867 868 requireWhitespace (); 870 doctypeName = readNmtoken (true); 871 872 skipWhitespace (); 874 ids = readExternalIds (false); 875 876 handler.doctypeDecl (doctypeName, ids [0], ids [1]); 878 879 skipWhitespace (); 881 if (tryRead ('[')) { 882 883 while (true) { 885 expandPE = true; 886 skipWhitespace (); 887 expandPE = false; 888 if (tryRead (']')) { 889 break; } else { 891 peIsError = expandPE = true; 893 parseMarkupdecl (); 894 peIsError = expandPE = false; 895 } 896 } 897 } 898 899 if (ids [1] != null) { 901 pushURL ("[external subset]", ids [0], ids [1], null, null, null, false); 902 903 while (true) { 905 expandPE = true; 906 skipWhitespace (); 907 expandPE = false; 908 if (tryRead ('>')) { 909 break; 910 } else { 911 expandPE = true; 912 parseMarkupdecl (); 913 expandPE = false; 914 } 915 } 916 } else { 917 skipWhitespace (); 919 require ('>', "internal DTD subset"); 920 } 921 922 handler.endDoctype (); 924 expandPE = false; 925 } 926 927 928 939 private void parseMarkupdecl () 940 throws Exception 941 { 942 if (tryRead ("<!ELEMENT")) { 943 parseElementdecl (); 944 } else if (tryRead ("<!ATTLIST")) { 945 parseAttlistDecl (); 946 } else if (tryRead ("<!ENTITY")) { 947 parseEntityDecl (); 948 } else if (tryRead ("<!NOTATION")) { 949 parseNotationDecl (); 950 } else if (tryRead ("<?")) { 951 parsePI (); 952 } else if (tryRead ("<!--")) { 953 parseComment (); 954 } else if (tryRead ("<![")) { 955 if (inputStack.size () > 0) 956 parseConditionalSect (); 957 else 958 error ("conditional sections illegal in internal subset"); 959 } else { 960 error ("expected markup declaration"); 961 } 962 } 963 964 965 976 private void parseElement () 977 throws Exception 978 { 979 String gi; 980 char c; 981 int oldElementContent = currentElementContent; 982 String oldElement = currentElement; 983 Object element []; 984 985 tagAttributePos = 0; 988 989 gi = readNmtoken (true); 991 992 currentElement = gi; 994 element = (Object []) elementInfo.get (gi); 995 currentElementContent = getContentType (element, CONTENT_ANY); 996 997 boolean white = tryWhitespace (); 1000 c = readCh (); 1001 while (c != '/' && c != '>') { 1002 unread (c); 1003 if (!white) 1004 error ("need whitespace between attributes"); 1005 parseAttribute (gi); 1006 white = tryWhitespace (); 1007 c = readCh (); 1008 } 1009 1010 Enumeration atts = declaredAttributes (element); 1012 if (atts != null) { 1013 String aname; 1014loop: 1015 while (atts.hasMoreElements ()) { 1016 aname = (String) atts.nextElement (); 1017 for (int i = 0; i < tagAttributePos; i++) { 1019 if (tagAttributes [i] == aname) { 1020 continue loop; 1021 } 1022 } 1023 String defaultVal = getAttributeExpandedValue (gi, aname); 1025 if (defaultVal!=null) { 1026 handler.attribute (aname, defaultVal, false); 1027 } 1028 } 1029 } 1030 1031 switch (c) { 1035 case '>': 1036 handler.startElement (gi); 1037 parseContent (); 1038 break; 1039 case '/': 1040 require ('>', "empty element tag"); 1041 handler.startElement (gi); 1042 handler.endElement (gi); 1043 break; 1044 } 1045 1046 currentElement = oldElement; 1048 currentElementContent = oldElementContent; 1049 } 1050 1051 1052 1060 private void parseAttribute (String name) 1061 throws Exception 1062 { 1063 String aname; 1064 int type; 1065 String value; 1066 int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF; 1067 1068 aname = readNmtoken (true); 1070 type = getAttributeType (name, aname); 1071 1072 parseEq (); 1074 1075 if (type == ATTRIBUTE_CDATA || type == ATTRIBUTE_UNDECLARED) { 1078 value = readLiteral (flags); 1079 } else { 1080 value = readLiteral (flags | LIT_NORMALIZE); 1081 } 1082 1083 for (int i = 0; i < tagAttributePos; i++) 1085 if (aname.equals (tagAttributes [i])) 1086 error ("duplicate attribute", aname, null); 1087 1088 handler.attribute (aname, value, true); 1091 dataBufferPos = 0; 1092 1093 if (tagAttributePos == tagAttributes.length) { 1096 String newAttrib[] = new String [tagAttributes.length * 2]; 1097 System.arraycopy (tagAttributes, 0, newAttrib, 0, tagAttributePos); 1098 tagAttributes = newAttrib; 1099 } 1100 tagAttributes [tagAttributePos++] = aname; 1101 } 1102 1103 1104 1110 private void parseEq () 1111 throws SAXException, IOException 1112 { 1113 skipWhitespace (); 1114 require ('=', "attribute name"); 1115 skipWhitespace (); 1116 } 1117 1118 1119 1127 private void parseETag () 1128 throws Exception 1129 { 1130 require (currentElement, "element end tag"); 1131 skipWhitespace (); 1132 require ('>', "name in end tag"); 1133 handler.endElement (currentElement); 1134 } 1137 1138 1139 1148 private void parseContent () 1149 throws Exception 1150 { 1151 char c; 1152 while (true) { 1153 1170 parseCharData(); 1173 c = readCh (); 1175 switch (c) { 1176 case '&': 1178 c = readCh (); 1179 if (c == '#') { 1180 parseCharRef (); 1181 } else { 1182 unread (c); 1183 parseEntityRef (true); 1184 } 1185 break; 1186 1187 case '<': dataBufferFlush (); 1189 c = readCh (); 1190 switch (c) { 1191 case '!': c = readCh (); 1193 switch (c) { 1194 case '-': require ('-', "start of comment"); 1196 parseComment (); 1197 break; 1198 case '[': require ("CDATA[", "CDATA section"); 1200 handler.startCDATA (); 1201 inCDATA = true; 1202 parseCDSect (); 1203 inCDATA = false; 1204 handler.endCDATA (); 1205 break; 1206 default: 1207 error ("expected comment or CDATA section", c, null); 1208 break; 1209 } 1210 break; 1211 1212 case '?': parsePI (); 1214 break; 1215 1216 case '/': parseETag (); 1218 return; 1219 1220 default: unread (c); 1222 parseElement (); 1223 break; 1224 } 1225 } 1226 } 1227 } 1228 1229 1230 1237 private void parseElementdecl () 1238 throws Exception 1239 { 1240 String name; 1241 1242 requireWhitespace (); 1243 name = readNmtoken (true); 1245 1246 requireWhitespace (); 1247 parseContentspec (name); 1249 1250 skipWhitespace (); 1251 require ('>', "element declaration"); 1252 } 1253 1254 1255 1261 private void parseContentspec (String name) 1262 throws Exception 1263 { 1264 if (tryRead ("EMPTY")) { 1265 setElement (name, CONTENT_EMPTY, null, null); 1266 return; 1267 } else if (tryRead ("ANY")) { 1268 setElement (name, CONTENT_ANY, null, null); 1269 return; 1270 } else { 1271 require ('(', "element name"); 1272 dataBufferAppend ('('); 1273 skipWhitespace (); 1274 if (tryRead ("#PCDATA")) { 1275 dataBufferAppend ("#PCDATA"); 1276 parseMixed (); 1277 setElement (name, CONTENT_MIXED, dataBufferToString (), null); 1278 } else { 1279 parseElements (); 1280 setElement (name, CONTENT_ELEMENTS, 1281 dataBufferToString (), null); 1282 } 1283 } 1284 } 1285 1286 1287 1297 private void parseElements () 1298 throws Exception 1299 { 1300 char c; 1301 char sep; 1302 1303 skipWhitespace (); 1305 parseCp (); 1306 1307 skipWhitespace (); 1309 c = readCh (); 1310 switch (c) { 1311 case ')': 1312 dataBufferAppend (')'); 1313 c = readCh (); 1314 switch (c) { 1315 case '*': 1316 case '+': 1317 case '?': 1318 dataBufferAppend (c); 1319 break; 1320 default: 1321 unread (c); 1322 } 1323 return; 1324 case ',': case '|': 1326 sep = c; 1327 dataBufferAppend (c); 1328 break; 1329 default: 1330 error ("bad separator in content model", c, null); 1331 return; 1332 } 1333 1334 while (true) { 1336 skipWhitespace (); 1337 parseCp (); 1338 skipWhitespace (); 1339 c = readCh (); 1340 if (c == ')') { 1341 dataBufferAppend (')'); 1342 break; 1343 } else if (c != sep) { 1344 error ("bad separator in content model", c, null); 1345 return; 1346 } else { 1347 dataBufferAppend (c); 1348 } 1349 } 1350 1351 c = readCh (); 1353 switch (c) { 1354 case '?': 1355 case '*': 1356 case '+': 1357 dataBufferAppend (c); 1358 return; 1359 default: 1360 unread (c); 1361 return; 1362 } 1363 } 1364 1365 1366 1372 private void parseCp () 1373 throws Exception 1374 { 1375 if (tryRead ('(')) { 1376 dataBufferAppend ('('); 1377 parseElements (); 1378 } else { 1379 dataBufferAppend (readNmtoken (true)); 1380 char c = readCh (); 1381 switch (c) { 1382 case '?': 1383 case '*': 1384 case '+': 1385 dataBufferAppend (c); 1386 break; 1387 default: 1388 unread (c); 1389 break; 1390 } 1391 } 1392 } 1393 1394 1395 1402 private void parseMixed () 1403 throws Exception 1404 { 1405 1406 skipWhitespace (); 1408 if (tryRead (')')) { 1409 dataBufferAppend (")*"); 1410 tryRead ('*'); 1411 return; 1412 } 1413 1414 skipWhitespace (); 1416 while (!tryRead (")*")) { 1417 require ('|', "alternative"); 1418 dataBufferAppend ('|'); 1419 skipWhitespace (); 1420 dataBufferAppend (readNmtoken (true)); 1421 skipWhitespace (); 1422 } 1423 dataBufferAppend (")*"); 1424 } 1425 1426 1427 1434 private void parseAttlistDecl () 1435 throws Exception 1436 { 1437 String elementName; 1438 1439 requireWhitespace (); 1440 elementName = readNmtoken (true); 1441 boolean white = tryWhitespace (); 1442 while (!tryRead ('>')) { 1443 if (!white) 1444 error ("whitespace required before attribute definition"); 1445 parseAttDef (elementName); 1446 white = tryWhitespace (); 1447 } 1448 } 1449 1450 1451 1457 private void parseAttDef (String elementName) 1458 throws Exception 1459 { 1460 String name; 1461 int type; 1462 String enum = null; 1463 1464 name = readNmtoken (true); 1466 1467 requireWhitespace (); 1469 type = readAttType (); 1470 1471 if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) { 1474 enum = dataBufferToString (); 1475 } 1476 1477 requireWhitespace (); 1479 parseDefault (elementName, name, type, enum); 1480 } 1481 1482 1483 1493 private int readAttType () 1494 throws Exception 1495 { 1496 if (tryRead ('(')) { 1497 parseEnumeration (false); 1498 return ATTRIBUTE_ENUMERATED; 1499 } else { 1500 String typeString = readNmtoken (true); 1501 if (typeString.equals ("NOTATION")) { 1502 parseNotationType (); 1503 } 1504 Integer type = (Integer) attributeTypeHash.get (typeString); 1505 if (type == null) { 1506 error ("illegal attribute type", typeString, null); 1507 return ATTRIBUTE_UNDECLARED; 1508 } else { 1509 return type.intValue (); 1510 } 1511 } 1512 } 1513 1514 1515 1522 private void parseEnumeration (boolean isNames) 1523 throws Exception 1524 { 1525 dataBufferAppend ('('); 1526 1527 skipWhitespace (); 1529 dataBufferAppend (readNmtoken (isNames)); 1530 skipWhitespace (); 1532 while (!tryRead (')')) { 1533 require ('|', "enumeration value"); 1534 dataBufferAppend ('|'); 1535 skipWhitespace (); 1536 dataBufferAppend (readNmtoken (isNames)); 1537 skipWhitespace (); 1538 } 1539 dataBufferAppend (')'); 1540 } 1541 1542 1543 1551 private void parseNotationType () 1552 throws Exception 1553 { 1554 requireWhitespace (); 1555 require ('(', "NOTATION"); 1556 1557 parseEnumeration (true); 1558 } 1559 1560 1561 1568 private void parseDefault ( 1569 String elementName, 1570 String name, 1571 int type, 1572 String enum 1573 ) throws Exception 1574 { 1575 int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; 1576 String value = null; 1577 int flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF | LIT_ENTITY_CHECK | LIT_DISABLE_PE; 1578 1581 1586 1590 if (tryRead ('#')) { 1591 if (tryRead ("FIXED")) { 1592 valueType = ATTRIBUTE_DEFAULT_FIXED; 1593 requireWhitespace (); 1594 value = readLiteral (flags); 1595 } else if (tryRead ("REQUIRED")) { 1596 valueType = ATTRIBUTE_DEFAULT_REQUIRED; 1597 } else if (tryRead ("IMPLIED")) { 1598 valueType = ATTRIBUTE_DEFAULT_IMPLIED; 1599 } else { 1600 error ("illegal keyword for attribute default value"); 1601 } 1602 } else 1603 value = readLiteral (flags); 1604 setAttribute (elementName, name, type, enum, value, valueType); 1605 } 1606 1607 1608 1622 private void parseConditionalSect () 1623 throws Exception 1624 { 1625 skipWhitespace (); 1626 if (tryRead ("INCLUDE")) { 1627 skipWhitespace (); 1628 require ('[', "INCLUDE"); 1629 skipWhitespace (); 1630 while (!tryRead ("]]>")) { 1631 parseMarkupdecl (); 1632 skipWhitespace (); 1633 } 1634 } else if (tryRead ("IGNORE")) { 1635 skipWhitespace (); 1636 require ('[', "IGNORE"); 1637 int nesting = 1; 1638 char c; 1639 expandPE = false; 1640 for (int nest = 1; nest > 0;) { 1641 c = readCh (); 1642 switch (c) { 1643 case '<': 1644 if (tryRead ("![")) { 1645 nest++; 1646 } 1647 case ']': 1648 if (tryRead ("]>")) { 1649 nest--; 1650 } 1651 } 1652 } 1653 expandPE = true; 1654 } else { 1655 error ("conditional section must begin with INCLUDE or IGNORE"); 1656 } 1657 } 1658 1659 1660 1667 private void parseCharRef () 1668 throws SAXException, IOException 1669 { 1670 int value = 0; 1671 char c; 1672 1673 if (tryRead ('x')) { 1674loop1: 1675 while (true) { 1676 c = readCh (); 1677 switch (c) { 1678 case '0': 1679 case '1': 1680 case '2': 1681 case '3': 1682 case '4': 1683 case '5': 1684 case '6': 1685 case '7': 1686 case '8': 1687 case '9': 1688 case 'a': 1689 case 'A': 1690 case 'b': 1691 case 'B': 1692 case 'c': 1693 case 'C': 1694 case 'd': 1695 case 'D': 1696 case 'e': 1697 case 'E': 1698 case 'f': 1699 case 'F': 1700 value *= 16; 1701 value += Integer.parseInt (new Character (c).toString (), 1702 16); 1703 break; 1704 case ';': 1705 break loop1; 1706 default: 1707 error ("illegal character in character reference", c, null); 1708 break loop1; 1709 } 1710 } 1711 } else { 1712loop2: 1713 while (true) { 1714 c = readCh (); 1715 switch (c) { 1716 case '0': 1717 case '1': 1718 case '2': 1719 case '3': 1720 case '4': 1721 case '5': 1722 case '6': 1723 case '7': 1724 case '8': 1725 case '9': 1726 value *= 10; 1727 value += Integer.parseInt (new Character (c).toString (), 1728 10); 1729 break; 1730 case ';': 1731 break loop2; 1732 default: 1733 error ("illegal character in character reference", c, null); 1734 break loop2; 1735 } 1736 } 1737 } 1738 1739 if ((value < 0x0020 1741 && ! (value == '\n' || value == '\t' || value == '\r')) 1742 || (value >= 0xD800 && value <= 0xDFFF) 1743 || value == 0xFFFE || value == 0xFFFF 1744 || value > 0x0010ffff) 1745 error ("illegal XML character reference U+" 1746 + Integer.toHexString (value)); 1747 1748 if (value <= 0x0000ffff) { 1751 dataBufferAppend ((char) value); 1753 } else if (value <= 0x0010ffff) { 1754 value -= 0x10000; 1755 dataBufferAppend ((char) (0xd800 | (value >> 10))); 1757 dataBufferAppend ((char) (0xdc00 | (value & 0x0003ff))); 1758 } else { 1759 error ("character reference " + value + " is too large for UTF-16", 1761 new Integer (value).toString (), null); 1762 } 1763 } 1764 1765 1766 1774 private void parseEntityRef (boolean externalAllowed) 1775 throws SAXException, IOException 1776 { 1777 String name; 1778 1779 name = readNmtoken (true); 1780 require (';', "entity reference"); 1781 switch (getEntityType (name)) { 1782 case ENTITY_UNDECLARED: 1783 error ("reference to undeclared entity", name, null); 1784 break; 1785 case ENTITY_INTERNAL: 1786 pushString (name, getEntityValue (name)); 1787 break; 1788 case ENTITY_TEXT: 1789 if (externalAllowed) { 1790 pushURL (name, getEntityPublicId (name), 1791 getEntitySystemId (name), 1792 null, null, null, true); 1793 } else { 1794 error ("reference to external entity in attribute value.", 1795 name, null); 1796 } 1797 break; 1798 case ENTITY_NDATA: 1799 if (externalAllowed) { 1800 error ("unparsed entity reference in content", name, null); 1801 } else { 1802 error ("reference to external entity in attribute value.", 1803 name, null); 1804 } 1805 break; 1806 } 1807 } 1808 1809 1810 1817 private void parsePEReference () 1818 throws SAXException, IOException 1819 { 1820 String name; 1821 1822 name = "%" + readNmtoken (true); 1823 require (';', "parameter entity reference"); 1824 switch (getEntityType (name)) { 1825 case ENTITY_UNDECLARED: 1826 1831 break; 1832 case ENTITY_INTERNAL: 1833 if (inLiteral) 1834 pushString (name, getEntityValue (name)); 1835 else 1836 pushString (name, ' ' + getEntityValue (name) + ' '); 1837 break; 1838 case ENTITY_TEXT: 1839 if (!inLiteral) 1840 pushString (null, " "); 1841 pushURL (name, getEntityPublicId (name), 1842 getEntitySystemId (name), 1843 null, null, null, true); 1844 if (!inLiteral) 1845 pushString (null, " "); 1846 break; 1847 } 1848 } 1849 1850 1864 private void parseEntityDecl () 1865 throws Exception 1866 { 1867 boolean peFlag = false; 1868 1869 expandPE = false; 1871 requireWhitespace (); 1872 if (tryRead ('%')) { 1873 peFlag = true; 1874 requireWhitespace (); 1875 } 1876 expandPE = true; 1877 1878 String name = readNmtoken (true); 1881 if (peFlag) { 1882 name = "%" + name; 1883 } 1884 1885 requireWhitespace (); 1887 char c = readCh (); 1888 unread (c); 1889 if (c == '"' || c == '\'') { 1890 String value = readLiteral (0); 1893 setInternalEntity (name, value); 1894 } else { 1895 String[] ids = readExternalIds (false); 1897 if (ids [1] == null) { 1898 error ("system identifer missing", name, null); 1899 } 1900 1901 boolean white = tryWhitespace (); 1903 if (!peFlag && tryRead ("NDATA")) { 1904 if (!white) 1905 error ("whitespace required before NDATA"); 1906 requireWhitespace (); 1907 String notationName = readNmtoken (true); 1908 setExternalDataEntity (name, ids [0], ids [1], notationName); 1909 } else { 1910 setExternalTextEntity (name, ids [0], ids [1]); 1911 } 1912 } 1913 1914 skipWhitespace (); 1916 require ('>', "NDATA"); 1917 } 1918 1919 1920 1929 private void parseNotationDecl () 1930 throws Exception 1931 { 1932 String nname, ids[]; 1933 1934 1935 requireWhitespace (); 1936 nname = readNmtoken (true); 1937 1938 requireWhitespace (); 1939 1940 ids = readExternalIds (true); 1942 if (ids [0] == null && ids [1] == null) { 1943 error ("external identifer missing", nname, null); 1944 } 1945 1946 setNotation (nname, ids [0], ids [1]); 1948 1949 skipWhitespace (); 1950 require ('>', "notation declaration"); 1951 } 1952 1953 1954 1960 private void parseCharData () 1961 throws Exception 1962 { 1963 char c; 1964 1965 if (USE_CHEATS) { 1971 int lineAugment = 0; 1972 int columnAugment = 0; 1973 1974loop: 1975 for (int i = readBufferPos; i < readBufferLength; i++) { 1976 1977 switch (c = readBuffer [i]) { 1978 case '\n': 1979 lineAugment++; 1980 columnAugment = 0; 1981 break; 1982 case '&': 1983 case '<': 1984 int start = readBufferPos; 1985 columnAugment++; 1986 readBufferPos = i; 1987 if (lineAugment > 0) { 1988 line += lineAugment; 1989 column = columnAugment; 1990 } else { 1991 column += columnAugment; 1992 } 1993 dataBufferAppend (readBuffer, start, i - start); 1994 return; 1995 case ']': 1996 if ((i + 2) < readBufferLength) { 1998 if (readBuffer [i + 1] == ']' 1999 && readBuffer [i + 2] == '>') { 2000 error ("character data may not contain ']]>'"); 2001 } 2002 } 2003 columnAugment++; 2004 break; 2005 default: 2006 if (c < 0x0020 || c > 0xFFFD) 2007 error ("illegal XML character U+" 2008 + Integer.toHexString (c)); 2009 case '\r': 2011 case '\t': 2012 columnAugment++; 2013 } 2014 } 2015 } 2016 2017 2020 int closeSquareBracketCount = 0; 2021 while (true) { 2022 c = readCh (); 2023 switch (c) { 2024 case '<': 2025 case '&': 2026 unread (c); 2027 return; 2028 case ']': 2029 closeSquareBracketCount++; 2030 dataBufferAppend(c); 2031 break; 2032 case '>': 2033 if (closeSquareBracketCount>=2) { 2034 error ("']]>' is not allowed here"); 2036 break; 2037 } 2038 default: 2040 closeSquareBracketCount=0; 2041 dataBufferAppend (c); 2042 break; 2043 } 2044 } 2045 } 2046 2047 2048 2052 2055 private void requireWhitespace () 2056 throws SAXException, IOException 2057 { 2058 char c = readCh (); 2059 if (isWhitespace (c)) { 2060 skipWhitespace (); 2061 } else { 2062 error ("whitespace required", c, null); 2063 } 2064 } 2065 2066 2067 2070 private void parseWhitespace () throws Exception 2072 { 2073 char c = readCh (); 2074 while (isWhitespace (c)) { 2075 dataBufferAppend (c); 2076 c = readCh (); 2077 } 2078 unread (c); 2079 } 2080 2081 2082 2088 private void skipWhitespace () 2089 throws SAXException, IOException 2090 { 2091 if (USE_CHEATS) { 2096 int lineAugment = 0; 2097 int columnAugment = 0; 2098 2099loop: 2100 for (int i = readBufferPos; i < readBufferLength; i++) { 2101 switch (readBuffer [i]) { 2102 case ' ': 2103 case '\t': 2104 case '\r': 2105 columnAugment++; 2106 break; 2107 case '\n': 2108 lineAugment++; 2109 columnAugment = 0; 2110 break; 2111 case '%': 2112 if (expandPE) 2113 break loop; 2114 default: 2116 readBufferPos = i; 2117 if (lineAugment > 0) { 2118 line += lineAugment; 2119 column = columnAugment; 2120 } else { 2121 column += columnAugment; 2122 } 2123 return; 2124 } 2125 } 2126 } 2127 2128 char c = readCh (); 2130 while (isWhitespace (c)) { 2131 c = readCh (); 2132 } 2133 unread (c); 2134 } 2135 2136 2137 2144 private String readNmtoken (boolean isName) 2145 throws SAXException, IOException 2146 { 2147 char c; 2148 2149 if (USE_CHEATS) { 2150loop: 2151 for (int i = readBufferPos; i < readBufferLength; i++) { 2152 c = readBuffer [i]; 2153 switch (c) { 2154 case '%': 2155 if (expandPE) 2156 break loop; 2157 2159 case '<': case '>': case '&': 2161 case ',': case '|': case '*': case '+': case '?': 2162 case ')': 2163 case '=': 2164 case '\'': case '"': 2165 case '[': 2166 case ' ': case '\t': case '\r': case '\n': 2167 case ';': 2168 case '/': 2169 int start = readBufferPos; 2170 if (i == start) 2171 error ("name expected", readBuffer [i], null); 2172 readBufferPos = i; 2173 return intern (readBuffer, start, i - start); 2174 2175 default: 2176 if (i == readBufferPos && isName) { 2179 if (!Character.isUnicodeIdentifierStart (c) 2180 && c != ':' && c != '_') 2181 error ("Not a name start character, U+" 2182 + Integer.toHexString (c)); 2183 } else if (!Character.isUnicodeIdentifierPart (c) 2184 && c != '-' && c != ':' && c != '_' && c != '.' 2185 && !isExtender (c)) 2186 error ("Not a name character, U+" 2187 + Integer.toHexString (c)); 2188 } 2189 } 2190 } 2191 2192 nameBufferPos = 0; 2193 2194 loop: 2196 while (true) { 2197 c = readCh (); 2198 switch (c) { 2199 case '%': 2200 case '<': case '>': case '&': 2201 case ',': case '|': case '*': case '+': case '?': 2202 case ')': 2203 case '=': 2204 case '\'': case '"': 2205 case '[': 2206 case ' ': case '\t': case '\n': case '\r': 2207 case ';': 2208 case '/': 2209 unread (c); 2210 if (nameBufferPos == 0) { 2211 error ("name expected"); 2212 } 2213 if (isName 2215 && !Character.isUnicodeIdentifierStart ( 2216 nameBuffer [0]) 2217 && ":_".indexOf (nameBuffer [0]) == -1) 2218 error ("Not a name start character, U+" 2219 + Integer.toHexString (nameBuffer [0])); 2220 String s = intern (nameBuffer, 0, nameBufferPos); 2221 nameBufferPos = 0; 2222 return s; 2223 default: 2224 2226 if ((nameBufferPos != 0 || !isName) 2227 && !Character.isUnicodeIdentifierPart (c) 2228 && ":-_.".indexOf (c) == -1 2229 && !isExtender (c)) 2230 error ("Not a name character, U+" 2231 + Integer.toHexString (c)); 2232 if (nameBufferPos >= nameBuffer.length) 2233 nameBuffer = 2234 (char[]) extendArray (nameBuffer, 2235 nameBuffer.length, nameBufferPos); 2236 nameBuffer [nameBufferPos++] = c; 2237 } 2238 } 2239 } 2240 2241 private static boolean isExtender (char c) 2242 { 2243 return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387 2245 || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005 2246 || (c >= 0x3031 && c <= 0x3035) 2247 || (c >= 0x309d && c <= 0x309e) 2248 || (c >= 0x30fc && c <= 0x30fe); 2249 } 2250 2251 2252 2265 private String readLiteral (int flags) 2266 throws SAXException, IOException 2267 { 2268 char delim, c; 2269 int startLine = line; 2270 boolean saved = expandPE; 2271 2272 delim = readCh (); 2274 if (delim != '"' && delim != '\'' && delim != (char) 0) { 2275 error ("expected '\"' or \"'\"", delim, null); 2276 return null; 2277 } 2278 inLiteral = true; 2279 if ((flags & LIT_DISABLE_PE) != 0) 2280 expandPE = false; 2281 2282 char ourBuf [] = readBuffer; 2286 2287 try { 2289 c = readCh (); 2290loop: 2291 while (! (c == delim && readBuffer == ourBuf)) { 2292 switch (c) { 2293 case '\n': 2296 case '\r': 2297 if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0) 2298 c = ' '; 2299 break; 2300 case '\t': 2301 if ((flags & LIT_ATTRIBUTE) != 0) 2302 c = ' '; 2303 break; 2304 case '&': 2305 c = readCh (); 2306 if (c == '#') { 2309 if ((flags & LIT_DISABLE_CREF) != 0) { 2310 dataBufferAppend ('&'); 2311 continue; 2312 } 2313 parseCharRef (); 2314 2315 } else { 2317 unread (c); 2318 if ((flags & LIT_ENTITY_REF) > 0) { 2320 parseEntityRef (false); 2321 2322 } else if ((flags & LIT_DISABLE_EREF) != 0) { 2324 dataBufferAppend ('&'); 2325 2326 } else { 2328 String name = readNmtoken (true); 2329 require (';', "entity reference"); 2330 if ((flags & LIT_ENTITY_CHECK) != 0 2331 && getEntityType (name) == 2332 ENTITY_UNDECLARED) { 2333 error ("General entity '" + name 2335 + "' must be declared before use"); 2336 } 2337 dataBufferAppend ('&'); 2338 dataBufferAppend (name); 2339 dataBufferAppend (';'); 2340 } 2341 } 2342 c = readCh (); 2343 continue loop; 2344 2345 case '<': 2346 if ((flags & LIT_ATTRIBUTE) != 0) 2349 error ("attribute values may not contain '<'"); 2350 break; 2351 2352 2354 default: 2355 break; 2356 } 2357 dataBufferAppend (c); 2358 c = readCh (); 2359 } 2360 } catch (EOFException e) { 2361 error ("end of input while looking for delimiter (started on line " 2362 + startLine + ')', null, new Character (delim).toString ()); 2363 } 2364 inLiteral = false; 2365 expandPE = saved; 2366 2367 if ((flags & LIT_NORMALIZE) > 0) { 2369 dataBufferNormalize (); 2370 } 2371 2372 return dataBufferToString (); 2374 } 2375 2376 2377 2383 private String[] readExternalIds (boolean inNotation) 2384 throws Exception 2385 { 2386 char c; 2387 String ids[] = new String [2]; 2388 int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; 2389 2390 if (tryRead ("PUBLIC")) { 2391 requireWhitespace (); 2392 ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags); 2393 if (inNotation) { 2394 skipWhitespace (); 2395 c = readCh (); 2396 unread (c); 2397 if (c == '"' || c == '\'') { 2398 ids [1] = readLiteral (flags); 2399 } 2400 } else { 2401 requireWhitespace (); 2402 ids [1] = readLiteral (flags); 2403 } 2404 2405 for (int i = 0; i < ids [0].length (); i++) { 2406 c = ids [0].charAt (i); 2407 if (c >= 'a' && c <= 'z') 2408 continue; 2409 if (c >= 'A' && c <= 'Z') 2410 continue; 2411 if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf (c) != -1) 2412 continue; 2413 error ("illegal PUBLIC id character U+" 2414 + Integer.toHexString (c)); 2415 } 2416 } else if (tryRead ("SYSTEM")) { 2417 requireWhitespace (); 2418 ids [1] = readLiteral (flags); 2419 } 2420 2421 2425 return ids; 2426 } 2427 2428 2429 2437 private final boolean isWhitespace (char c) 2438 { 2439 if (c > 0x20) 2440 return false; 2441 if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d) 2442 return true; 2443 return false; } 2445 2446 2447 2451 2452 2455 private void dataBufferAppend (char c) 2456 { 2457 if (dataBufferPos >= dataBuffer.length) 2459 dataBuffer = 2460 (char[]) extendArray (dataBuffer, 2461 dataBuffer.length, dataBufferPos); 2462 dataBuffer [dataBufferPos++] = c; 2463 } 2464 2465 2466 2469 private void dataBufferAppend (String s) 2470 { 2471 dataBufferAppend (s.toCharArray (), 0, s.length ()); 2472 } 2473 2474 2475 2478 private void dataBufferAppend (char ch[], int start, int length) 2479 { 2480 dataBuffer = (char[]) 2481 extendArray (dataBuffer, dataBuffer.length, 2482 dataBufferPos + length); 2483 2484 System.arraycopy (ch, start, dataBuffer, dataBufferPos, length); 2485 dataBufferPos += length; 2486 } 2487 2488 2489 2492 private void dataBufferNormalize () 2493 { 2494 int i = 0; 2495 int j = 0; 2496 int end = dataBufferPos; 2497 2498 while (j < end && dataBuffer [j] == ' ') { 2500 j++; 2501 } 2502 2503 while (end > j && dataBuffer [end - 1] == ' ') { 2505 end --; 2506 } 2507 2508 while (j < end) { 2510 2511 char c = dataBuffer [j++]; 2512 2513 if (c == ' ') { 2516 while (j < end && dataBuffer [j++] == ' ') {} 2517 2518 dataBuffer [i++] = ' '; 2519 dataBuffer [i++] = dataBuffer [j - 1]; 2520 } else { 2521 dataBuffer [i++] = c; 2522 } 2523 } 2524 2525 dataBufferPos = i; 2527 } 2528 2529 2530 2533 private String dataBufferToString () 2534 { 2535 String s = new String (dataBuffer, 0, dataBufferPos); 2536 dataBufferPos = 0; 2537 return s; 2538 } 2539 2540 2541 2545 private void dataBufferFlush () 2546 throws SAXException 2547 { 2548 if (currentElementContent == CONTENT_ELEMENTS 2549 && dataBufferPos > 0 2550 && !inCDATA 2551 ) { 2552 for (int i = 0; i < dataBufferPos; i++) { 2555 if (!isWhitespace (dataBuffer [i])) { 2556 handler.charData (dataBuffer, 0, dataBufferPos); 2557 dataBufferPos = 0; 2558 } 2559 } 2560 if (dataBufferPos > 0) { 2561 handler.ignorableWhitespace (dataBuffer, 0, dataBufferPos); 2562 dataBufferPos = 0; 2563 } 2564 } else if (dataBufferPos > 0) { 2565 handler.charData (dataBuffer, 0, dataBufferPos); 2566 dataBufferPos = 0; 2567 } 2568 } 2569 2570 2571 2577 private void require (String delim, String context) 2578 throws SAXException, IOException 2579 { 2580 int length = delim.length (); 2581 char ch []; 2582 2583 if (length < dataBuffer.length) { 2584 ch = dataBuffer; 2585 delim.getChars (0, length, ch, 0); 2586 } else 2587 ch = delim.toCharArray (); 2588 2589 if (USE_CHEATS 2590 && length <= (readBufferLength - readBufferPos)) { 2591 int offset = readBufferPos; 2592 2593 for (int i = 0; i < length; i++, offset++) 2594 if (ch [i] != readBuffer [offset]) 2595 error ("unexpected characters in " + context, null, delim); 2596 readBufferPos = offset; 2597 2598 } else { 2599 for (int i = 0; i < length; i++) 2600 require (ch [i], delim); 2601 } 2602 } 2603 2604 2605 2608 private void require (char delim, String after) 2609 throws SAXException, IOException 2610 { 2611 char c = readCh (); 2612 2613 if (c != delim) { 2614 error ("unexpected character after " + after, c, delim+""); 2615 } 2616 } 2617 2618 2619 2635 public String intern (char ch[], int start, int length) 2636 { 2637 int index = 0; 2638 int hash = 0; 2639 Object bucket []; 2640 2641 for (int i = start; i < start + length; i++) 2643 hash = 31 * hash + ch [i]; 2644 hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH; 2645 2646 if ((bucket = symbolTable [hash]) == null) { 2648 bucket = new Object [8]; 2650 2651 } else { 2654 while (index < bucket.length) { 2655 char chFound [] = (char []) bucket [index]; 2656 2657 if (chFound == null) 2659 break; 2660 2661 if (chFound.length == length) { 2663 for (int i = 0; i < chFound.length; i++) { 2664 if (ch [start + i] != chFound [i]) { 2666 break; 2667 } else if (i == length - 1) { 2668 return (String) bucket [index + 1]; 2670 } 2671 } 2672 } 2673 index += 2; 2674 } 2675 2677 bucket = (Object []) extendArray (bucket, bucket.length, index); 2679 } 2680 symbolTable [hash] = bucket; 2681 2682 String s = new String (ch, start, length).intern (); 2685 bucket [index] = s.toCharArray (); 2686 bucket [index + 1] = s; 2687 return s; 2688 } 2689 2690 2691 2695 private Object extendArray (Object array, int currentSize, int requiredSize) 2696 { 2697 if (requiredSize < currentSize) { 2698 return array; 2699 } else { 2700 Object newArray = null; 2701 int newSize = currentSize * 2; 2702 2703 if (newSize <= requiredSize) 2704 newSize = requiredSize + 1; 2705 2706 if (array instanceof char[]) 2707 newArray = new char [newSize]; 2708 else if (array instanceof Object[]) 2709 newArray = new Object [newSize]; 2710 else 2711 throw new RuntimeException (); 2712 2713 System.arraycopy (array, 0, newArray, 0, currentSize); 2714 return newArray; 2715 } 2716 } 2717 2718 2719 2723 2724 2728 2737 public Enumeration declaredElements () 2738 { 2739 return elementInfo.keys (); 2740 } 2741 2742 2743 2754 private int getContentType (Object element [], int defaultType) 2755 { 2756 int retval; 2757 2758 if (element == null) 2759 return defaultType; 2760 retval = ((Integer) element [0]).intValue (); 2761 if (retval == CONTENT_UNDECLARED) 2762 retval = defaultType; 2763 return retval; 2764 } 2765 2766 2767 2778 public int getElementContentType (String name) 2779 { 2780 Object element [] = (Object []) elementInfo.get (name); 2781 return getContentType (element, CONTENT_UNDECLARED); 2782 } 2783 2784 2785 2793 public String getElementContentModel (String name) 2794 { 2795 Object element[] = (Object[]) elementInfo.get (name); 2796 if (element == null) { 2797 return null; 2798 } else { 2799 return (String) element [1]; 2800 } 2801 } 2802 2803 2804 2811 private void setElement (String name, int contentType, 2812 String contentModel, Hashtable attributes) 2813 throws Exception 2814 { 2815 Object element[] = (Object []) elementInfo.get (name); 2816 2817 if (element == null) { 2819 element = new Object [3]; 2820 element [0] = new Integer (contentType); 2821 element [1] = contentModel; 2822 element [2] = attributes; 2823 elementInfo.put (name, element); 2824 return; 2825 } 2826 2827 if (contentType != CONTENT_UNDECLARED) { 2829 if (((Integer) element [0]).intValue () == CONTENT_UNDECLARED) { 2831 element [0] = new Integer (contentType); 2832 element [1] = contentModel; 2833 } else { 2834 } 2837 } 2838 2839 else if (attributes != null) { 2841 element [2] = attributes; 2842 } 2843 2844 } 2845 2846 2847 2851 private Hashtable getElementAttributes (String name) 2852 { 2853 Object element[] = (Object[]) elementInfo.get (name); 2854 if (element == null) { 2855 return null; 2856 } else { 2857 return (Hashtable) element [2]; 2858 } 2859 } 2860 2861 2862 2863 2867 2879 private Enumeration declaredAttributes (Object element []) 2880 { 2881 Hashtable attlist; 2882 2883 if (element == null) 2884 return null; 2885 if ((attlist = (Hashtable) element [2]) == null) 2886 return null; 2887 return attlist.keys (); 2888 } 2889 2890 2902 public Enumeration declaredAttributes (String elname) 2903 { 2904 return declaredAttributes ((Object []) elementInfo.get (elname)); 2905 } 2906 2907 2908 2925 public int getAttributeType (String name, String aname) 2926 { 2927 Object attribute[] = getAttribute (name, aname); 2928 if (attribute == null) { 2929 return ATTRIBUTE_UNDECLARED; 2930 } else { 2931 return ((Integer) attribute [0]).intValue (); 2932 } 2933 } 2934 2935 2936 2944 public String getAttributeEnumeration (String name, String aname) 2945 { 2946 Object attribute[] = getAttribute (name, aname); 2947 if (attribute == null) { 2948 return null; 2949 } else { 2950 return (String) attribute [3]; 2951 } 2952 } 2953 2954 2955 2963 public String getAttributeDefaultValue (String name, String aname) 2964 { 2965 Object attribute[] = getAttribute (name, aname); 2966 if (attribute == null) { 2967 return null; 2968 } else { 2969 return (String) attribute [1]; 2970 } 2971 } 2972 2973 2974 2983 public String getAttributeExpandedValue (String name, String aname) 2984 throws Exception 2985 { 2986 Object attribute[] = getAttribute (name, aname); 2987 2988 if (attribute == null) { 2989 return null; 2990 } else if (attribute [4] == null && attribute [1] != null) { 2991 char buf [] = new char [1]; 2994 int flags = LIT_ENTITY_REF | LIT_ATTRIBUTE; 2995 int type = getAttributeType (name, aname); 2996 2997 if (type != ATTRIBUTE_CDATA && type != ATTRIBUTE_UNDECLARED) 2998 flags |= LIT_NORMALIZE; 2999 buf [0] = '"'; 3000 pushCharArray (null, buf, 0, 1); 3001 pushString (null, (String) attribute [1]); 3002 pushCharArray (null, buf, 0, 1); 3003 attribute [4] = readLiteral (flags); 3004 } 3005 return (String) attribute [4]; 3006 } 3007 3008 3009 3016 public int getAttributeDefaultValueType (String name, String aname) 3017 { 3018 Object attribute[] = getAttribute (name, aname); 3019 if (attribute == null) { 3020 return ATTRIBUTE_DEFAULT_UNDECLARED; 3021 } else { 3022 return ((Integer) attribute [2]).intValue (); 3023 } 3024 } 3025 3026 3027 3034 private void setAttribute (String elName, String name, int type, 3035 String enumeration, 3036 String value, int valueType) 3037 throws Exception 3038 { 3039 Hashtable attlist; 3040 3041 attlist = getElementAttributes (elName); 3043 if (attlist == null) { 3044 attlist = new Hashtable (); 3045 } 3046 3047 if (attlist.get (name) != null) { 3049 return; 3051 } else { 3052 Object[] attribute = new Object [5]; 3053 attribute [0] = new Integer (type); 3054 attribute [1] = value; 3055 attribute [2] = new Integer (valueType); 3056 attribute [3] = enumeration; 3057 attribute [4] = null; 3058 attlist.put (name, attribute); 3059 3060 setElement (elName, CONTENT_UNDECLARED, null, attlist); 3062 } 3063 } 3064 3065 3066 3070 private Object[] getAttribute (String elName, String name) 3071 { 3072 Hashtable attlist = getElementAttributes (elName); 3073 if (attlist == null) { 3074 return null; 3075 } 3076 3077 return (Object[]) attlist.get (name); 3078 } 3079 3080 3081 3085 3096 public Enumeration declaredEntities () 3097 { 3098 return entityInfo.keys (); 3099 } 3100 3101 3102 3110 public int getEntityType (String ename) 3111 { 3112 Object entity[] = (Object[]) entityInfo.get (ename); 3113 if (entity == null) { 3114 return ENTITY_UNDECLARED; 3115 } else { 3116 return ((Integer) entity [0]).intValue (); 3117 } 3118 } 3119 3120 3121 3130 public String getEntityPublicId (String ename) 3131 { 3132 Object entity[] = (Object[]) entityInfo.get (ename); 3133 if (entity == null) { 3134 return null; 3135 } else { 3136 return (String) entity [1]; 3137 } 3138 } 3139 3140 3141 3151 public String getEntitySystemId (String ename) 3152 { 3153 Object entity[] = (Object[]) entityInfo.get (ename); 3154 if (entity == null) { 3155 return null; 3156 } else { 3157 try { 3158 String relativeURI = (String)entity [2]; 3159 URL baseURI = (URL)entity [5]; 3160 if (baseURI==null) return relativeURI; 3161 URL absoluteURI = new URL( baseURI, relativeURI ); 3162 return absoluteURI.toString(); 3163 } catch (IOException err) { 3164 return (String)entity [2]; 3167 } 3168 } 3169 } 3170 3171 3172 3179 public String getEntityValue (String ename) 3180 { 3181 Object entity[] = (Object[]) entityInfo.get (ename); 3182 if (entity == null) { 3183 return null; 3184 } else { 3185 return (String) entity [3]; 3186 } 3187 } 3188 3189 3190 3198 public String getEntityNotationName (String eName) 3199 { 3200 Object entity[] = (Object[]) entityInfo.get (eName); 3201 if (entity == null) { 3202 return null; 3203 } else { 3204 return (String) entity [4]; 3205 } 3206 } 3207 3208 3209 3212 private void setInternalEntity (String eName, String value) 3213 { 3214 setEntity (eName, ENTITY_INTERNAL, null, null, value, null); 3215 } 3216 3217 3218 3221 private void setExternalDataEntity (String eName, String pubid, 3222 String sysid, String nName) 3223 { 3224 setEntity (eName, ENTITY_NDATA, pubid, sysid, null, nName); 3225 } 3226 3227 3228 3231 private void setExternalTextEntity (String eName, 3232 String pubid, String sysid) 3233 { 3234 setEntity (eName, ENTITY_TEXT, pubid, sysid, null, null); 3235 } 3236 3237 3238 3241 private void setEntity (String eName, int eClass, 3242 String pubid, String sysid, 3243 String value, String nName) 3244 { 3245 Object entity[]; 3246 3247 if (entityInfo.get (eName) == null) { 3248 entity = new Object [6]; 3249 entity [0] = new Integer (eClass); 3250 entity [1] = pubid; 3251 entity [2] = sysid; 3252 entity [3] = value; 3253 entity [4] = nName; 3254 entity [5] = (externalEntity == null ? null : externalEntity.getURL()); 3255 3257 entityInfo.put (eName, entity); 3258 } 3259 } 3260 3261 3262 3266 3274 public Enumeration declaredNotations () 3275 { 3276 return notationInfo.keys (); 3277 } 3278 3279 3280 3290 public String getNotationPublicId (String nname) 3291 { 3292 Object notation[] = (Object[]) notationInfo.get (nname); 3293 if (notation == null) { 3294 return null; 3295 } else { 3296 return (String) notation [0]; 3297 } 3298 } 3299 3300 3301 3310 public String getNotationSystemId (String nname) 3311 { 3312 Object notation[] = (Object[]) notationInfo.get (nname); 3313 if (notation == null) { 3314 return null; 3315 } else { 3316 return (String) notation [1]; 3317 } 3318 } 3319 3320 3321 3327 private void setNotation (String nname, String pubid, String sysid) 3328 throws Exception 3329 { 3330 Object notation[]; 3331 3332 if (notationInfo.get (nname) == null) { 3333 notation = new Object [2]; 3334 notation [0] = pubid; 3335 notation [1] = sysid; 3336 notationInfo.put (nname, notation); 3337 } else { 3338 } 3341 } 3342 3343 3344 3348 3349 3352 public int getLineNumber () 3353 { 3354 return line; 3355 } 3356 3357 3358 3361 public int getColumnNumber () 3362 { 3363 return column; 3364 } 3365 3366 3367 3371 3372 3390 private char readCh () 3391 throws SAXException, IOException 3392 { 3393 3394 while (readBufferPos >= readBufferLength) { 3399 switch (sourceType) { 3400 case INPUT_READER: 3401 case INPUT_STREAM: 3402 readDataChunk (); 3403 while (readBufferLength < 1) { 3404 popInput (); 3405 if (readBufferLength < 1) { 3406 readDataChunk (); 3407 } 3408 } 3409 break; 3410 3411 default: 3412 3413 popInput (); 3414 break; 3415 } 3416 } 3417 3418 char c = readBuffer [readBufferPos++]; 3419 3420 if (c == '\n') { 3421 line++; 3422 column = 0; 3423 } else { 3424 if (c == '<') { 3425 ; 3426 } else if ((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD) 3427 error ("illegal XML character U+" 3428 + Integer.toHexString (c)); 3429 3430 else if (c == '%' && expandPE) { 3435 if (peIsError && entityStack.size()==1) 3436 error ("PE reference within declaration in internal subset."); 3438 parsePEReference (); 3439 return readCh (); 3440 } 3441 column++; 3442 } 3443 3444 return c; 3445 } 3446 3447 3448 3463 private void unread (char c) 3464 throws SAXException 3465 { 3466 if (c == '\n') { 3468 line--; 3469 column = -1; 3470 } 3471 if (readBufferPos > 0) { 3472 readBuffer [--readBufferPos] = c; 3473 } else { 3474 pushString (null, new Character (c).toString ()); 3475 } 3476 } 3477 3478 3479 3489 private void unread (char ch[], int length) 3490 throws SAXException 3491 { 3492 for (int i = 0; i < length; i++) { 3493 if (ch [i] == '\n') { 3494 line--; 3495 column = -1; 3496 } 3497 } 3498 if (length < readBufferPos) { 3499 readBufferPos -= length; 3500 } else { 3501 pushCharArray (null, ch, 0, length); 3502 sourceType = INPUT_BUFFER; 3503 } 3504 } 3505 3506 3507 3524 private void pushURL ( 3525 String ename, 3526 String publicId, 3527 String systemId, 3528 Reader reader, 3529 InputStream stream, 3530 String encoding, 3531 boolean isAbsolute 3532 ) throws SAXException, IOException 3533 { 3534 boolean ignoreEncoding = false; 3535 3536 pushInput (ename); 3538 3539 readBuffer = new char [READ_BUFFER_MAX + 4]; 3542 readBufferPos = 0; 3543 readBufferLength = 0; 3544 readBufferOverflow = -1; 3545 is = null; 3546 line = 1; 3547 column = 0; 3548 currentByteCount = 0; 3549 3550 if (!isAbsolute) { 3551 3552 try { 3556 if (systemId != null && externalEntity != null) { 3557 systemId = new URL (externalEntity.getURL (), systemId).toString (); 3558 } else if (baseURI != null) { 3559 systemId = new URL (new URL (baseURI), systemId).toString (); 3560 } 3562 } catch(java.io.IOException err) { 3563 popInput(); 3564 error("Invalid URL " + systemId + " (" + err.getMessage() + ")"); 3565 } 3566 } 3567 3568 if (reader == null && stream == null && systemId != null) { 3572 Object input = null; 3573 try { 3574 input = handler.resolveEntity (publicId, systemId); 3575 } catch (java.io.IOException err) { 3576 popInput(); 3577 error("Failure resolving entity " + systemId + " (" + err.getMessage() + ")"); 3578 } 3579 if (input != null) { 3580 if (input instanceof String) { 3581 systemId = (String) input; 3582 isAbsolute = true; 3583 } else if (input instanceof InputStream) { 3584 stream = (InputStream) input; 3585 } else if (input instanceof Reader) { 3586 reader = (Reader) input; 3587 } 3588 } 3589 } 3590 3591 if (systemId != null) { 3593 handler.startExternalEntity (systemId); 3594 } else { 3595 handler.startExternalEntity ("[unidentified data stream]"); 3596 } 3597 3598 if (reader != null) { 3601 sourceType = INPUT_READER; 3602 this.reader = reader; 3603 tryEncodingDecl (true); 3604 return; 3605 } 3606 3607 sourceType = INPUT_STREAM; 3610 if (stream != null) { 3611 is = stream; 3612 } else { 3613 URL url = new URL (systemId); 3615 try { 3616 externalEntity = url.openConnection (); 3617 externalEntity.connect (); 3618 is = externalEntity.getInputStream (); 3619 } catch (java.io.IOException err) { 3620 popInput(); 3621 error("Cannot read from " + systemId + 3622 (systemId.equals(err.getMessage()) ? "" : " (" + err.getMessage() + ")")); 3623 } 3624 } 3625 3626 if (!is.markSupported ()) { 3629 is = new BufferedInputStream (is); 3630 } 3631 3632 if (encoding == null && externalEntity != null) { 3634 if (!"file".equals (externalEntity.getURL ().getProtocol ())) { 3638 int temp; 3639 3640 encoding = externalEntity.getContentType (); 3643 3644 if (encoding==null) { 3646 temp = -1; 3647 } else { 3648 temp = encoding.indexOf ("charset"); 3649 } 3650 3651 if (temp < 0) 3655 encoding = null; else { 3657 temp = encoding.indexOf ('=', temp + 7); 3658 encoding = encoding.substring (temp+1); if ((temp = encoding.indexOf (';')) > 0) 3660 encoding = encoding.substring (0, temp); 3661 3662 if ((temp = encoding.indexOf ('(')) > 0) 3664 encoding = encoding.substring (0, temp); 3665 if ((temp = encoding.indexOf ('"')) > 0) 3667 encoding = encoding.substring (temp + 1, 3668 encoding.indexOf ('"', temp + 2)); 3669 encoding.trim (); 3670 } 3671 } 3672 } 3673 3674 if (encoding != null) { 3676 this.encoding = ENCODING_EXTERNAL; 3677 setupDecoding (encoding); 3678 ignoreEncoding = true; 3679 3680 } else { 3682 detectEncoding (); 3683 ignoreEncoding = false; 3684 } 3685 is.mark(100); 3686 3687 try { 3689 tryEncodingDecl (ignoreEncoding); 3690 } catch (EncodingException x) { 3691 encoding = x.getMessage (); 3692 3693 try { 3696 if (sourceType != INPUT_STREAM) 3697 throw x; 3698 3699 is.reset (); 3700 readBufferPos = 0; 3701 readBufferLength = 0; 3702 readBufferOverflow = -1; 3703 line = 1; 3704 currentByteCount = column = 0; 3705 3706 sourceType = INPUT_READER; 3707 this.reader = new InputStreamReader (is, encoding); 3708 is = null; 3709 3710 tryEncodingDecl (true); 3711 3712 } catch (IOException e) { 3713 error ("unsupported text encoding", 3714 encoding, 3715 null); 3716 } 3717 } 3718 } 3719 3720 3721 3735 private String tryEncodingDecl (boolean ignoreEncoding) 3736 throws SAXException, IOException 3737 { 3738 if (tryRead ("<?xml")) { 3740 dataBufferFlush (); 3741 if (tryWhitespace ()) { 3742 if (inputStack.size () > 0) { 3743 return parseTextDecl (ignoreEncoding); 3744 } else { 3745 return parseXMLDecl (ignoreEncoding); 3746 } 3747 } else { 3748 unread ("xml".toCharArray (), 3); 3749 parsePI (); 3750 } 3751 } 3752 return null; 3753 } 3754 3755 3756 3778 private void detectEncoding () 3779 throws SAXException, IOException 3780 { 3781 byte signature[] = new byte [4]; 3782 3783 is.mark (4); 3786 is.read (signature); 3787 is.reset (); 3788 3789 if (tryEncoding (signature, (byte) 0x00, (byte) 0x00, 3793 (byte) 0x00, (byte) 0x3c)) { 3794 encoding = ENCODING_UCS_4_1234; 3797 3798 } else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00, 3799 (byte) 0x00, (byte) 0x00)) { 3800 encoding = ENCODING_UCS_4_4321; 3802 3803 } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x00, 3804 (byte) 0x3c, (byte) 0x00)) { 3805 encoding = ENCODING_UCS_4_2143; 3807 3808 } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c, 3809 (byte) 0x00, (byte) 0x00)) { 3810 encoding = ENCODING_UCS_4_3412; 3812 3813 } 3816 3817 3820 else if (tryEncoding (signature, (byte)0xef, (byte)0xbb, (byte)0xbf)) { 3821 encoding = ENCODING_UTF_8; 3822 is.read(); is.read(); is.read(); 3823 } 3824 3825 else if (tryEncoding (signature, (byte) 0xfe, (byte) 0xff)) { 3832 encoding = ENCODING_UCS_2_12; 3835 is.read (); is.read (); 3836 3837 } else if (tryEncoding (signature, (byte) 0xff, (byte) 0xfe)) { 3838 encoding = ENCODING_UCS_2_21; 3841 is.read (); is.read (); 3842 3843 } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c, 3844 (byte) 0x00, (byte) 0x3f)) { 3845 encoding = ENCODING_UCS_2_12; 3848 error ("no byte-order mark for UCS-2 entity"); 3849 3850 } else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00, 3851 (byte) 0x3f, (byte) 0x00)) { 3852 encoding = ENCODING_UCS_2_21; 3855 error ("no byte-order mark for UCS-2 entity"); 3856 } 3857 3858 else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x3f, 3862 (byte) 0x78, (byte) 0x6d)) { 3863 encoding = ENCODING_UTF_8; 3866 read8bitEncodingDeclaration (); 3867 3868 } else { 3869 3872 encoding = ENCODING_UTF_8; 3874 } 3875 } 3876 3877 3878 3889 private static boolean tryEncoding ( 3890 byte sig[], byte b1, byte b2, byte b3, byte b4) 3891 { 3892 return (sig [0] == b1 && sig [1] == b2 3893 && sig [2] == b3 && sig [3] == b4); 3894 } 3895 3896 3897 3906 private static boolean tryEncoding (byte sig[], byte b1, byte b2) 3907 { 3908 return ((sig [0] == b1) && (sig [1] == b2)); 3909 } 3910 3911 3921 private static boolean tryEncoding (byte sig[], byte b1, byte b2, byte b3) 3922 { 3923 return ((sig [0] == b1) && (sig [1] == b2) && (sig [2] == b3)); 3924 } 3925 3926 3934 private void pushString (String ename, String s) 3935 throws SAXException 3936 { 3937 char ch[] = s.toCharArray (); 3938 pushCharArray (ename, ch, 0, ch.length); 3939 } 3940 3941 3942 3955 private void pushCharArray (String ename, char ch[], int start, int length) 3956 throws SAXException 3957 { 3958 pushInput (ename); 3960 sourceType = INPUT_INTERNAL; 3961 readBuffer = ch; 3962 readBufferPos = start; 3963 readBufferLength = length; 3964 readBufferOverflow = -1; 3965 } 3966 3967 3968 3994 private void pushInput (String ename) 3995 throws SAXException 3996 { 3997 Object input[] = new Object [12]; 3998 3999 if (ename != null) { 4001 Enumeration entities = entityStack.elements (); 4002 while (entities.hasMoreElements ()) { 4003 String e = (String) entities.nextElement (); 4004 if (e == ename) { 4005 error ("recursive reference to entity", ename, null); 4006 } 4007 } 4008 } 4009 entityStack.push (ename); 4010 4011 if (sourceType == INPUT_NONE) { 4013 return; 4014 } 4015 4016 input [0] = new Integer (sourceType); 4019 input [1] = externalEntity; 4020 input [2] = readBuffer; 4021 input [3] = new Integer (readBufferPos); 4022 input [4] = new Integer (readBufferLength); 4023 input [5] = new Integer (line); 4024 input [6] = new Integer (encoding); 4025 input [7] = new Integer (readBufferOverflow); 4026 input [8] = is; 4027 input [9] = new Integer (currentByteCount); 4028 input [10] = new Integer (column); 4029 input [11] = reader; 4030 4031 inputStack.push (input); 4033 } 4034 4035 4036 4051 private void popInput () 4052 throws SAXException, IOException 4053 { 4054 String uri; 4055 4056 if (externalEntity != null) 4057 uri = externalEntity.getURL ().toString (); 4058 else 4059 uri = baseURI; 4060 4061 switch (sourceType) { 4062 case INPUT_STREAM: 4063 if (is!=null) { 4064 if (uri != null) { 4065 handler.endExternalEntity (baseURI); 4066 } 4067 is.close (); 4068 } 4069 break; 4070 case INPUT_READER: 4071 if (reader != null) { 4072 if (uri != null) { 4073 handler.endExternalEntity (baseURI); 4074 } 4075 reader.close (); 4076 } 4077 break; 4078 } 4079 4080 if (inputStack.isEmpty ()) { 4083 throw new EOFException ("no more input"); 4084 } 4085 4086 Object[] input = (Object[]) inputStack.pop (); 4087 entityStack.pop (); 4088 4089 sourceType = ((Integer) input [0]).intValue (); 4090 externalEntity = (URLConnection) input [1]; 4091 readBuffer = (char[]) input [2]; 4092 readBufferPos = ((Integer) input [3]).intValue (); 4093 readBufferLength = ((Integer) input [4]).intValue (); 4094 line = ((Integer) input [5]).intValue (); 4095 encoding = ((Integer) input [6]).intValue (); 4096 readBufferOverflow = ((Integer) input [7]).intValue (); 4097 is = (InputStream) input [8]; 4098 currentByteCount = ((Integer) input [9]).intValue (); 4099 column = ((Integer) input [10]).intValue (); 4100 reader = (Reader) input [11]; 4101 } 4102 4103 4104 4115 private boolean tryRead (char delim) 4116 throws SAXException, IOException 4117 { 4118 char c; 4119 4120 c = readCh (); 4122 4123 if (c == delim) { 4126 return true; 4127 } else { 4128 unread (c); 4129 return false; 4130 } 4131 } 4132 4133 4134 4149 private boolean tryRead (String delim) 4150 throws SAXException, IOException 4151 { 4152 char ch[] = delim.toCharArray (); 4153 char c; 4154 4155 4158 for (int i = 0; i < ch.length; i++) { 4159 c = readCh (); 4160 if (c != ch [i]) { 4161 unread (c); 4162 if (i != 0) { 4163 unread (ch, i); 4164 } 4165 return false; 4166 } 4167 } 4168 return true; 4169 } 4170 4171 4172 4173 4180 private boolean tryWhitespace () 4181 throws SAXException, IOException 4182 { 4183 char c; 4184 c = readCh (); 4185 if (isWhitespace (c)) { 4186 skipWhitespace (); 4187 return true; 4188 } else { 4189 unread (c); 4190 return false; 4191 } 4192 } 4193 4194 4195 4204 private void parseUntil (String delim) 4205 throws SAXException, IOException 4206 { 4207 char c; 4208 int startLine = line; 4209 4210 try { 4211 while (!tryRead (delim)) { 4212 c = readCh (); 4213 dataBufferAppend (c); 4214 } 4215 } catch (EOFException e) { 4216 error ("end of input while looking for delimiter " 4217 + "(started on line " + startLine 4218 + ')', null, delim); 4219 } 4220 } 4221 4222 4223 4234 private void read8bitEncodingDeclaration () 4235 throws SAXException, IOException 4236 { 4237 int ch; 4238 readBufferPos = readBufferLength = 0; 4239 4240 while (true) { 4241 ch = is.read (); 4242 readBuffer [readBufferLength++] = (char) ch; 4243 switch (ch) { 4244 case (int) '>': 4245 return; 4246 case - 1: 4247 error ("end of file before end of XML or encoding declaration.", 4248 null, "?>"); 4249 } 4250 if (readBuffer.length == readBufferLength) 4251 error ("unfinished XML or encoding declaration"); 4252 } 4253 } 4254 4255 4256 4260 4261 4274 private void readDataChunk () 4275 throws SAXException, IOException 4276 { 4277 int count; 4278 4279 if (readBufferOverflow > -1) { 4281 readBuffer [0] = (char) readBufferOverflow; 4282 readBufferOverflow = -1; 4283 readBufferPos = 1; 4284 sawCR = true; 4285 } else { 4286 readBufferPos = 0; 4287 sawCR = false; 4288 } 4289 4290 if (sourceType == INPUT_READER) { 4292 count = reader.read (readBuffer, 4293 readBufferPos, READ_BUFFER_MAX - readBufferPos); 4294 if (count < 0) 4295 readBufferLength = readBufferPos; 4296 else 4297 readBufferLength = readBufferPos + count; 4298 if (readBufferLength > 0) 4299 filterCR (count >= 0); 4300 sawCR = false; 4301 return; 4302 } 4303 4304 count = is.read (rawReadBuffer, 0, READ_BUFFER_MAX); 4306 4307 if (count > 0) { 4311 switch (encoding) { 4312 case ENCODING_ASCII: 4314 copyIso8859_1ReadBuffer (count, (char) 0x0080); 4315 break; 4316 case ENCODING_UTF_8: 4317 copyUtf8ReadBuffer (count); 4318 break; 4319 case ENCODING_ISO_8859_1: 4320 copyIso8859_1ReadBuffer (count, (char) 0); 4321 break; 4322 4323 case ENCODING_UCS_2_12: 4325 copyUcs2ReadBuffer (count, 8, 0); 4326 break; 4327 case ENCODING_UCS_2_21: 4328 copyUcs2ReadBuffer (count, 0, 8); 4329 break; 4330 4331 case ENCODING_UCS_4_1234: 4333 copyUcs4ReadBuffer (count, 24, 16, 8, 0); 4334 break; 4335 case ENCODING_UCS_4_4321: 4336 copyUcs4ReadBuffer (count, 0, 8, 16, 24); 4337 break; 4338 case ENCODING_UCS_4_2143: 4339 copyUcs4ReadBuffer (count, 16, 24, 0, 8); 4340 break; 4341 case ENCODING_UCS_4_3412: 4342 copyUcs4ReadBuffer (count, 8, 0, 24, 16); 4343 break; 4344 } 4345 } else 4346 readBufferLength = readBufferPos; 4347 4348 readBufferPos = 0; 4349 4350 if (sawCR) { 4353 filterCR (count >= 0); 4354 sawCR = false; 4355 4356 if (readBufferLength == 0 && count >= 0) 4358 readDataChunk (); 4359 } 4360 4361 if (count > 0) 4362 currentByteCount += count; 4363 } 4364 4365 4366 4374 private void filterCR (boolean moreData) 4375 { 4376 int i, j; 4377 4378 readBufferOverflow = -1; 4379 4380loop: 4381 for (i = j = readBufferPos; j < readBufferLength; i++, j++) { 4382 switch (readBuffer [j]) { 4383 case '\r': 4384 if (j == readBufferLength - 1) { 4385 if (moreData) { 4386 readBufferOverflow = '\r'; 4387 readBufferLength--; 4388 } else readBuffer [i++] = '\n'; 4390 break loop; 4391 } else if (readBuffer [j + 1] == '\n') { 4392 j++; 4393 } 4394 readBuffer [i] = '\n'; 4395 break; 4396 4397 case '\n': 4398 default: 4399 readBuffer [i] = readBuffer [j]; 4400 break; 4401 } 4402 } 4403 readBufferLength = i; 4404 } 4405 4406 4417 private void copyUtf8ReadBuffer (int count) 4418 throws SAXException, IOException 4419 { 4420 int i = 0; 4421 int j = readBufferPos; 4422 int b1; 4423 char c = 0; 4424 4425 4430 4431 while (i < count) { 4432 b1 = rawReadBuffer [i++]; 4433 4434 if (b1 < 0) { 4438 if ((b1 & 0xe0) == 0xc0) { 4439 c = (char) (((b1 & 0x1f) << 6) 4441 | getNextUtf8Byte (i++, count)); 4442 } else if ((b1 & 0xf0) == 0xe0) { 4443 c = (char) (((b1 & 0x0f) << 12) | 4447 (getNextUtf8Byte (i++, count) << 6) | 4448 getNextUtf8Byte (i++, count)); 4449 } else if ((b1 & 0xf8) == 0xf0) { 4450 int iso646 = b1 & 07; 4455 iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); 4456 iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); 4457 iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); 4458 4459 if (iso646 <= 0xffff) { 4460 c = (char) iso646; 4461 } else { 4462 if (iso646 > 0x0010ffff) 4463 encodingError ( 4464 "UTF-8 value out of range for Unicode", 4465 iso646, 0); 4466 iso646 -= 0x010000; 4467 readBuffer [j++] = (char) (0xd800 | (iso646 >> 10)); 4468 readBuffer [j++] = (char) (0xdc00 | (iso646 & 0x03ff)); 4469 continue; 4470 } 4471 } else { 4472 encodingError ( 4475 "invalid UTF-8 byte (check the XML declaration)", 4476 0xff & b1, i); 4477 c = 0; 4479 } 4480 } else { 4481 c = (char) b1; 4484 } 4485 readBuffer [j++] = c; 4486 if (c == '\r') 4487 sawCR = true; 4488 } 4489 readBufferLength = j; 4491 } 4492 4493 4494 4504 private int getNextUtf8Byte (int pos, int count) 4505 throws SAXException, IOException 4506 { 4507 int val; 4508 4509 if (pos < count) { 4512 val = rawReadBuffer [pos]; 4513 } else { 4514 val = is.read (); 4515 if (val == -1) { 4516 encodingError ("unfinished multi-byte UTF-8 sequence at EOF", 4517 -1, pos); 4518 } 4519 } 4520 4521 if ((val & 0xc0) != 0x80) { 4523 encodingError ("bad continuation of multi-byte UTF-8 sequence", 4524 val, pos + 1); 4525 } 4526 4527 return (val & 0x3f); 4529 } 4530 4531 4532 4546 private void copyIso8859_1ReadBuffer (int count, char mask) 4547 throws IOException 4548 { 4549 int i, j; 4550 for (i = 0, j = readBufferPos; i < count; i++, j++) { 4551 char c = (char) (rawReadBuffer [i] & 0xff); 4552 if ((c & mask) != 0) 4553 throw new CharConversionException ("non-ASCII character U+" 4554 + Integer.toHexString (c)); 4555 readBuffer [j] = c; 4556 if (c == '\r') { 4557 sawCR = true; 4558 } 4559 } 4560 readBufferLength = j; 4561 } 4562 4563 4564 4578 private void copyUcs2ReadBuffer (int count, int shift1, int shift2) 4579 throws SAXException 4580 { 4581 int j = readBufferPos; 4582 4583 if (count > 0 && (count % 2) != 0) { 4584 encodingError ("odd number of bytes in UCS-2 encoding", -1, count); 4585 } 4586 if (shift1 == 0) { for (int i = 0; i < count; i += 2) { 4589 char c = (char) (rawReadBuffer [i + 1] << 8); 4590 c |= 0xff & rawReadBuffer [i]; 4591 readBuffer [j++] = c; 4592 if (c == '\r') 4593 sawCR = true; 4594 } 4595 } else { for (int i = 0; i < count; i += 2) { 4597 char c = (char) (rawReadBuffer [i] << 8); 4598 c |= 0xff & rawReadBuffer [i + 1]; 4599 readBuffer [j++] = c; 4600 if (c == '\r') 4601 sawCR = true; 4602 } 4603 } 4604 readBufferLength = j; 4605 } 4606 4607 4608 4628 private void copyUcs4ReadBuffer (int count, int shift1, int shift2, 4629 int shift3, int shift4) 4630 throws SAXException 4631 { 4632 int j = readBufferPos; 4633 4634 if (count > 0 && (count % 4) != 0) { 4635 encodingError ( 4636 "number of bytes in UCS-4 encoding not divisible by 4", 4637 -1, count); 4638 } 4639 for (int i = 0; i < count; i += 4) { 4640 int value = (((rawReadBuffer [i] & 0xff) << shift1) | 4641 ((rawReadBuffer [i + 1] & 0xff) << shift2) | 4642 ((rawReadBuffer [i + 2] & 0xff) << shift3) | 4643 ((rawReadBuffer [i + 3] & 0xff) << shift4)); 4644 if (value < 0x0000ffff) { 4645 readBuffer [j++] = (char) value; 4646 if (value == (int) '\r') { 4647 sawCR = true; 4648 } 4649 } else if (value < 0x0010ffff) { 4650 value -= 0x010000; 4651 readBuffer [j++] = (char) (0xd8 | ((value >> 10) & 0x03ff)); 4652 readBuffer [j++] = (char) (0xdc | (value & 0x03ff)); 4653 } else { 4654 encodingError ("UCS-4 value out of range for Unicode", 4655 value, i); 4656 } 4657 } 4658 readBufferLength = j; 4659 } 4660 4661 4662 4665 private void encodingError (String message, int value, int offset) 4666 throws SAXException 4667 { 4668 String uri; 4669 4670 if (value != -1) { 4671 message = message + " (code: 0x" + 4672 Integer.toHexString (value) + ')'; 4673 } 4674 if (externalEntity != null) { 4675 uri = externalEntity.getURL ().toString (); 4676 } else { 4677 uri = baseURI; 4678 } 4679 handler.error (message, uri, -1, offset + currentByteCount); 4680 } 4681 4682 4683 4687 4690 private void initializeVariables () 4691 { 4692 line = 1; 4694 column = 0; 4695 4696 dataBufferPos = 0; 4698 dataBuffer = new char [DATA_BUFFER_INITIAL]; 4699 nameBufferPos = 0; 4700 nameBuffer = new char [NAME_BUFFER_INITIAL]; 4701 4702 elementInfo = new Hashtable (); 4704 entityInfo = new Hashtable (); 4705 notationInfo = new Hashtable (); 4706 4707 currentElement = null; 4710 currentElementContent = CONTENT_UNDECLARED; 4711 4712 sourceType = INPUT_NONE; 4714 inputStack = new Stack (); 4715 entityStack = new Stack (); 4716 externalEntity = null; 4717 tagAttributePos = 0; 4718 tagAttributes = new String [100]; 4719 rawReadBuffer = new byte [READ_BUFFER_MAX]; 4720 readBufferOverflow = -1; 4721 4722 inLiteral = false; 4723 expandPE = false; 4724 peIsError = false; 4725 4726 inCDATA = false; 4727 4728 symbolTable = new Object [SYMBOL_TABLE_LENGTH][]; 4729 } 4730 4731 4732 4735 private void cleanupVariables () 4736 { 4737 dataBuffer = null; 4738 nameBuffer = null; 4739 4740 elementInfo = null; 4741 entityInfo = null; 4742 notationInfo = null; 4743 4744 currentElement = null; 4745 4746 inputStack = null; 4747 entityStack = null; 4748 externalEntity = null; 4749 4750 tagAttributes = null; 4751 rawReadBuffer = null; 4752 4753 symbolTable = null; 4754 } 4755 4756 4757 static class EncodingException extends IOException 4758 { 4759 EncodingException (String encoding) { super (encoding); } 4760 } 4761 4762 private SAXDriver handler; 4766 4767 private Reader reader; private InputStream is; private int line; private int column; private int sourceType; private Stack inputStack; private URLConnection externalEntity; private int encoding; private int currentByteCount; 4780 private char readBuffer []; 4784 private int readBufferPos; 4785 private int readBufferLength; 4786 private int readBufferOverflow; 4788 4789 private final static int READ_BUFFER_MAX = 16384; 4793 private byte rawReadBuffer []; 4794 4795 4796 private static int DATA_BUFFER_INITIAL = 4096; 4800 private char dataBuffer []; 4801 private int dataBufferPos; 4802 4803 private static int NAME_BUFFER_INITIAL = 1024; 4807 private char nameBuffer []; 4808 private int nameBufferPos; 4809 4810 4811 private Hashtable elementInfo; 4815 private Hashtable entityInfo; 4816 private Hashtable notationInfo; 4817 4818 4819 private String currentElement; 4823 private int currentElementContent; 4824 4825 private String basePublicId; 4829 private String baseURI; 4830 private int baseEncoding; 4831 private Reader baseReader; 4832 private InputStream baseInputStream; 4833 private char baseInputBuffer []; 4834 private int baseInputBufferStart; 4835 private int baseInputBufferLength; 4836 4837 private Stack entityStack; 4841 4842 private boolean inLiteral; 4847 private boolean expandPE; 4848 private boolean peIsError; 4849 4850 private final static int SYMBOL_TABLE_LENGTH = 1087; 4854 private Object symbolTable [][]; 4855 4856 private String tagAttributes []; 4860 private int tagAttributePos; 4861 4862 private boolean sawCR; 4868 4869 private boolean inCDATA; 4873} 4874 | Popular Tags |