1 5 package com.microstar.xml; 6 7 import java.io.BufferedInputStream ; 8 import java.io.EOFException ; 9 import java.io.InputStream ; 10 import java.io.Reader ; 11 import java.net.URL ; 12 import java.net.URLConnection ; 13 import java.util.Enumeration ; 14 import java.util.Hashtable ; 15 import java.util.Stack ; 16 17 18 46 public class XmlParser { 47 48 private final static boolean USE_CHEATS = true; 54 55 56 57 61 62 67 public XmlParser () 68 { 69 } 70 71 72 78 public void setHandler (XmlHandler handler) 79 { 80 this.handler = handler; 81 } 82 83 84 95 public void parse (String systemId, String publicId, String encoding) 96 throws java.lang.Exception 97 { 98 doParse(systemId, publicId, null, null, encoding); 99 } 100 101 102 119 public void parse (String systemId, String publicId, 120 InputStream stream, String encoding) 121 throws java.lang.Exception 122 { 123 doParse(systemId, publicId, null, stream, encoding); 124 } 125 126 127 143 public void parse (String systemId, String publicId, Reader reader) 144 throws java.lang.Exception 145 { 146 doParse(systemId, publicId, reader, null, null); 147 } 148 149 150 private synchronized void doParse (String systemId, String publicId, 151 Reader reader, InputStream stream, 152 String encoding) 153 throws java.lang.Exception 154 { 155 basePublicId = publicId; 156 baseURI = systemId; 157 baseReader = reader; 158 baseInputStream = stream; 159 160 initializeVariables(); 161 162 setInternalEntity(intern("amp"), "&"); 164 setInternalEntity(intern("lt"), "<"); 165 setInternalEntity(intern("gt"), ">"); 166 setInternalEntity(intern("apos"), "'"); 167 setInternalEntity(intern("quot"), """); 168 169 if (handler != null) { 170 handler.startDocument(); 171 } 172 173 pushURL("[document]", basePublicId, baseURI, baseReader, baseInputStream, 174 encoding); 175 176 parseDocument(); 177 178 if (handler != null) { 179 handler.endDocument(); 180 } 181 cleanupVariables(); 182 } 183 184 185 186 190 194 198 public final static int CONTENT_UNDECLARED = 0; 199 200 204 public final static int CONTENT_ANY = 1; 205 206 210 public final static int CONTENT_EMPTY = 2; 211 212 216 public final static int CONTENT_MIXED = 3; 217 218 222 public final static int CONTENT_ELEMENTS = 4; 223 224 225 229 233 public final static int ENTITY_UNDECLARED = 0; 234 235 239 public final static int ENTITY_INTERNAL = 1; 240 241 245 public final static int ENTITY_NDATA = 2; 246 247 251 public final static int ENTITY_TEXT = 3; 252 253 254 258 262 public final static int ATTRIBUTE_UNDECLARED = 0; 263 264 268 public final static int ATTRIBUTE_CDATA = 1; 269 270 274 public final static int ATTRIBUTE_ID = 2; 275 276 280 public final static int ATTRIBUTE_IDREF = 3; 281 282 286 public final static int ATTRIBUTE_IDREFS = 4; 287 288 292 public final static int ATTRIBUTE_ENTITY = 5; 293 294 298 public final static int ATTRIBUTE_ENTITIES = 6; 299 300 304 public final static int ATTRIBUTE_NMTOKEN = 7; 305 306 310 public final static int ATTRIBUTE_NMTOKENS = 8; 311 312 316 public final static int ATTRIBUTE_ENUMERATED = 9; 317 318 322 public final static int ATTRIBUTE_NOTATION = 10; 323 324 325 330 333 private static Hashtable attributeTypeHash; 334 static { 335 attributeTypeHash = new Hashtable (); 336 attributeTypeHash.put("CDATA", new Integer (ATTRIBUTE_CDATA)); 337 attributeTypeHash.put("ID", new Integer (ATTRIBUTE_ID)); 338 attributeTypeHash.put("IDREF", new Integer (ATTRIBUTE_IDREF)); 339 attributeTypeHash.put("IDREFS", new Integer (ATTRIBUTE_IDREFS)); 340 attributeTypeHash.put("ENTITY", new Integer (ATTRIBUTE_ENTITY)); 341 attributeTypeHash.put("ENTITIES", new Integer (ATTRIBUTE_ENTITIES)); 342 attributeTypeHash.put("NMTOKEN", new Integer (ATTRIBUTE_NMTOKEN)); 343 attributeTypeHash.put("NMTOKENS", new Integer (ATTRIBUTE_NMTOKENS)); 344 attributeTypeHash.put("NOTATION", new Integer (ATTRIBUTE_NOTATION)); 345 } 346 347 348 private final static int ENCODING_UTF_8 = 1; 352 private final static int ENCODING_ISO_8859_1 = 2; 353 private final static int ENCODING_UCS_2_12 = 3; 354 private final static int ENCODING_UCS_2_21 = 4; 355 private final static int ENCODING_UCS_4_1234 = 5; 356 private final static int ENCODING_UCS_4_4321 = 6; 357 private final static int ENCODING_UCS_4_2143 = 7; 358 private final static int ENCODING_UCS_4_3412 = 8; 359 360 361 365 369 public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 0; 370 371 376 public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 1; 377 378 382 public final static int ATTRIBUTE_DEFAULT_IMPLIED = 2; 383 384 388 public final static int ATTRIBUTE_DEFAULT_REQUIRED = 3; 389 390 395 public final static int ATTRIBUTE_DEFAULT_FIXED = 4; 396 397 398 private final static int INPUT_NONE = 0; 402 private final static int INPUT_INTERNAL = 1; 403 private final static int INPUT_EXTERNAL = 2; 404 private final static int INPUT_STREAM = 3; 405 private final static int INPUT_BUFFER = 4; 406 private final static int INPUT_READER = 5; 407 408 409 private final static int LIT_CHAR_REF = 1; 413 private final static int LIT_ENTITY_REF = 2; 414 private final static int LIT_PE_REF = 4; 415 private final static int LIT_NORMALIZE = 8; 416 417 418 private final static int CONTEXT_NONE = 0; 422 private final static int CONTEXT_DTD = 1; 423 private final static int CONTEXT_ENTITYVALUE = 2; 424 private final static int CONTEXT_ATTRIBUTEVALUE = 3; 425 426 427 428 432 433 440 void error (String message, String textFound, String textExpected) 441 throws java.lang.Exception 442 { 443 errorCount++; 444 if (textFound != null) { 445 message = message + " (found \"" + textFound + "\")"; 446 } 447 if (textExpected != null) { 448 message = message + " (expected \"" + textExpected + "\")"; 449 } 450 if (handler != null) { 451 String uri = null; 452 453 if (externalEntity != null) { 454 uri = externalEntity.getURL().toString(); 455 } 456 handler.error(message, uri, line, column); 457 } 458 } 459 460 461 466 void error (String message, char textFound, String textExpected) 467 throws java.lang.Exception 468 { 469 error(message, new Character (textFound).toString(), textExpected); 470 } 471 472 473 474 478 479 489 void parseDocument () 490 throws java.lang.Exception 491 { 492 char c; 493 494 parseProlog(); 495 require('<'); 496 parseElement(); 497 try 498 { 499 parseMisc(); c=readCh(); error("unexpected characters after document end",c,null); 502 } 503 catch (EOFException e) 504 {return;} 505 } 506 507 508 515 void parseComment () 516 throws java.lang.Exception 517 { 518 skipUntil("-->"); 519 } 520 521 522 531 void parsePI () 532 throws java.lang.Exception 533 { 534 String name; 535 536 name = readNmtoken(true); 537 if (!tryRead("?>")) { 538 requireWhitespace(); 539 parseUntil("?>"); 540 } 541 if (handler != null) { 542 handler.processingInstruction(name, dataBufferToString()); 543 } 544 } 545 546 547 559 void parseCDSect () 560 throws java.lang.Exception 561 { 562 parseUntil("]]>"); 563 } 564 565 566 580 void parseProlog () 581 throws java.lang.Exception 582 { 583 parseMisc(); 584 585 if (tryRead("<!DOCTYPE")) { 586 parseDoctypedecl(); 587 parseMisc(); 588 } 589 } 590 591 592 607 void parseXMLDecl (boolean ignoreEncoding) 608 throws java.lang.Exception 609 { 610 String version; 611 String encodingName = null; 612 String standalone = null; 613 614 require("version"); 616 parseEq(); 617 version = readLiteral(0); 618 if (!version.equals("1.0")) { 619 error("unsupported XML version", version, "1.0"); 620 } 621 622 skipWhitespace(); 624 if (tryRead("encoding")) { 625 parseEq(); 626 encodingName = readLiteral(0); 627 checkEncoding(encodingName, ignoreEncoding); 628 } 629 630 skipWhitespace(); 632 if (tryRead("standalone")) { 633 parseEq(); 634 standalone = readLiteral(0); 635 } 636 637 skipWhitespace(); 638 require("?>"); 639 } 640 641 642 655 void parseTextDecl (boolean ignoreEncoding) 656 throws java.lang.Exception 657 { 658 String encodingName = null; 659 660 if (tryRead("version")) { 662 String version; 663 parseEq(); 664 version = readLiteral(0); 665 if (!version.equals("1.0")) { 666 error("unsupported XML version", version, "1.0"); 667 } 668 requireWhitespace(); 669 } 670 671 672 require("encoding"); 674 parseEq(); 675 encodingName = readLiteral(0); 676 checkEncoding(encodingName, ignoreEncoding); 677 678 skipWhitespace(); 679 require("?>"); 680 } 681 682 683 694 void checkEncoding (String encodingName, boolean ignoreEncoding) 695 throws java.lang.Exception 696 { 697 encodingName = encodingName.toUpperCase(); 698 699 if (ignoreEncoding) { 700 return; 701 } 702 703 switch (encoding) { 704 case ENCODING_UTF_8: 706 if (encodingName.equals("ISO-8859-1")) { 707 encoding = ENCODING_ISO_8859_1; 708 } else if (!encodingName.equals("UTF-8")) { 709 error("unsupported 8-bit encoding", 710 encodingName, 711 "UTF-8 or ISO-8859-1"); 712 } 713 break; 714 case ENCODING_UCS_2_12: 716 case ENCODING_UCS_2_21: 717 if (!encodingName.equals("ISO-10646-UCS-2") && 718 !encodingName.equals("UTF-16")) { 719 error("unsupported 16-bit encoding", 720 encodingName, 721 "ISO-10646-UCS-2"); 722 } 723 break; 724 case ENCODING_UCS_4_1234: 726 case ENCODING_UCS_4_4321: 727 case ENCODING_UCS_4_2143: 728 case ENCODING_UCS_4_3412: 729 if (!encodingName.equals("ISO-10646-UCS-4")) { 730 error("unsupported 32-bit encoding", 731 encodingName, 732 "ISO-10646-UCS-4"); 733 } 734 } 735 } 736 737 738 745 void parseMisc () 746 throws java.lang.Exception 747 { 748 while (true) 749 { 750 skipWhitespace(); 751 if (tryRead("<?")) 752 {parsePI();} 753 else if (tryRead("<!--")) 754 {parseComment();} 755 else 756 {return;} 757 } 758 } 759 760 761 769 void parseDoctypedecl () 770 throws java.lang.Exception 771 { 772 char c; 773 String doctypeName, ids[]; 774 775 requireWhitespace(); 777 doctypeName = readNmtoken(true); 778 779 skipWhitespace(); 781 ids = readExternalIds(false); 782 783 skipWhitespace(); 785 if (tryRead('[')) { 786 787 while (true) { 789 context = CONTEXT_DTD; 790 skipWhitespace(); 791 context = CONTEXT_NONE; 792 if (tryRead(']')) { 793 break; } else { 795 context = CONTEXT_DTD; 796 parseMarkupdecl(); 797 context = CONTEXT_NONE; 798 } 799 } 800 } 801 802 if (ids[1] != null) { 804 pushURL("[external subset]", ids[0], ids[1], null, null, null); 805 806 while (true) { 808 context = CONTEXT_DTD; 809 skipWhitespace(); 810 context = CONTEXT_NONE; 811 if (tryRead('>')) { 812 break; 813 } else { 814 context = CONTEXT_DTD; 815 parseMarkupdecl(); 816 context = CONTEXT_NONE; 817 } 818 } 819 } else { 820 skipWhitespace(); 822 require('>'); 823 } 824 825 if (handler != null) { 826 handler.doctypeDecl(doctypeName, ids[0], ids[1]); 827 } 828 829 } 835 836 837 847 void parseMarkupdecl () 848 throws java.lang.Exception 849 { 850 if (tryRead("<!ELEMENT")) { 851 parseElementdecl(); 852 } else if (tryRead("<!ATTLIST")) { 853 parseAttlistDecl(); 854 } else if (tryRead("<!ENTITY")) { 855 parseEntityDecl(); 856 } else if (tryRead("<!NOTATION")) { 857 parseNotationDecl(); 858 } else if (tryRead("<?")) { 859 parsePI(); 860 } else if (tryRead("<!--")) { 861 parseComment(); 862 } else if (tryRead("<![")) { 863 parseConditionalSect(); 864 } else { 865 error("expected markup declaration", null, null); 866 } 867 } 868 869 870 882 void parseElement () 883 throws java.lang.Exception 884 { 885 String gi; 886 char c; 887 int oldElementContent = currentElementContent; 888 String oldElement = currentElement; 889 890 tagAttributePos = 0; 893 894 gi = readNmtoken(true); 896 897 currentElement = gi; 899 currentElementContent = getElementContentType(gi); 900 if (currentElementContent == CONTENT_UNDECLARED) { 901 currentElementContent = CONTENT_ANY; 902 } 903 904 skipWhitespace(); 908 c = readCh(); 909 while (c != '/' && c != '>') { 910 unread(c); 911 parseAttribute(gi); 912 skipWhitespace(); 913 c = readCh(); 914 } 915 unread(c); 916 917 Enumeration atts = declaredAttributes(gi); 919 if (atts != null) { 920 String aname; 921 loop: while (atts.hasMoreElements()) { 922 aname = (String )atts.nextElement(); 923 for (int i = 0; i < tagAttributePos; i++) { 925 if (tagAttributes[i] == aname) { 926 continue loop; 927 } 928 } 929 if (handler != null) { 931 handler.attribute(aname, 932 getAttributeExpandedValue(gi, aname), 933 false); 934 } 935 } 936 } 937 938 c = readCh(); 942 switch (c) { 943 case '>': 944 if (handler != null) { 945 handler.startElement(gi); 946 } 947 parseContent(); 948 break; 949 case '/': 950 require('>'); 951 if (handler != null) { 952 handler.startElement(gi); 953 handler.endElement(gi); 954 } 955 break; 956 } 957 958 currentElement = oldElement; 960 currentElementContent = oldElementContent; 961 } 962 963 964 972 void parseAttribute (String name) 973 throws java.lang.Exception 974 { 975 String aname; 976 int type; 977 String value; 978 979 aname = readNmtoken(true).intern(); 981 type = getAttributeDefaultValueType(name, aname); 982 983 parseEq(); 985 986 if (type == ATTRIBUTE_CDATA || type == ATTRIBUTE_UNDECLARED) { 989 value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF); 990 } else { 991 value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF | LIT_NORMALIZE); 992 } 993 994 if (handler != null) { 997 handler.attribute(aname, value, true); 998 } 999 dataBufferPos = 0; 1000 1001 if (tagAttributePos == tagAttributes.length) { 1004 String newAttrib[] = new String [tagAttributes.length * 2]; 1005 System.arraycopy(tagAttributes, 0, newAttrib, 0, tagAttributePos); 1006 tagAttributes = newAttrib; 1007 } 1008 tagAttributes[tagAttributePos++] = aname; 1009 } 1010 1011 1012 1016 void parseEq () 1017 throws java.lang.Exception 1018 { 1019 skipWhitespace(); 1020 require('='); 1021 skipWhitespace(); 1022 } 1023 1024 1025 1030 void parseETag () 1031 throws java.lang.Exception 1032 { 1033 String name; 1034 name = readNmtoken(true); 1035 if (name != currentElement) { 1036 error("mismatched end tag", name, currentElement); 1037 } 1038 skipWhitespace(); 1039 require('>'); 1040 if (handler != null) { 1041 handler.endElement(name); 1042 } 1043 } 1044 1045 1046 1051 void parseContent () 1052 throws java.lang.Exception 1053 { 1054 String data; 1055 char c; 1056 1057 while (true) { 1058 1059 switch (currentElementContent) { 1060 case CONTENT_ANY: 1061 case CONTENT_MIXED: 1062 parsePCData(); 1063 break; 1064 case CONTENT_ELEMENTS: 1065 parseWhitespace(); 1066 break; 1067 } 1068 1069 c = readCh(); 1071 switch (c) { 1072 1073 case '&': c = readCh(); 1075 if (c == '#') { 1076 parseCharRef(); 1077 } else { 1078 unread(c); 1079 parseEntityRef(true); 1080 } 1081 break; 1082 1083 case '<': 1085 c = readCh(); 1086 switch (c) { 1087 1088 case '!': c = readCh(); 1090 switch (c) { 1091 case '-': require('-'); 1093 parseComment(); 1094 break; 1095 case '[': require("CDATA["); 1097 parseCDSect(); 1098 break; 1099 default: 1100 error("expected comment or CDATA section", c, null); 1101 break; 1102 } 1103 break; 1104 1105 case '?': dataBufferFlush(); 1107 parsePI(); 1108 break; 1109 1110 case '/': dataBufferFlush(); 1112 parseETag(); 1113 return; 1114 1115 default: dataBufferFlush(); 1117 unread(c); 1118 parseElement(); 1119 break; 1120 } 1121 } 1122 } 1123 } 1124 1125 1126 1132 void parseElementdecl () 1133 throws java.lang.Exception 1134 { 1135 String name; 1136 1137 requireWhitespace(); 1138 name = readNmtoken(true); 1140 1141 requireWhitespace(); 1142 parseContentspec(name); 1144 1145 skipWhitespace(); 1146 require('>'); 1147 } 1148 1149 1150 1154 void parseContentspec (String name) 1155 throws java.lang.Exception 1156 { 1157 if (tryRead("EMPTY")) { 1158 setElement(name, CONTENT_EMPTY, null, null); 1159 return; 1160 } else if (tryRead("ANY")) { 1161 setElement(name, CONTENT_ANY, null, null); 1162 return; 1163 } else { 1164 require('('); 1165 dataBufferAppend('('); 1166 skipWhitespace(); 1167 if (tryRead("#PCDATA")) { 1168 dataBufferAppend("#PCDATA"); 1169 parseMixed(); 1170 setElement(name, CONTENT_MIXED, dataBufferToString(), null); 1171 } else { 1172 parseElements(); 1173 setElement(name, CONTENT_ELEMENTS, dataBufferToString(), null); 1174 } 1175 } 1176 } 1177 1178 1179 1191 void parseElements () 1192 throws java.lang.Exception 1193 { 1194 char c; 1195 char sep; 1196 1197 skipWhitespace(); 1199 parseCp(); 1200 1201 skipWhitespace(); 1203 c = readCh(); 1204 switch (c) { 1205 case ')': 1206 dataBufferAppend(')'); 1207 c = readCh(); 1208 switch (c) { 1209 case '*': 1210 case '+': 1211 case '?': 1212 dataBufferAppend(c); 1213 break; 1214 default: 1215 unread(c); 1216 } 1217 return; 1218 case ',': case '|': 1220 sep = c; 1221 dataBufferAppend(c); 1222 break; 1223 default: 1224 error("bad separator in content model", c, null); 1225 return; 1226 } 1227 1228 while (true) { 1230 skipWhitespace(); 1231 parseCp(); 1232 skipWhitespace(); 1233 c = readCh(); 1234 if (c == ')') { 1235 dataBufferAppend(')'); 1236 break; 1237 } else if (c != sep) { 1238 error("bad separator in content model", c, null); 1239 return; 1240 } else { 1241 dataBufferAppend(c); 1242 } 1243 } 1244 1245 c = readCh(); 1247 switch (c) { 1248 case '?': 1249 case '*': 1250 case '+': 1251 dataBufferAppend(c); 1252 return; 1253 default: 1254 unread(c); 1255 return; 1256 } 1257 } 1258 1259 1260 1266 void parseCp () 1267 throws java.lang.Exception 1268 { 1269 char c; 1270 1271 if (tryRead('(')) { 1272 dataBufferAppend('('); 1273 parseElements(); 1274 } else { 1275 dataBufferAppend(readNmtoken(true)); 1276 c = readCh(); 1277 switch (c) { 1278 case '?': 1279 case '*': 1280 case '+': 1281 dataBufferAppend(c); 1282 break; 1283 default: 1284 unread(c); 1285 break; 1286 } 1287 } 1288 } 1289 1290 1291 1298 void parseMixed () 1299 throws java.lang.Exception 1300 { 1301 char c; 1302 1303 skipWhitespace(); 1305 if (tryRead(')')) { 1306 dataBufferAppend(")*"); 1307 tryRead('*'); 1308 return; 1309 } 1310 1311 skipWhitespace(); 1313 while (!tryRead(")*")) { 1314 require('|'); 1315 dataBufferAppend('|'); 1316 skipWhitespace(); 1317 dataBufferAppend(readNmtoken(true)); 1318 skipWhitespace(); 1319 } 1320 dataBufferAppend(")*"); 1321 } 1322 1323 1324 1329 void parseAttlistDecl () 1330 throws java.lang.Exception 1331 { 1332 String elementName; 1333 1334 requireWhitespace(); 1335 elementName = readNmtoken(true); 1336 requireWhitespace(); 1337 while (!tryRead('>')) { 1338 parseAttDef(elementName); 1339 skipWhitespace(); 1340 } 1341 } 1342 1343 1344 1348 void parseAttDef (String elementName) 1349 throws java.lang.Exception 1350 { 1351 String name; 1352 int type; 1353 String enumeration = null; 1354 1355 name = readNmtoken(true); 1357 1358 requireWhitespace(); 1360 type = readAttType(); 1361 1362 if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) { 1365 enumeration = dataBufferToString(); 1366 } 1367 1368 requireWhitespace(); 1370 parseDefault(elementName, name, type, enumeration); 1371 } 1372 1373 1374 1383 int readAttType () 1384 throws java.lang.Exception 1385 { 1386 String typeString; 1387 Integer type; 1388 1389 if (tryRead('(')) { 1390 parseEnumeration(); 1391 return ATTRIBUTE_ENUMERATED; 1392 } else { 1393 typeString = readNmtoken(true); 1394 if (typeString.equals("NOTATION")) { 1395 parseNotationType(); 1396 } 1397 type = (Integer )attributeTypeHash.get(typeString); 1398 if (type == null) { 1399 error("illegal attribute type", typeString, null); 1400 return ATTRIBUTE_UNDECLARED; 1401 } else { 1402 return type.intValue(); 1403 } 1404 } 1405 } 1406 1407 1408 1414 void parseEnumeration () 1415 throws java.lang.Exception 1416 { 1417 char c; 1418 1419 dataBufferAppend('('); 1420 1421 skipWhitespace(); 1423 dataBufferAppend(readNmtoken(true)); 1424 skipWhitespace(); 1426 while (!tryRead(')')) { 1427 require('|'); 1428 dataBufferAppend('|'); 1429 skipWhitespace(); 1430 dataBufferAppend(readNmtoken(true)); 1431 skipWhitespace(); 1432 } 1433 dataBufferAppend(')'); 1434 } 1435 1436 1437 1444 void parseNotationType () 1445 throws java.lang.Exception 1446 { 1447 requireWhitespace(); 1448 require('('); 1449 1450 parseEnumeration(); 1451 } 1452 1453 1454 1458 void parseDefault (String elementName, String name, int type, String enumeration) 1459 throws java.lang.Exception 1460 { 1461 int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; 1462 String value = null; 1463 boolean normalizeWSFlag; 1464 1465 if (tryRead('#')) { 1466 if (tryRead("FIXED")) { 1467 valueType = ATTRIBUTE_DEFAULT_FIXED; 1468 requireWhitespace(); 1469 context = CONTEXT_ATTRIBUTEVALUE; 1470 value = readLiteral(LIT_CHAR_REF); 1471 context = CONTEXT_DTD; 1472 } else if (tryRead("REQUIRED")) { 1473 valueType = ATTRIBUTE_DEFAULT_REQUIRED; 1474 } else if (tryRead("IMPLIED")) { 1475 valueType = ATTRIBUTE_DEFAULT_IMPLIED; 1476 } else { 1477 error("illegal keyword for attribute default value", null, null); 1478 } 1479 } else { 1480 context = CONTEXT_ATTRIBUTEVALUE; 1481 value = readLiteral(LIT_CHAR_REF); 1482 context = CONTEXT_DTD; 1483 } 1484 setAttribute(elementName, name, type, enumeration, value, valueType); 1485 } 1486 1487 1488 1500 void parseConditionalSect () 1501 throws java.lang.Exception 1502 { 1503 skipWhitespace(); 1504 if (tryRead("INCLUDE")) { 1505 skipWhitespace(); 1506 require('['); 1507 skipWhitespace(); 1508 while (!tryRead("]]>")) { 1509 parseMarkupdecl(); 1510 skipWhitespace(); 1511 } 1512 } else if (tryRead("IGNORE")) { 1513 skipWhitespace(); 1514 require('['); 1515 int nesting = 1; 1516 char c; 1517 for (int nest = 1; nest > 0; ) { 1518 c = readCh(); 1519 switch (c) { 1520 case '<': 1521 if (tryRead("![")) { 1522 nest++; 1523 } 1524 case ']': 1525 if (tryRead("]>")) { 1526 nest--; 1527 } 1528 } 1529 } 1530 } else { 1531 error("conditional section must begin with INCLUDE or IGNORE", 1532 null, null); 1533 } 1534 } 1535 1536 1537 1542 void parseCharRef () 1543 throws java.lang.Exception 1544 { 1545 int value = 0; 1546 char c; 1547 1548 if (tryRead('x')) { 1549 loop1: while (true) { 1550 c = readCh(); 1551 switch (c) { 1552 case '0': 1553 case '1': 1554 case '2': 1555 case '3': 1556 case '4': 1557 case '5': 1558 case '6': 1559 case '7': 1560 case '8': 1561 case '9': 1562 case 'a': 1563 case 'A': 1564 case 'b': 1565 case 'B': 1566 case 'c': 1567 case 'C': 1568 case 'd': 1569 case 'D': 1570 case 'e': 1571 case 'E': 1572 case 'f': 1573 case 'F': 1574 value *= 16; 1575 value += Integer.parseInt(new Character (c).toString(), 16); 1576 break; 1577 case ';': 1578 break loop1; 1579 default: 1580 error("illegal character in character reference", c, null); 1581 break loop1; 1582 } 1583 } 1584 } else { 1585 loop2: while (true) { 1586 c = readCh(); 1587 switch (c) { 1588 case '0': 1589 case '1': 1590 case '2': 1591 case '3': 1592 case '4': 1593 case '5': 1594 case '6': 1595 case '7': 1596 case '8': 1597 case '9': 1598 value *= 10; 1599 value += Integer.parseInt(new Character (c).toString(), 10); 1600 break; 1601 case ';': 1602 break loop2; 1603 default: 1604 error("illegal character in character reference", c, null); 1605 break loop2; 1606 } 1607 } 1608 } 1609 1610 if (value <= 0x0000ffff) { 1613 dataBufferAppend((char)value); 1615 } else if (value <= 0x000fffff) { 1616 dataBufferAppend((char)(0xd8 | ((value & 0x000ffc00) >> 10))); 1618 dataBufferAppend((char)(0xdc | (value & 0x0003ff))); 1619 } else { 1620 error("character reference " + value + " is too large for UTF-16", 1622 new Integer (value).toString(), null); 1623 } 1624 } 1625 1626 1627 1633 void parseEntityRef (boolean externalAllowed) 1634 throws java.lang.Exception 1635 { 1636 String name; 1637 1638 name = readNmtoken(true); 1639 require(';'); 1640 switch (getEntityType(name)) { 1641 case ENTITY_UNDECLARED: 1642 error("reference to undeclared entity", name, null); 1643 break; 1644 case ENTITY_INTERNAL: 1645 pushString(name, getEntityValue(name)); 1646 break; 1647 case ENTITY_TEXT: 1648 if (externalAllowed) { 1649 pushURL(name, getEntityPublicId(name), 1650 getEntitySystemId(name), 1651 null, null, null); 1652 } else { 1653 error("reference to external entity in attribute value.", name, null); 1654 } 1655 break; 1656 case ENTITY_NDATA: 1657 if (externalAllowed) { 1658 error("data entity reference in content", name, null); 1659 } else { 1660 error("reference to external entity in attribute value.", name, null); 1661 } 1662 break; 1663 } 1664 } 1665 1666 1667 1672 void parsePEReference (boolean isEntityValue) 1673 throws java.lang.Exception 1674 { 1675 String name; 1676 1677 name = "%" + readNmtoken(true); 1678 require(';'); 1679 switch (getEntityType(name)) { 1680 case ENTITY_UNDECLARED: 1681 error("reference to undeclared parameter entity", name, null); 1682 break; 1683 case ENTITY_INTERNAL: 1684 if (isEntityValue) { 1685 pushString(name, getEntityValue(name)); 1686 } else { 1687 pushString(name, " " + getEntityValue(name) + ' '); 1688 } 1689 break; 1690 case ENTITY_TEXT: 1691 if (isEntityValue) { 1692 pushString(null, " "); 1693 } 1694 pushURL(name, getEntityPublicId(name), 1695 getEntitySystemId(name), 1696 null, null, null); 1697 if (isEntityValue) { 1698 pushString(null, " "); 1699 } 1700 break; 1701 } 1702 } 1703 1704 1705 1716 void parseEntityDecl () 1717 throws java.lang.Exception 1718 { 1719 char c; 1720 boolean peFlag = false; 1721 String name, value, notationName, ids[]; 1722 1723 requireWhitespace(); 1725 if (tryRead('%')) { 1726 peFlag = true; 1727 requireWhitespace(); 1728 } 1729 1730 name = readNmtoken(true); 1733 if (peFlag) { 1734 name = "%" + name; 1735 } 1736 1737 requireWhitespace(); 1739 c = readCh(); 1740 unread(c); 1741 if (c == '"' || c == '\'') { 1742 context = CONTEXT_ENTITYVALUE; 1744 value = readLiteral(LIT_CHAR_REF|LIT_PE_REF); 1745 context = CONTEXT_DTD; 1746 setInternalEntity(name,value); 1747 } else { 1748 ids = readExternalIds(false); 1750 if (ids[1] == null) { 1751 error("system identifer missing", name, null); 1752 } 1753 1754 skipWhitespace(); 1756 if (tryRead("NDATA")) { 1757 requireWhitespace(); 1758 notationName = readNmtoken(true); 1759 setExternalDataEntity(name, ids[0], ids[1], notationName); 1760 } else { 1761 setExternalTextEntity(name, ids[0], ids[1]); 1762 } 1763 } 1764 1765 skipWhitespace(); 1767 require('>'); 1768 } 1769 1770 1771 1776 void parseNotationDecl () 1777 throws java.lang.Exception 1778 { 1779 String nname, ids[]; 1780 1781 1782 requireWhitespace(); 1783 nname = readNmtoken(true); 1784 1785 requireWhitespace(); 1786 1787 ids = readExternalIds(true); 1789 if (ids[0] == null && ids[1] == null) { 1790 error("external identifer missing", nname, null); 1791 } 1792 1793 setNotation(nname, ids[0], ids[1]); 1795 1796 skipWhitespace(); 1797 require('>'); 1798 } 1799 1800 1801 1809 void parsePCData () 1810 throws java.lang.Exception 1811 { 1812 char c; 1813 1814 if (USE_CHEATS) { 1820 int lineAugment = 0; 1821 int columnAugment = 0; 1822 1823 loop: for (int i = readBufferPos; i < readBufferLength; i++) { 1824 switch (readBuffer[i]) { 1825 case '\n': 1826 lineAugment++; 1827 columnAugment = 0; 1828 break; 1829 case '&': 1830 case '<': 1831 int start = readBufferPos; 1832 columnAugment++; 1833 readBufferPos = i; 1834 if (lineAugment > 0) { 1835 line += lineAugment; 1836 column = columnAugment; 1837 } else { 1838 column += columnAugment; 1839 } 1840 dataBufferAppend(readBuffer, start, i-start); 1841 return; 1842 default: 1843 columnAugment++; 1844 } 1845 } 1846 } 1847 1848 while (true) { 1851 c = readCh(); 1852 switch (c) { 1853 case '<': 1854 case '&': 1855 unread(c); 1856 return; 1857 default: 1858 dataBufferAppend(c); 1859 break; 1860 } 1861 } 1862 } 1863 1864 1865 1866 1870 1874 void requireWhitespace () 1875 throws java.lang.Exception 1876 { 1877 char c = readCh(); 1878 if (isWhitespace(c)) { 1879 skipWhitespace(); 1880 } else { 1881 error("whitespace expected", c, null); 1882 } 1883 } 1884 1885 1886 1889 void parseWhitespace () 1890 throws java.lang.Exception 1891 { 1892 char c = readCh(); 1893 while (isWhitespace(c)) { 1894 dataBufferAppend(c); 1895 c = readCh(); 1896 } 1897 unread(c); 1898 } 1899 1900 1901 1905 void skipWhitespace () 1906 throws java.lang.Exception 1907 { 1908 if (USE_CHEATS) { 1913 int lineAugment = 0; 1914 int columnAugment = 0; 1915 1916 loop: for (int i = readBufferPos; i < readBufferLength; i++) { 1917 switch (readBuffer[i]) { 1918 case ' ': 1919 case '\t': 1920 case '\r': 1921 columnAugment++; 1922 break; 1923 case '\n': 1924 lineAugment++; 1925 columnAugment = 0; 1926 break; 1927 case '%': 1928 if (context == CONTEXT_DTD || context == CONTEXT_ENTITYVALUE) { 1929 break loop; 1930 } default: 1932 readBufferPos = i; 1933 if (lineAugment > 0) { 1934 line += lineAugment; 1935 column = columnAugment; 1936 } else { 1937 column += columnAugment; 1938 } 1939 return; 1940 } 1941 } 1942 } 1943 1944 char c = readCh(); 1946 while (isWhitespace(c)) { 1947 c = readCh(); 1948 } 1949 unread(c); 1950 } 1951 1952 1953 1959 String readNmtoken (boolean isName) 1960 throws java.lang.Exception 1961 { 1962 char c; 1963 1964 if (USE_CHEATS) { 1965 loop: for (int i = readBufferPos; i < readBufferLength; i++) { 1966 switch (readBuffer[i]) { 1967 case '%': 1968 if (context == CONTEXT_DTD || context == CONTEXT_ENTITYVALUE) { 1969 break loop; 1970 } case '<': 1972 case '>': 1973 case '&': 1974 case ',': 1975 case '|': 1976 case '*': 1977 case '+': 1978 case '?': 1979 case ')': 1980 case '=': 1981 case '\'': 1982 case '"': 1983 case '[': 1984 case ' ': 1985 case '\t': 1986 case '\r': 1987 case '\n': 1988 case ';': 1989 case '/': 1990 case '#': 1991 int start = readBufferPos; 1992 if (i == start) { 1993 error("name expected", readBuffer[i], null); 1994 } 1995 readBufferPos = i; 1996 return intern(readBuffer, start, i - start); 1997 } 1998 } 1999 } 2000 2001 nameBufferPos = 0; 2002 2003 loop: while (true) { 2005 c = readCh(); 2006 switch (c) { 2007 case '%': 2008 case '<': 2009 case '>': 2010 case '&': 2011 case ',': 2012 case '|': 2013 case '*': 2014 case '+': 2015 case '?': 2016 case ')': 2017 case '=': 2018 case '\'': 2019 case '"': 2020 case '[': 2021 case ' ': 2022 case '\t': 2023 case '\n': 2024 case '\r': 2025 case ';': 2026 case '/': 2027 unread(c); 2028 if (nameBufferPos == 0) { 2029 error("name expected", null, null); 2030 } 2031 String s = intern(nameBuffer,0,nameBufferPos); 2032 nameBufferPos = 0; 2033 return s; 2034 default: 2035 nameBuffer = 2036 (char[])extendArray(nameBuffer, nameBuffer.length, nameBufferPos); 2037 nameBuffer[nameBufferPos++] = c; 2038 } 2039 } 2040 } 2041 2042 2043 2052 String readLiteral (int flags) 2053 throws java.lang.Exception 2054 { 2055 char delim, c; 2056 int startLine = line; 2057 2058 delim = readCh(); 2060 if (delim != '"' && delim != '\'' && delim != (char)0) { 2061 error("expected '\"' or \"'\"", delim, null); 2062 return null; 2063 } 2064 2065 try { 2067 c = readCh(); 2068 2069 loop: while (c != delim) { 2070 switch (c) { 2071 case '\n': 2073 case '\r': 2074 c = ' '; 2075 break; 2076 case '&': 2078 if ((flags & LIT_CHAR_REF) > 0) { 2079 c = readCh(); 2080 if (c == '#') { 2081 parseCharRef(); 2082 c = readCh(); 2083 continue loop; } else if ((flags & LIT_ENTITY_REF) > 0) { 2085 unread(c); 2086 parseEntityRef(false); 2087 c = readCh(); 2088 continue loop; 2089 } else { 2090 dataBufferAppend('&'); 2091 } 2092 } 2093 break; 2094 2095 default: 2096 break; 2097 } 2098 dataBufferAppend(c); 2099 c = readCh(); 2100 } 2101 } catch (EOFException e) { 2102 error("end of input while looking for delimiter (started on line " 2103 + startLine + ')', null, new Character (delim).toString()); 2104 } 2105 2106 if ((flags & LIT_NORMALIZE) > 0) { 2108 dataBufferNormalize(); 2109 } 2110 2111 return dataBufferToString(); 2113 } 2114 2115 2116 2122 String [] readExternalIds (boolean inNotation) 2123 throws java.lang.Exception 2124 { 2125 char c; 2126 String ids[] = new String [2]; 2127 2128 if (tryRead("PUBLIC")) { 2129 requireWhitespace(); 2130 ids[0] = readLiteral(LIT_NORMALIZE); if (inNotation) { 2132 skipWhitespace(); 2133 if (tryRead('"') || tryRead('\'')) { 2134 ids[1] = readLiteral(0); 2135 } 2136 } else { 2137 requireWhitespace(); 2138 ids[1] = readLiteral(0); } 2140 } else if (tryRead("SYSTEM")) { 2141 requireWhitespace(); 2142 ids[1] = readLiteral(0); } 2144 2145 return ids; 2146 } 2147 2148 2149 2157 final boolean isWhitespace (char c) 2158 { 2159 switch ((int)c) { 2160 case 0x20: 2161 case 0x09: 2162 case 0x0d: 2163 case 0x0a: 2164 return true; 2165 default: 2166 return false; 2167 } 2168 } 2169 2170 2171 2172 2176 2177 2180 void dataBufferAppend (char c) 2181 { 2182 dataBuffer = 2184 (char[])extendArray(dataBuffer, dataBuffer.length, dataBufferPos); 2185 dataBuffer[dataBufferPos++] = c; 2186 } 2187 2188 2189 2192 void dataBufferAppend (String s) 2193 { 2194 dataBufferAppend(s.toCharArray(), 0, s.length()); 2195 } 2196 2197 2198 2201 void dataBufferAppend (char ch[], int start, int length) 2202 { 2203 dataBuffer = 2204 (char[])extendArray(dataBuffer, dataBuffer.length, 2205 dataBufferPos + length); 2206 System.arraycopy((Object )ch, start, 2207 (Object )dataBuffer, dataBufferPos, 2208 length); 2209 dataBufferPos += length; 2210 } 2211 2212 2213 2216 void dataBufferNormalize () 2217 { 2218 int i = 0; 2219 int j = 0; 2220 int end = dataBufferPos; 2221 2222 while (j < end && isWhitespace(dataBuffer[j])) { 2224 j++; 2225 } 2226 2227 while (end > j && isWhitespace(dataBuffer[end - 1])) { 2229 end --; 2230 } 2231 2232 while (j < end) { 2234 2235 char c = dataBuffer[j++]; 2236 2237 if (isWhitespace(c)) { 2240 while (j < end && isWhitespace(dataBuffer[j++])) { 2241 } 2242 dataBuffer[i++] = ' '; 2243 dataBuffer[i++] = dataBuffer[j-1]; 2244 } else { 2245 dataBuffer[i++] = c; 2246 } 2247 } 2248 2249 dataBufferPos = i; 2251 } 2252 2253 2254 2259 String dataBufferToString () 2260 { 2261 String s = new String (dataBuffer, 0, dataBufferPos); 2262 dataBufferPos = 0; 2263 return s; 2264 } 2265 2266 2267 2271 void dataBufferFlush () 2272 throws java.lang.Exception 2273 { 2274 if (dataBufferPos > 0) { 2275 switch (currentElementContent) { 2276 case CONTENT_UNDECLARED: 2277 case CONTENT_EMPTY: 2278 break; 2280 case CONTENT_MIXED: 2281 case CONTENT_ANY: 2282 if (handler != null) { 2283 handler.charData(dataBuffer, 0, dataBufferPos); 2284 } 2285 break; 2286 case CONTENT_ELEMENTS: 2287 if (handler != null) { 2288 handler.ignorableWhitespace(dataBuffer, 0, dataBufferPos); 2289 } 2290 break; 2291 } 2292 dataBufferPos = 0; 2293 } 2294 } 2295 2296 2297 2300 void require (String delim) 2301 throws java.lang.Exception 2302 { 2303 char ch[] = delim.toCharArray(); 2304 for (int i = 0; i < ch.length; i++) { 2305 require(ch[i]); 2306 } 2307 } 2308 2309 2310 2313 void require (char delim) 2314 throws java.lang.Exception 2315 { 2316 char c = readCh(); 2317 2318 if (c != delim) { 2319 error("expected character", c, new Character (delim).toString()); 2320 } 2321 } 2322 2323 2324 2340 public String intern (String s) 2341 { 2342 char ch[] = s.toCharArray(); 2343 return intern(ch, 0, ch.length); 2344 } 2345 2346 2347 2359 public String intern (char ch[], int start, int length) 2360 { 2361 int index; 2362 int hash = 0; 2363 2364 for (int i = start; i < start + length; i++) { 2366 hash = ((hash << 1) & 0xffffff) + (int)ch[i]; 2367 } 2368 2369 hash = hash % SYMBOL_TABLE_LENGTH; 2370 2371 Object bucket[] = (Object [])symbolTable[hash]; 2373 if (bucket == null) { 2374 symbolTable[hash] = bucket = new Object [8]; 2375 } 2376 2377 for (index = 0; index < bucket.length; index += 2) { 2380 char chFound[] = (char[])bucket[index]; 2381 2382 if (chFound == null) { 2384 break; 2385 } 2386 2387 if (chFound.length == length) { 2393 for (int i = 0; i < chFound.length; i++) { 2394 if (ch[start+i] != chFound[i]) { 2396 break; 2397 } else if (i == length-1) { 2398 return (String )bucket[index+1]; 2400 } 2401 } 2402 } 2403 } 2404 2405 2407 bucket = 2409 (Object [])extendArray(bucket, bucket.length, index); 2410 2411 String s = new String (ch, start, length); 2414 bucket[index] = s.toCharArray(); 2415 bucket[index+1] = s; 2416 symbolTable[hash] = bucket; 2417 return s; 2418 } 2419 2420 2421 2425 Object extendArray (Object array, int currentSize, int requiredSize) 2426 { 2427 if (requiredSize < currentSize) { 2428 return array; 2429 } else { 2430 Object newArray = null; 2431 int newSize = currentSize * 2; 2432 2433 if (newSize <= requiredSize) { 2434 newSize = requiredSize + 1; 2435 } 2436 2437 if (array instanceof char[]) { 2438 newArray = new char[newSize]; 2439 } else if (array instanceof Object []) { 2440 newArray = new Object [newSize]; 2441 } 2442 2443 System.arraycopy(array, 0, newArray, 0, currentSize); 2444 return newArray; 2445 } 2446 } 2447 2448 2449 2450 2454 2455 2459 2468 public Enumeration declaredElements () 2469 { 2470 return elementInfo.keys(); 2471 } 2472 2473 2474 2485 public int getElementContentType (String name) 2486 { 2487 Object element[] = (Object [])elementInfo.get(name); 2488 if (element == null) { 2489 return CONTENT_UNDECLARED; 2490 } else { 2491 return ((Integer )element[0]).intValue(); 2492 } 2493 } 2494 2495 2496 2504 public String getElementContentModel (String name) 2505 { 2506 Object element[] = (Object [])elementInfo.get(name); 2507 if (element == null) { 2508 return null; 2509 } else { 2510 return (String )element[1]; 2511 } 2512 } 2513 2514 2515 2521 void setElement (String name, int contentType, 2522 String contentModel, Hashtable attributes) 2523 throws java.lang.Exception 2524 { 2525 Object element[]; 2526 2527 element = (Object [])elementInfo.get(name); 2529 2530 if (element == null) { 2532 element = new Object [3]; 2533 element[0] = new Integer (CONTENT_UNDECLARED); 2534 element[1] = null; 2535 element[2] = null; 2536 } else if (contentType != CONTENT_UNDECLARED && 2537 ((Integer )element[0]).intValue() != CONTENT_UNDECLARED) { 2538 error("multiple declarations for element type", name, null); 2539 return; 2540 } 2541 2542 if (contentType != CONTENT_UNDECLARED) { 2544 element[0] = new Integer (contentType); 2545 } 2546 2547 if (contentModel != null) { 2549 element[1] = contentModel; 2550 } 2551 2552 if (attributes != null) { 2554 element[2] =attributes; 2555 } 2556 2557 elementInfo.put(name,element); 2559 } 2560 2561 2562 2566 Hashtable getElementAttributes (String name) 2567 { 2568 Object element[] = (Object [])elementInfo.get(name); 2569 if (element == null) { 2570 return null; 2571 } else { 2572 return (Hashtable )element[2]; 2573 } 2574 } 2575 2576 2577 2578 2582 2594 public Enumeration declaredAttributes (String elname) 2595 { 2596 Hashtable attlist = getElementAttributes(elname); 2597 2598 if (attlist == null) { 2599 return null; 2600 } else { 2601 return attlist.keys(); 2602 } 2603 } 2604 2605 2606 2623 public int getAttributeType (String name, String aname) 2624 { 2625 Object attribute[] = getAttribute(name, aname); 2626 if (attribute == null) { 2627 return ATTRIBUTE_UNDECLARED; 2628 } else { 2629 return ((Integer )attribute[0]).intValue(); 2630 } 2631 } 2632 2633 2634 2642 public String getAttributeEnumeration (String name, String aname) 2643 { 2644 Object attribute[] = getAttribute(name, aname); 2645 if (attribute == null) { 2646 return null; 2647 } else { 2648 return (String )attribute[3]; 2649 } 2650 } 2651 2652 2653 2661 public String getAttributeDefaultValue (String name, String aname) 2662 { 2663 Object attribute[] = getAttribute(name, aname); 2664 if (attribute == null) { 2665 return null; 2666 } else { 2667 return (String )attribute[1]; 2668 } 2669 } 2670 2671 2672 2681 public String getAttributeExpandedValue (String name, String aname) 2682 { 2683 Object attribute[] = getAttribute(name, aname); 2684 if (attribute == null) { 2685 return null; 2686 } else if (attribute[4] == null && attribute[1] != null) { 2687 try { 2688 pushString(null, (char)0 + (String )attribute[1] + (char)0); 2689 attribute[4] = readLiteral(LIT_NORMALIZE | 2690 LIT_CHAR_REF | 2691 LIT_ENTITY_REF); 2692 } catch (Exception e) {} 2693 } 2694 return (String )attribute[4]; 2695 } 2696 2697 2698 2705 public int getAttributeDefaultValueType (String name, String aname) 2706 { 2707 Object attribute[] = getAttribute(name, aname); 2708 if (attribute == null) { 2709 return ATTRIBUTE_DEFAULT_UNDECLARED; 2710 } else { 2711 return ((Integer )attribute[2]).intValue(); 2712 } 2713 } 2714 2715 2716 2724 void setAttribute (String elName, String name, int type, String enumeration, 2725 String value, int valueType) 2726 throws java.lang.Exception 2727 { 2728 Hashtable attlist; 2729 Object attribute[]; 2730 2731 attlist = getElementAttributes(elName); 2733 if (attlist == null) { 2734 attlist = new Hashtable (); 2735 } 2736 2737 if (attlist.get(name) != null) { 2740 return; 2741 } else { 2742 attribute = new Object [5]; 2743 attribute[0] = new Integer (type); 2744 attribute[1] = value; 2745 attribute[2] = new Integer (valueType); 2746 attribute[3] = enumeration; 2747 attribute[4] = null; 2748 attlist.put(name.intern(), attribute); 2749 2750 setElement(elName,CONTENT_UNDECLARED, null, attlist); 2753 } 2754 } 2755 2756 2757 2761 Object [] getAttribute (String elName, String name) 2762 { 2763 Hashtable attlist; 2764 Object attribute[]; 2765 2766 attlist = getElementAttributes(elName); 2767 if (attlist == null) { 2768 return null; 2769 } 2770 2771 attribute = (Object [])attlist.get(name); 2772 return attribute; 2773 } 2774 2775 2776 2780 2791 public Enumeration declaredEntities () 2792 { 2793 return entityInfo.keys(); 2794 } 2795 2796 2797 2805 public int getEntityType (String ename) 2806 { 2807 Object entity[] = (Object [])entityInfo.get(ename); 2808 if (entity == null) { 2809 return ENTITY_UNDECLARED; 2810 } else { 2811 return ((Integer )entity[0]).intValue(); 2812 } 2813 } 2814 2815 2816 2825 public String getEntityPublicId (String ename) 2826 { 2827 Object entity[] = (Object [])entityInfo.get(ename); 2828 if (entity == null) { 2829 return null; 2830 } else { 2831 return (String )entity[1]; 2832 } 2833 } 2834 2835 2836 2844 public String getEntitySystemId (String ename) 2845 { 2846 Object entity[] = (Object [])entityInfo.get(ename); 2847 if (entity == null) { 2848 return null; 2849 } else { 2850 return (String )entity[2]; 2851 } 2852 } 2853 2854 2855 2862 public String getEntityValue (String ename) 2863 { 2864 Object entity[] = (Object [])entityInfo.get(ename); 2865 if (entity == null) { 2866 return null; 2867 } else { 2868 return (String )entity[3]; 2869 } 2870 } 2871 2872 2873 2881 public String getEntityNotationName (String eName) 2882 { 2883 Object entity[] = (Object [])entityInfo.get(eName); 2884 if (entity == null) { 2885 return null; 2886 } else { 2887 return (String )entity[4]; 2888 } 2889 } 2890 2891 2892 2895 void setInternalEntity (String eName, String value) 2896 { 2897 setEntity(eName, ENTITY_INTERNAL, null, null, value, null); 2898 } 2899 2900 2901 2904 void setExternalDataEntity (String eName, String pubid, 2905 String sysid, String nName) 2906 { 2907 setEntity(eName, ENTITY_NDATA, pubid, sysid, null, nName); 2908 } 2909 2910 2911 2914 void setExternalTextEntity (String eName, String pubid, String sysid) 2915 { 2916 setEntity(eName, ENTITY_TEXT, pubid, sysid, null, null); 2917 } 2918 2919 2920 2923 void setEntity (String eName, int eClass, 2924 String pubid, String sysid, 2925 String value, String nName) 2926 { 2927 Object entity[]; 2928 2929 if (entityInfo.get(eName) == null) { 2930 entity = new Object [5]; 2931 entity[0] = new Integer (eClass); 2932 entity[1] = pubid; 2933 entity[2] = sysid; 2934 entity[3] = value; 2935 entity[4] = nName; 2936 2937 entityInfo.put(eName,entity); 2938 } 2939 } 2940 2941 2942 2946 2954 public Enumeration declaredNotations () 2955 { 2956 return notationInfo.keys(); 2957 } 2958 2959 2960 2970 public String getNotationPublicId (String nname) 2971 { 2972 Object notation[] = (Object [])notationInfo.get(nname); 2973 if (notation == null) { 2974 return null; 2975 } else { 2976 return (String )notation[0]; 2977 } 2978 } 2979 2980 2981 2990 public String getNotationSystemId (String nname) 2991 { 2992 Object notation[] = (Object [])notationInfo.get(nname); 2993 if (notation == null) { 2994 return null; 2995 } else { 2996 return (String )notation[1]; 2997 } 2998 } 2999 3000 3001 3007 void setNotation (String nname, String pubid, String sysid) 3008 throws java.lang.Exception 3009 { 3010 Object notation[]; 3011 3012 if (notationInfo.get(nname) == null) { 3013 notation = new Object [2]; 3014 notation[0] = pubid; 3015 notation[1] = sysid; 3016 notationInfo.put(nname,notation); 3017 } else { 3018 error("multiple declarations of notation", nname, null); 3019 } 3020 } 3021 3022 3023 3027 3028 3031 public int getLineNumber () 3032 { 3033 return line; 3034 } 3035 3036 3037 3040 public int getColumnNumber () 3041 { 3042 return column; 3043 } 3044 3045 3046 3047 3051 3052 3070 char readCh () 3071 throws java.lang.Exception 3072 { 3073 char c; 3074 3075 while (readBufferPos >= readBufferLength) 3080 { 3081 switch (sourceType) 3082 { 3083 case INPUT_READER: 3084 case INPUT_EXTERNAL: 3085 case INPUT_STREAM: 3086 readDataChunk(); 3087 while (readBufferLength < 1) 3088 { 3089 popInput(); 3090 if (readBufferLength <1) 3091 { 3092 readDataChunk(); 3093 } 3094 } 3095 break; 3096 3097 default: 3098 popInput(); 3099 break; 3100 } 3101 } 3102 3103 c = readBuffer[readBufferPos++]; 3104 3105 if 3111 ( 3112 c == '%' && 3113 (context == CONTEXT_DTD || context == CONTEXT_ENTITYVALUE) 3114 ) 3115 { 3116 char c2 = readCh(); 3117 unread(c2); 3118 if (!isWhitespace(c2)) 3119 { 3120 parsePEReference(context == CONTEXT_ENTITYVALUE); 3121 return readCh(); 3122 } 3123 } 3124 3125 if (c == '\n') 3126 { 3127 line++; 3128 column = 0; 3129 } 3130 else 3131 { 3132 column++; 3133 } 3134 3135 return c; 3136 } 3137 3138 3139 3154 void unread (char c) 3155 throws java.lang.Exception 3156 { 3157 if (c == '\n') 3159 { 3160 line--; 3161 column = -1; 3162 } 3163 if (readBufferPos > 0) 3164 { 3165 readBuffer[--readBufferPos] = c; 3166 } 3167 else 3168 { 3169 pushString(null, new Character (c).toString()); 3170 } 3171 } 3172 3173 3174 3184 void unread (char ch[], int length) 3185 throws java.lang.Exception 3186 { 3187 for (int i = 0; i < length; i++) 3188 { 3189 if (ch[i] == '\n') 3190 {line--;column = -1;} 3191 } 3192 if (length < readBufferPos) 3193 {readBufferPos -= length;} 3194 else 3195 { 3196 pushCharArray(null, ch, 0, length); 3197 sourceType = INPUT_BUFFER; 3198 } 3199 } 3200 3201 3202 3219 void pushURL (String ename, String publicId, String systemId, 3220 Reader reader, InputStream stream, String encoding) 3221 throws java.lang.Exception 3222 { 3223 URL url; 3224 boolean ignoreEncoding = false; 3225 3226 pushInput(ename); 3228 3229 readBuffer = new char[READ_BUFFER_MAX+4]; 3232 readBufferPos = 0; 3233 readBufferLength = 0; 3234 readBufferOverflow = -1; 3235 is = null; 3236 line = 1; 3237 3238 currentByteCount = 0; 3239 3240 dataBufferFlush(); 3242 3243 if (systemId != null && externalEntity != null) { 3245 systemId = new URL (externalEntity.getURL(), systemId).toString(); 3246 } else if (baseURI != null) { 3247 try { 3248 systemId = new URL (new URL (baseURI), systemId).toString(); 3249 } catch (Exception e) {} 3250 } 3251 3252 if (systemId != null && handler != null) { 3256 Object input = handler.resolveEntity(publicId, systemId); 3257 if (input != null) { 3258 if (input instanceof String ) { 3259 systemId = (String )input; 3260 } else if (input instanceof InputStream ) { 3261 stream = (InputStream )input; 3262 } else if (input instanceof Reader ) { 3263 reader = (Reader )input; 3264 } 3265 } 3266 } 3267 3268 if (handler != null) { 3270 if (systemId != null) { 3271 handler.startExternalEntity(systemId); 3272 } else { 3273 handler.startExternalEntity("[external stream]"); 3274 } 3275 } 3276 3277 if (reader != null) { 3279 sourceType = INPUT_READER; 3281 this.reader = reader; 3282 tryEncodingDecl(true); 3283 return; 3284 } else if (stream != null) { 3285 sourceType = INPUT_STREAM; 3286 is = stream; 3287 } else { 3288 3291 sourceType = INPUT_EXTERNAL; 3293 url = new URL (systemId); 3294 3295 externalEntity = url.openConnection(); 3296 externalEntity.connect(); 3297 is = externalEntity.getInputStream(); 3298 } 3299 3300 if (!is.markSupported()) { 3303 is = new BufferedInputStream (is); 3304 } 3305 3306 if (encoding == null && externalEntity != null) { 3308 encoding = externalEntity.getContentEncoding(); 3309 } 3310 3311 if (encoding != null) { 3312 checkEncoding(encoding, false); 3313 ignoreEncoding = true; 3314 } else { 3315 detectEncoding(); 3316 ignoreEncoding = false; 3317 } 3318 3319 tryEncodingDecl(ignoreEncoding); 3321 } 3322 3323 3324 3327 void tryEncodingDecl (boolean ignoreEncoding) 3328 throws java.lang.Exception 3329 { 3330 if (tryRead("<?xml")) { 3332 if (tryWhitespace()) { 3333 if (inputStack.size() > 0) { 3334 parseTextDecl(ignoreEncoding); 3335 } else { 3336 parseXMLDecl(ignoreEncoding); 3337 } 3338 } else { 3339 unread("xml".toCharArray(), 3); 3340 parsePI(); 3341 } 3342 } 3343 } 3344 3345 3346 3365 void detectEncoding () 3366 throws java.lang.Exception 3367 { 3368 byte signature[] = new byte[4]; 3369 3370 is.mark(4); 3373 is.read(signature); 3374 is.reset(); 3375 3376 if (tryEncoding(signature, (byte)0x00, (byte)0x00, 3378 (byte)0x00, (byte)0x3c)) { 3379 encoding = ENCODING_UCS_4_1234; 3382 } else if (tryEncoding(signature, (byte)0x3c, (byte)0x00, 3383 (byte)0x00, (byte)0x00)) { 3384 encoding = ENCODING_UCS_4_4321; 3387 } else if (tryEncoding(signature, (byte)0x00, (byte)0x00, 3388 (byte)0x3c, (byte)0x00)) { 3389 encoding = ENCODING_UCS_4_2143; 3392 } else if (tryEncoding(signature, (byte)0x00, (byte)0x3c, 3393 (byte)0x00, (byte)0x00)) { 3394 encoding = ENCODING_UCS_4_3412; 3397 } else if (tryEncoding(signature, (byte)0xfe, (byte)0xff)) { 3398 encoding = ENCODING_UCS_2_12; 3401 is.read(); is.read(); 3402 } else if (tryEncoding(signature, (byte)0xff, (byte)0xfe)) { 3403 encoding = ENCODING_UCS_2_21; 3406 is.read(); is.read(); 3407 } else if (tryEncoding(signature, (byte)0x00, (byte)0x3c, 3408 (byte)0x00, (byte)0x3f)) { 3409 encoding = ENCODING_UCS_2_12; 3412 error("no byte-order mark for UCS-2 entity", null, null); 3413 } else if (tryEncoding(signature, (byte)0x3c, (byte)0x00, 3414 (byte)0x3f, (byte)0x00)) { 3415 encoding = ENCODING_UCS_2_21; 3418 error("no byte-order mark for UCS-2 entity", null, null); 3419 } else if (tryEncoding(signature, (byte)0x3c, (byte)0x3f, 3420 (byte)0x78, (byte)0x6d)) { 3421 encoding = ENCODING_UTF_8; 3424 read8bitEncodingDeclaration(); 3425 } else { 3426 encoding = ENCODING_UTF_8; 3429 } 3430 } 3431 3432 3433 3444 boolean tryEncoding (byte sig[], byte b1, byte b2, byte b3, byte b4) 3445 { 3446 return (sig[0] == b1 && sig[1] == b2 && sig[2] == b3 && sig[3] == b4); 3447 } 3448 3449 3450 3459 boolean tryEncoding (byte sig[], byte b1, byte b2) 3460 { 3461 return ((sig[0] == b1) && (sig[1] == b2)); 3462 } 3463 3464 3465 3473 void pushString (String ename, String s) 3474 throws java.lang.Exception 3475 { 3476 char ch[] = s.toCharArray(); 3477 pushCharArray(ename, ch, 0, ch.length); 3478 } 3479 3480 3481 3497 void pushCharArray (String ename, char ch[], int start, int length) 3498 throws java.lang.Exception 3499 { 3500 pushInput(ename); 3502 sourceType = INPUT_INTERNAL; 3503 readBuffer = ch; 3504 readBufferPos = start; 3505 readBufferLength = length; 3506 readBufferOverflow = -1; 3507 } 3508 3509 3510 3536 void pushInput (String ename) 3537 throws java.lang.Exception 3538 { 3539 Object input[] = new Object [12]; 3540 3541 if (ename != null) { 3543 Enumeration entities = entityStack.elements(); 3544 while (entities.hasMoreElements()) { 3545 String e = (String )entities.nextElement(); 3546 if (e == ename) { 3547 error("recursive reference to entity", ename, null); 3548 } 3549 } 3550 } 3551 entityStack.push(ename); 3552 3553 if (sourceType == INPUT_NONE) { 3555 return; 3556 } 3557 3558 input[0] = new Integer (sourceType); 3561 input[1] = externalEntity; 3562 input[2] = readBuffer; 3563 input[3] = new Integer (readBufferPos); 3564 input[4] = new Integer (readBufferLength); 3565 input[5] = new Integer (line); 3566 input[6] = new Integer (encoding); 3567 input[7] = new Integer (readBufferOverflow); 3568 input[8] = is; 3569 input[9] = new Integer (currentByteCount); 3570 input[10] = new Integer (column); 3571 input[11] = reader; 3572 3573 inputStack.push(input); 3575 } 3576 3577 3578 3593 void popInput () 3594 throws java.lang.Exception 3595 { 3596 Object input[]; 3597 3598 3599 switch (sourceType) { 3600 3601 case INPUT_EXTERNAL: 3602 dataBufferFlush(); 3603 if (handler != null && externalEntity != null) { 3604 handler.endExternalEntity(externalEntity.getURL().toString()); 3605 } 3606 break; 3607 case INPUT_STREAM: 3608 dataBufferFlush(); 3609 if (baseURI != null) { 3610 if (handler != null) { 3611 handler.endExternalEntity(baseURI); 3612 } 3613 } 3614 break; 3615 case INPUT_READER: 3616 dataBufferFlush(); 3617 if (baseURI != null) { 3618 if (handler != null) { 3619 handler.endExternalEntity(baseURI); 3620 } 3621 } 3622 break; 3623 } 3624 3625 if (inputStack.isEmpty()) { 3628 throw new EOFException (); 3629 } else { 3630 String s; 3631 input = (Object [])inputStack.pop(); 3632 s = (String )entityStack.pop(); 3633 } 3634 3635 sourceType = ((Integer )input[0]).intValue(); 3636 externalEntity = (URLConnection )input[1]; 3637 readBuffer = (char[])input[2]; 3638 readBufferPos = ((Integer )input[3]).intValue(); 3639 readBufferLength = ((Integer )input[4]).intValue(); 3640 line = ((Integer )input[5]).intValue(); 3641 encoding = ((Integer )input[6]).intValue(); 3642 readBufferOverflow = ((Integer )input[7]).intValue(); 3643 is = (InputStream )input[8]; 3644 currentByteCount = ((Integer )input[9]).intValue(); 3645 column = ((Integer )input[10]).intValue(); 3646 reader = (Reader )input[11]; 3647 } 3648 3649 3650 3661 boolean tryRead (char delim) 3662 throws java.lang.Exception 3663 { 3664 char c; 3665 3666 c = readCh(); 3668 3669 if (c == delim) { 3672 return true; 3673 } else { 3674 unread(c); 3675 return false; 3676 } 3677 } 3678 3679 3680 3695 boolean tryRead (String delim) 3696 throws java.lang.Exception 3697 { 3698 char ch[] = delim.toCharArray(); 3699 char c; 3700 3701 3704 for (int i = 0; i < ch.length; i++) 3705 { 3706 c=readCh(); 3707 if (c!=ch[i]) 3708 { 3709 unread(c); 3710 if (i!=0) 3711 {unread(ch,i);} 3712 return false; 3713 } 3714 } 3715 return true; 3716 } 3717 3718 3719 3720 3727 boolean tryWhitespace () 3728 throws java.lang.Exception 3729 { 3730 char c; 3731 c = readCh(); 3732 if (isWhitespace(c)) { 3733 skipWhitespace(); 3734 return true; 3735 } else { 3736 unread(c); 3737 return false; 3738 } 3739 } 3740 3741 3742 3751 void parseUntil (String delim) 3752 throws java.lang.Exception 3753 { 3754 char c; 3755 int startLine = line; 3756 3757 try { 3758 while (!tryRead(delim)) { 3759 c = readCh(); 3760 dataBufferAppend(c); 3761 } 3762 } catch (EOFException e) { 3763 error("end of input while looking for delimiter (started on line " + 3764 startLine + ')', null, delim); 3765 } 3766 } 3767 3768 3769 3778 void skipUntil (String delim) 3779 throws java.lang.Exception 3780 { 3781 while (!tryRead(delim)) { 3782 readCh(); 3783 } 3784 } 3785 3786 3787 3798 void read8bitEncodingDeclaration () 3799 throws java.lang.Exception 3800 { 3801 int ch; 3802 readBufferPos = readBufferLength = 0; 3803 3804 while (true) { 3805 ch = is.read(); 3806 readBuffer[readBufferLength++] = (char)ch; 3807 switch (ch) { 3808 case (int)'>': 3809 return; 3810 case -1: 3811 error("end of file before end of XML or encoding declaration.", 3812 null, "?>"); 3813 return; 3814 } 3815 if (readBuffer.length == readBufferLength) { 3816 error("unfinished XML or encoding declaration", null, null); 3817 } 3818 } 3819 } 3820 3821 3822 3823 3827 3828 3841 void readDataChunk () 3842 throws java.lang.Exception 3843 { 3844 int count, i, j; 3845 3846 if (readBufferOverflow > -1) 3848 { 3849 readBuffer[0] = (char)readBufferOverflow; 3850 readBufferOverflow = -1; 3851 readBufferPos = 1; 3852 sawCR = true; 3853 } 3854 else 3855 { 3856 readBufferPos = 0; 3857 sawCR = false; 3858 } 3859 3860 if (sourceType == INPUT_READER) 3863 { 3864 count = reader.read(readBuffer, readBufferPos, READ_BUFFER_MAX-1); 3865 if (count < 0) 3866 {readBufferLength = -1;} 3867 else 3868 { 3869 readBufferLength = readBufferPos+count; 3870 filterCR(); 3871 sawCR = false; 3872 } 3873 return; 3874 } 3875 3876 count = is.read(rawReadBuffer, 0, READ_BUFFER_MAX); 3879 3880 switch (encoding) 3884 { 3885 case ENCODING_UTF_8: 3886 copyUtf8ReadBuffer(count); 3887 break; 3888 3889 case ENCODING_ISO_8859_1: 3890 copyIso8859_1ReadBuffer(count); 3891 break; 3892 3893 case ENCODING_UCS_2_12: 3894 copyUcs2ReadBuffer(count, 8, 0); 3895 break; 3896 3897 case ENCODING_UCS_2_21: 3898 copyUcs2ReadBuffer(count, 0, 8); 3899 break; 3900 3901 case ENCODING_UCS_4_1234: 3902 copyUcs4ReadBuffer(count, 24, 16, 8, 0); 3903 break; 3904 3905 case ENCODING_UCS_4_4321: 3906 copyUcs4ReadBuffer(count, 0, 8, 16, 24); 3907 break; 3908 3909 case ENCODING_UCS_4_2143: 3910 copyUcs4ReadBuffer(count, 16, 24, 0, 8); 3911 break; 3912 3913 case ENCODING_UCS_4_3412: 3914 copyUcs4ReadBuffer(count, 8, 0, 24, 16); 3915 break; 3916 } 3917 3918 if (sawCR) 3921 { 3922 filterCR(); 3923 sawCR = false; 3924 } 3925 3926 readBufferPos = 0; 3928 currentByteCount += count; 3929 } 3930 3931 3932 3939 void filterCR () 3940 { 3941 int i, j; 3942 3943 readBufferOverflow = -1; 3944 3945 loop: for (i = 0, j = 0; j < readBufferLength; i++, j++) 3946 { 3947 switch (readBuffer[j]) 3948 { 3949 case '\r': 3950 if (j == readBufferLength - 1) 3951 { 3952 readBufferOverflow = '\r'; 3953 readBufferLength--; 3954 break loop; 3955 } 3956 else if (readBuffer[j+1] == '\n') 3957 {j++;} 3958 readBuffer[i] = '\n'; 3959 break; 3960 3961 case '\n': 3962 default: 3963 readBuffer[i] = readBuffer[j]; 3964 break; 3965 } 3966 } 3967 readBufferLength = i; 3968 } 3969 3970 3971 3984 void copyUtf8ReadBuffer (int count) 3985 throws java.lang.Exception 3986 { 3987 int i = 0; 3988 int j = readBufferPos; 3989 int b1; 3990 boolean isSurrogate = false; 3991 while (i < count) { 3992 b1 = rawReadBuffer[i++]; 3993 isSurrogate = false; 3994 3995 if ((b1 & 0x80) == 0) { 3999 readBuffer[j++] = (char)b1; 4001 } else if ((b1 & 0xe0) == 0xc0) { 4002 readBuffer[j++] = 4004 (char)(((b1 & 0x1f) << 6) | 4005 getNextUtf8Byte(i++, count)); 4006 } else if ((b1 & 0xf0) == 0xe0) { 4007 readBuffer[j++] = 4009 (char)(((b1 & 0x0f) << 12) | 4010 (getNextUtf8Byte(i++, count) << 6) | 4011 getNextUtf8Byte(i++, count)); 4012 } else if ((b1 & 0xf8) == 0xf0) { 4013 isSurrogate = true; 4017 int b2 = getNextUtf8Byte(i++, count); 4018 int b3 = getNextUtf8Byte(i++, count); 4019 int b4 = getNextUtf8Byte(i++, count); 4020 readBuffer[j++] = 4021 (char)(0xd800 | 4022 ((((b1 & 0x07) << 2) | ((b2 & 0x30) >> 4) - 1) << 6) | 4023 ((b2 & 0x0f) << 2) | 4024 ((b3 & 0x30) >> 4)); 4025 readBuffer[j++] = 4026 (char)(0xdc | 4027 ((b3 & 0x0f) << 6) | 4028 b4); 4029 } else { 4031 encodingError("bad start for UTF-8 multi-byte sequence", b1, i); 4033 } 4034 if (readBuffer[j-1] == '\r') { 4035 sawCR = true; 4036 } 4037 } 4038 readBufferLength = j; 4040 } 4041 4042 4043 4053 int getNextUtf8Byte (int pos, int count) 4054 throws java.lang.Exception 4055 { 4056 int val; 4057 4058 if (pos < count) { 4061 val = rawReadBuffer[pos]; 4062 } else { 4063 val = is.read(); 4064 if (val == -1) { 4065 encodingError("unfinished multi-byte UTF-8 sequence at EOF", -1, pos); 4066 } 4067 } 4068 4069 if ((val & 0xc0) != 0x80) { 4072 encodingError("bad continuation of multi-byte UTF-8 sequence", val, 4073 pos + 1); 4074 } 4075 4076 return (val & 0x3f); 4078 } 4079 4080 4081 4092 void copyIso8859_1ReadBuffer (int count) 4093 { 4094 int i, j; 4095 for (i = 0, j = readBufferPos; i < count; i++, j++) { 4096 readBuffer[j] = (char)(rawReadBuffer[i] & 0xff); 4097 if (readBuffer[j] == '\r') { 4098 sawCR = true; 4099 } 4100 } 4101 readBufferLength = j; 4102 } 4103 4104 4105 4117 void copyUcs2ReadBuffer (int count, int shift1, int shift2) 4118 throws java.lang.Exception 4119 { 4120 int j = readBufferPos; 4121 4122 if (count > 0 && (count % 2) != 0) { 4123 encodingError("odd number of bytes in UCS-2 encoding", -1, count); 4124 } 4125 for (int i = 0; i < count; i+=2) { 4126 readBuffer[j++] = 4127 (char)(((rawReadBuffer[i] & 0xff) << shift1) | 4128 ((rawReadBuffer[i+1] & 0xff) << shift2)); 4129 if (readBuffer[j-1] == '\r') { 4130 sawCR = true; 4131 } 4132 } 4133 readBufferLength = j; 4134 } 4135 4136 4137 4153 void copyUcs4ReadBuffer (int count, int shift1, int shift2, 4154 int shift3, int shift4) 4155 throws java.lang.Exception 4156 { 4157 int j = readBufferPos; 4158 int value; 4159 4160 if (count > 0 && (count % 4) != 0) { 4161 encodingError("number of bytes in UCS-4 encoding not divisible by 4", 4162 -1, count); 4163 } 4164 for (int i = 0; i < count; i+=4) { 4165 value = (((rawReadBuffer[i] & 0xff) << shift1) | 4166 ((rawReadBuffer[i+1] & 0xff) << shift2) | 4167 ((rawReadBuffer[i+2] & 0xff) << shift3) | 4168 ((rawReadBuffer[i+3] & 0xff) << shift4)); 4169 if (value < 0x0000ffff) { 4170 readBuffer[j++] = (char)value; 4171 if (value == (int)'\r') { 4172 sawCR = true; 4173 } 4174 } else if (value < 0x000fffff) { 4175 readBuffer[j++] = (char)(0xd8 | ((value & 0x000ffc00) >> 10)); 4176 readBuffer[j++] = (char)(0xdc | (value & 0x0003ff)); 4177 } else { 4178 encodingError("value cannot be represented in UTF-16", 4179 value, i); 4180 } 4181 } 4182 readBufferLength = j; 4183 } 4184 4185 4186 4189 void encodingError (String message, int value, int offset) 4190 throws java.lang.Exception 4191 { 4192 String uri; 4193 4194 if (value >= 0) { 4195 message = message + " (byte value: 0x" + 4196 Integer.toHexString(value) + ')'; 4197 } 4198 if (externalEntity != null) { 4199 uri = externalEntity.getURL().toString(); 4200 } else { 4201 uri = baseURI; 4202 } 4203 handler.error(message, uri, -1, offset + currentByteCount); 4204 } 4205 4206 4207 4208 4212 4215 void initializeVariables () 4216 { 4217 errorCount = 0; 4219 line = 1; 4220 column = 0; 4221 4222 dataBufferPos = 0; 4224 dataBuffer = new char[DATA_BUFFER_INITIAL]; 4225 nameBufferPos = 0; 4226 nameBuffer = new char[NAME_BUFFER_INITIAL]; 4227 4228 elementInfo = new Hashtable (); 4230 entityInfo = new Hashtable (); 4231 notationInfo = new Hashtable (); 4232 4233 currentElement = null; 4236 currentElementContent = CONTENT_UNDECLARED; 4237 4238 sourceType = INPUT_NONE; 4240 inputStack = new Stack (); 4241 entityStack = new Stack (); 4242 externalEntity = null; 4243 tagAttributePos = 0; 4244 tagAttributes = new String [100]; 4245 rawReadBuffer = new byte[READ_BUFFER_MAX]; 4246 readBufferOverflow = -1; 4247 4248 context = CONTEXT_NONE; 4249 4250 symbolTable = new Object [SYMBOL_TABLE_LENGTH]; 4251 } 4252 4253 4254 4258 void cleanupVariables () 4259 { 4260 errorCount = -1; 4261 line = -1; 4262 column = -1; 4263 dataBuffer = null; 4264 nameBuffer = null; 4265 currentElement = null; 4266 currentElementContent = CONTENT_UNDECLARED; 4267 sourceType = INPUT_NONE; 4268 inputStack = null; 4269 externalEntity = null; 4270 entityStack = null; 4271 } 4272 4273 XmlHandler handler; 4277 4278 private Reader reader; private InputStream is; private int line; private int column; private int sourceType; private Stack inputStack; private URLConnection externalEntity; private int encoding; private int currentByteCount; 4291 private int errorCount; 4295 4296 private final static int READ_BUFFER_MAX = 16384; 4300 private char readBuffer[]; 4301 private int readBufferPos; 4302 private int readBufferLength; 4303 private int readBufferOverflow; 4305 4306 private byte rawReadBuffer[]; 4310 4311 4312 private static int DATA_BUFFER_INITIAL = 4096; 4316 private char dataBuffer[]; 4317 private int dataBufferPos; 4318 4319 private static int NAME_BUFFER_INITIAL = 1024; 4323 private char nameBuffer[]; 4324 private int nameBufferPos; 4325 4326 4327 private Hashtable elementInfo; 4331 private Hashtable entityInfo; 4332 private Hashtable notationInfo; 4333 4334 4335 private String currentElement; 4339 private int currentElementContent; 4340 4341 private String basePublicId; 4345 private String baseURI; 4346 private int baseEncoding; 4347 private Reader baseReader; 4348 private InputStream baseInputStream; 4349 private char baseInputBuffer[]; 4350 private int baseInputBufferStart; 4351 private int baseInputBufferLength; 4352 4353 private Stack entityStack; 4357 4358 private int context; 4362 4363 private Object symbolTable[]; 4367 private final static int SYMBOL_TABLE_LENGTH = 1087; 4368 4369 private String tagAttributes[]; 4373 private int tagAttributePos; 4374 4375 private boolean sawCR; 4381} 4382 | Popular Tags |