1 9 10 11 18 19 24 25 31 32 33 package JSX; 34 import java.io.*; 35 36 public class ParserXML { 37 Reader in; StreamTokenizer st; 39 final static boolean DEBUG = false; 40 42 final static boolean CLOSETAG_DEBUG = false; 43 45 final static boolean ATTR_BUFFER_DEBUG = false; 46 48 public ParserXML(Reader in) { 49 this.in = in; 50 st = new StreamTokenizer(new BufferedReader(in)); setSyntax(); 52 } 53 54 public void close() { 55 try { 56 in.close(); } catch (IOException e) { 58 e.printStackTrace(); 59 } 60 } 61 62 63 68 private void setSyntax() { 69 st.eolIsSignificant(false); st.resetSyntax(); st.wordChars(0,255); 73 st.ordinaryChar('<'); st.ordinaryChar('/'); 75 st.ordinaryChar('>'); 77 st.ordinaryChar('='); 78 st.ordinaryChar('"'); st.ordinaryChar('\''); st.whitespaceChars(' ',' '); 82 st.whitespaceChars('\t','\t'); 83 st.whitespaceChars('\n','\n'); 84 st.whitespaceChars('\r','\r'); 85 } 86 87 public Attr readAttr() throws ExceptionXML, IOException { 89 Attr attr; 90 attr = (Attr) attrBuffer.get(attrBufferIndex++); if (attrBufferIndex==attrBuffer.size()) { 93 attrBuffer.clear(); 95 attrBufferIndex = 0; } 97 return attr; 98 } 99 100 101 116 117 118 124 public Attr readAttrImpl() throws ExceptionXML, IOException { 125 126 Attr attr = new Attr(); 127 int ttype; 128 ttype = st.nextToken(); if (ttype=='>') { 130 attr.isEnd = true; 131 return attr; 132 } 133 134 if (ttype=='/') { if (st.nextToken() == '>') { 137 attr.isEnd=true; 138 attr.emptyTag=true; 139 return attr; 140 } 141 else 142 throw new ExceptionXML("empty tag (\"/>\")", st); 143 } 144 145 if (ttype!=st.TT_WORD) 146 throw new ExceptionXML("an attribute name", st); 147 148 if (st.sval.equals("?")) { if (st.nextToken() == '>') { 152 attr.isEndPI=true; return attr; 154 } 155 else 156 throw new ExceptionXML("PI end (\"?>\")", st); 157 } 158 159 attr.name = st.sval; 160 ttype = st.nextToken(); if (ttype!='=') 162 throw new ExceptionXML( 163 "Expected \"=\" to follow attribute name \"" +attr.name+ "\"" 164 ); 165 st.wordChars('>','>'); st.wordChars('=','='); 171 st.wordChars('/','/'); 172 st.ordinaryChar(' '); st.ordinaryChar('\t'); 176 st.eolIsSignificant(true); 178 st.ordinaryChar('\r'); 180 182 ttype = st.nextToken(); if (ttype!='"' && ttype!='\'') 185 throw new ExceptionXML( 186 "'quoted' or \"quoted\" value (single or double) to follow \""+ 187 attr.name + "=\"", st); if (DEBUG) System.err.println(st.sval + " (sval) *with* quote"); 189 int quoteChar = ttype; 190 201 202 ttype = st.nextToken(); if (ttype==quoteChar) { attr.value = ""; } 206 else { 207 if (DEBUG) System.err.println(ttype + " (type) read after quote"); 208 if (DEBUG) System.err.println(st.sval + " (sval) read after quote"); 209 StringBuffer out = new StringBuffer (); while (true) { if (DEBUG) System.err.println("BUILDING: '"+out+"'"); 215 if (ttype==' ') { out.append(' '); 217 ttype = st.nextToken(); 219 continue; 220 } 221 if (ttype=='\t') { out.append('\t'); 223 ttype = st.nextToken(); 225 continue; 226 } 227 if (ttype=='\r') { out.append('\r'); 229 ttype = st.nextToken(); 231 continue; 232 } 233 if (ttype==st.TT_EOL) { out.append('\n'); 235 ttype = st.nextToken(); 237 continue; 238 } 239 if (ttype==st.TT_WORD) { 240 out.append(st.sval); ttype = st.nextToken(); 243 continue; 244 } 245 break; 251 } 252 attr.value = out.toString(); } 254 if (DEBUG) System.err.println(st.sval + " was value read after quote"); 255 if (DEBUG) System.err.println(attr.value + " was total value"); 256 if (DEBUG) System.err.println(ttype + " read after that"); 257 if (ttype!=quoteChar) throw new ExceptionXML( 259 "matching '"+(char)quoteChar+"'", st); 260 st.eolIsSignificant(false); 262 st.ordinaryChar('>'); 263 st.ordinaryChar('='); 264 st.ordinaryChar('/'); 265 st.whitespaceChars(' ',' '); 267 st.whitespaceChars('\t','\t'); 268 st.whitespaceChars('\n','\n'); 269 st.whitespaceChars('\r','\r'); 270 attr.name = ParseUtilities.descapeDollar(attr.name); 272 return attr; 273 } 274 275 284 public class Attr implements Serializable{ 285 public String name; 286 public String value; 287 boolean nameMissing; int length; boolean isEndPI = false; public boolean isEnd = false; public boolean emptyTag = false; 292 public String toString() { 293 298 return name +"=\""+value+"\" , " 299 + ((emptyTag)?"empty":"")+((isEnd)?"end":"")+" tag" 300 + " nameMissing="+nameMissing; 301 } 302 } 303 304 305 306 311 public Tag readTag() throws ExceptionXML, IOException { 312 Tag tag = new Tag(); int ttype; 314 ttype = st.nextToken(); if (ttype==st.TT_EOF) 316 return null; 317 if (ttype!='<') 318 throw new ExceptionXML("an open tag", st); 319 ttype = st.nextToken(); switch (ttype) { 321 case StreamTokenizer.TT_WORD: tag.name = st.sval; 323 tag.start = true; 324 break; 325 case '/': ttype = st.nextToken(); if (ttype!=st.TT_WORD) 328 throw new ExceptionXML("a tag", st); 329 tag.name = st.sval; 330 tag.start = false; 331 ttype = st.nextToken(); if (ttype!='>') 333 throw new ExceptionXML("a >", st); 334 break; 335 default: 336 throw new ExceptionXML("either \"<tag\" or \"</tag\"", st); 337 } 338 if (tag.name.startsWith("?")) { 339 tag.isPI = true; 340 tag.name = tag.name.substring(1); } 342 if (CLOSETAG_DEBUG) { 343 System.err.println("RETURNING TAG: "+tag); 344 try { 345 throw new Exception (); 346 } catch (Exception e) { 347 e.printStackTrace(); 348 } 349 } 350 if (tag.start) { if (attrBuffer.size()!=0) { 353 throw new ExceptionXML("not all attrs ("+attrBuffer+") consumed, when read " +tag); 354 } 355 Attr attr; 356 do { attr=readAttrImpl(); 358 if (attr.name!=null && attr.name.equals(XMLSerialize.ALIAS_ID_TOKEN) ) 359 setAlias(attr); 360 else if (attr.name!=null && attr.name.equals(XMLSerialize.NAME_TOKEN) ) 361 attrBuffer.insertElementAt(attr, 0); else 363 attrBuffer.add(attr); if (ATTR_BUFFER_DEBUG) System.err.println("attr: "+attr); 365 } while (!attr.isEnd && !attr.isEndPI); } 367 return tag; 368 } 369 378 public class Tag { 379 public String name; 380 boolean isPI; public boolean start; public String toString() { return "<"+(isPI?"?":(start?"":"/")) + name; } 383 } 384 385 388 private java.util.Vector attrBuffer = new java.util.Vector (); private int attrBufferIndex = 0; 391 private Attr attrAlias = null; 392 393 private void setAlias(Attr attr) throws ExceptionXML { 394 if (attrAlias==null) { attrAlias = attr; 396 } else { 397 throw new ExceptionXML("Found two "+XMLSerialize.ALIAS_ID_TOKEN+" attrs in tag"); 398 } 399 } 400 401 public Attr getAlias() throws ExceptionXML { 402 if (attrAlias==null) 403 throw new ExceptionXML("no alias attr found"); 404 Attr attr = attrAlias; 405 attrAlias = null; return attr; } 408 409 410 411 419 static public class ExceptionXML extends IOException { 420 public ExceptionXML(String expected) { 421 super(expected); 422 } 423 public ExceptionXML(String expected, String got) { 424 super("Expected \"" +expected+ "\"" 425 + ", but got \"" +got+ "\""); 426 } 427 public ExceptionXML(String comment, String expected, String got) { 428 super(comment+": Expected \"" +expected+ "\"" 429 + ", but got \"" +got+ "\""); 430 } 431 public ExceptionXML(String expected, StreamTokenizer st) { 432 super( 433 "Line " +st.lineno()+": " 434 + "Expected " +expected 435 + ", but got " 436 +"'"+ 437 (st.ttype!=StreamTokenizer.TT_WORD? 438 (""+(char)(st.ttype)): 439 (st.sval)) 440 +"' " 441 +"["+st.ttype+": " +st.sval+ "]" 442 ); 443 } 444 } 445 446 447 448 453 public void parse() { 454 Tag tag; 455 Attr attr; 456 String indent = ""; 457 System.out.println( 458 "Note: this version of parse() does no start-end tag balancing\n"+ 459 "Note: it does no checking for legality of class and field names\n"); 460 try { 461 while ( (tag=readTag()) != null) { 462 if (tag.start) { 463 System.out.print(indent + tag); 464 indent += " "; while ( (attr=readAttr()) != null) 466 System.out.print(" "+attr); 467 } 468 else { 469 indent = indent.substring(4); System.out.print(indent + tag); 471 } 472 System.out.println(">"); 473 } 474 } 475 catch (Exception e) { 476 System.out.println(e); 477 } 478 } 479 480 481 public void dumbParse() { 482 int ttype; 483 try { 484 while ( (ttype = st.nextToken()) != st.TT_EOF) { 485 String a; 486 switch (ttype) { 487 case '"': 488 a = "QUOTED: " +st.sval; 489 break; 490 case StreamTokenizer.TT_WORD: a = "TOKEN: " +st.sval; 492 break; 493 default: 494 a = (char)ttype + " : " + st.sval; 495 break; 496 } 497 System.out.println(a); 498 } 499 } 500 catch (Exception e) { 501 System.out.println(e); 502 } 503 } 504 505 public static class Test { 506 public static void main(String args[]) { 507 new ParserXML(new InputStreamReader(System.in)).parse(); 508 } 509 } 510 } 511 512 532 | Popular Tags |