package com.caucho.relaxng;

import com.caucho.relaxng.pattern.*;
import com.caucho.util.CharBuffer;
import com.caucho.util.IntMap;
import com.caucho.util.L10N;
import com.caucho.vfs.Path;
import com.caucho.vfs.ReadStream;
import com.caucho.vfs.Vfs;
import com.caucho.xml.QName;
import com.caucho.xml.XmlChar;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Hand-written recursive-descent parser that reads a compact-syntax schema
 * (presumably RELAX NG compact syntax, judging by the package name) and
 * builds a {@link GrammarPattern} from it.
 *
 * <p>The parser keeps two independent one-slot push-back buffers:
 * {@code _peek} for a single character and {@code _peekToken} for a single
 * token.  Methods that read one item too far store it back into the
 * matching slot so the next {@link #read()} / {@link #parseToken()} call
 * returns it again.
 */
public class CompactParser {
  private static final L10N L = new L10N(CompactParser.class);
  private static final Logger log
    = Logger.getLogger(CompactParser.class.getName());

  // Token codes.  Single-character tokens are returned as their own char
  // value, so keyword/identifier codes start above the 8-bit range.
  private static final int IDENTIFIER = 256;

  private static final int NAMESPACE = IDENTIFIER + 1;
  private static final int DEFAULT = NAMESPACE + 1;

  private static final int START = DEFAULT + 1;
  private static final int DIV = START + 1;
  private static final int INCLUDE = DIV + 1;

  private static final int ELEMENT = INCLUDE + 1;
  private static final int ATTRIBUTE = ELEMENT + 1;

  private static final int TEXT = ATTRIBUTE + 1;
  private static final int STRING = TEXT + 1;
  private static final int TOKEN = STRING + 1;
  private static final int LITERAL = TOKEN + 1;

  private static final int EMPTY = LITERAL + 1;

  private static final int COMMENT = EMPTY + 1;

  // Maps keyword lexemes to their token codes; filled in the static
  // initializer at the bottom of the class.
  private static final IntMap _tokenMap = new IntMap();

  private GrammarPattern _grammar;
  private Pattern _pattern;

  // Default namespace and the prefix -> namespace URI table built from the
  // namespace declarations.
  private String _ns = "";
  private HashMap<String,String> _nsMap;

  private Path _pwd;
  private ReadStream _is;
  private String _filename;
  private int _line;

  // One-slot push-back buffers; a negative value means "empty".
  private int _peek = -1;
  private int _peekToken = -1;

  private CharBuffer _cb = new CharBuffer();
  private String _lexeme;

  // Counter used by generateId() to name anonymous element definitions.
  private int _generatedId;

  CompactParser()
  {
  }

  /**
   * Returns the grammar built by the last call to
   * {@link #parse(InputSource)}, or {@code null} if nothing was parsed yet.
   */
  public GrammarPattern getGrammar()
  {
    return _grammar;
  }

  /**
   * Sets the next value of the generated-id counter.  {@code parseInclude}
   * uses this so ids stay unique across included files.
   */
  public void setGeneratedId(int id)
  {
    _generatedId = id;
  }

  /**
   * Returns a fresh id of the form {@code __caucho_N} and advances the
   * counter.
   */
  public String generateId()
  {
    return "__caucho_" + _generatedId++;
  }

  /**
   * Parses the schema supplied by {@code source} into a grammar, which is
   * then available from {@link #getGrammar()}.
   *
   * <p>The input is taken from the source's byte stream when present,
   * otherwise it is opened from the system id.  The stream is always closed
   * before this method returns.
   *
   * @param source the schema to read
   * @throws SAXException on a syntax error; a {@code RelaxException} raised
   *         while parsing is rethrown as a {@code SAXException} prefixed
   *         with {@code filename:line} and carrying the original exception
   * @throws IOException if reading the input fails
   */
  public void parse(InputSource source)
    throws SAXException, IOException, RelaxException
  {
    InputStream is = source.getByteStream();

    _pwd = null;

    if (is instanceof ReadStream) {
      _is = (ReadStream) is;
      _filename = _is.getUserPath();
      _pwd = _is.getPath().getParent();
    }

    // NOTE(review): there is no `else` here, so a ReadStream argument is
    // passed through Vfs.openRead() again.  Possibly unintended; kept as-is
    // because the behavior of Vfs.openRead() is not visible from this file.
    if (is != null) {
      _is = Vfs.openRead(is);
    }
    else {
      _is = Vfs.openRead(source.getSystemId());
    }

    if (_filename == null) {
      _filename = source.getSystemId();
    }

    _line = 1;

    if (_pwd == null) {
      _pwd = Vfs.lookup(_filename).getParent();
    }

    try {
      parse();
    } catch (RelaxException e) {
      log.log(Level.FINER, e.toString(), e);

      // Keep the original exception attached so the stack trace of the
      // real failure is not lost.
      throw new SAXException(_filename + ":" + _line + ": " + e.getMessage(),
                             e);
    } finally {
      _is.close();
    }
  }

  /**
   * Parses the whole input: leading namespace declarations followed by
   * either a grammar body (definitions, {@code start}, {@code include}) or
   * a single top-level pattern that becomes the grammar's start pattern.
   */
  private void parse()
    throws SAXException, IOException, RelaxException
  {
    _grammar = new GrammarPattern();
    _nsMap = new HashMap<String,String>();

    parseDeclarations();

    // Look at the first body token and push it back; the branch taken
    // below re-reads it.
    int token = parseToken();
    _peekToken = token;

    switch (token) {
    case START:
    case IDENTIFIER:
    case INCLUDE:
      parseGrammar(_grammar);
      break;

    case COMMENT:
      // parseDeclarations() consumes comments, so this is not expected to
      // be reached; kept from the original for safety.
      break;

    default:
      _grammar.setStart(parsePattern(_grammar));
      break;
    }
  }

  /**
   * Parses the leading {@code namespace} / {@code default namespace}
   * declarations, skipping comments.  On return the first token that is
   * not part of a declaration is left in the token push-back slot.
   */
  private void parseDeclarations()
    throws SAXException, IOException, RelaxException
  {
    while (true) {
      int token = parseToken();

      switch (token) {
      case DEFAULT:
      case NAMESPACE:
        // parseNamespace() re-reads the keyword itself.
        _peekToken = token;
        parseNamespace();
        break;

      case COMMENT:
        // Consume the comment.  It must NOT be pushed back: doing so makes
        // the next parseToken() return the same comment and loop forever.
        break;

      default:
        _peekToken = token;
        return;
      }
    }
  }

  /**
   * Parses {@code [default] namespace prefix = "uri"} and records the
   * binding in the prefix map.  With {@code default}, the URI also becomes
   * the default namespace.
   */
  private void parseNamespace()
    throws SAXException, IOException, RelaxException
  {
    boolean isDefault = false;
    int token = parseToken();

    if (token == DEFAULT) {
      isDefault = true;
      token = parseToken();
    }

    if (token != NAMESPACE) {
      throw error(L.l("expected `namespace' at {0}", _cb));
    }

    token = parseToken();

    if (token != IDENTIFIER) {
      throw error(L.l("expected identifier at {0}", _cb));
    }

    String prefix = _lexeme;

    token = parseToken();

    if (token != '=') {
      throw error(L.l("expected `=' at {0}", _cb));
    }

    String value = parseLiteral();

    if (isDefault) {
      _ns = value;
    }

    _nsMap.put(prefix, value);
  }

  /**
   * Parses grammar-level components until end of input:
   * {@code start = pattern}, {@code name = pattern}, {@code include "uri"}
   * and comments.
   *
   * @param grammar the grammar receiving the start pattern and definitions
   */
  private void parseGrammar(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    while (true) {
      int token = parseToken();

      switch (token) {
      case -1:
        return;

      case COMMENT:
        break;

      case START:
        if (parseToken() != '=') {
          throw error(L.l("expected `=' at {0}", _cb));
        }

        grammar.setStart(parsePattern(grammar));
        break;

      case IDENTIFIER:
        // Capture the name before the next parseToken() overwrites _lexeme.
        String name = _lexeme;

        if (parseToken() != '=') {
          throw error(L.l("expected `=' at {0}", _cb));
        }

        grammar.setDefinition(name, parsePattern(grammar));
        break;

      case INCLUDE:
        parseInclude(grammar);
        break;

      default:
        throw error(L.l("unexpected token {0}", _cb));
      }
    }
  }

  /**
   * Handles {@code include "uri"}: parses the referenced file, resolved
   * against the directory of the current file, with a nested parser and
   * merges its grammar into {@code grammar}.  The generated-id counter is
   * handed to the nested parser and read back afterwards so ids do not
   * collide.
   */
  private void parseInclude(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    String uri = parseLiteral();

    Path sub = _pwd.lookup(uri);

    ReadStream is = null;

    try {
      is = sub.openRead();

      InputSource source = new InputSource(is);
      source.setSystemId(uri);

      CompactParser parser = new CompactParser();
      parser.setGeneratedId(_generatedId);
      parser.parse(source);

      GrammarPattern subGrammar = parser.getGrammar();

      _generatedId = parser._generatedId;

      grammar.mergeInclude(subGrammar);
    } finally {
      if (is != null) {
        is.close();
      }
    }
  }

  /**
   * Parses a pattern: one term optionally followed by a run of terms
   * joined by {@code |}, {@code &} or {@code ,}.  The operator following
   * the first term selects which kind of combination is parsed.
   */
  private Pattern parsePattern(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    Pattern pattern = parseTerm(grammar);

    int token = parseToken();

    switch (token) {
    case '|':
      return parseChoicePattern(grammar, pattern);
    case '&':
      return parseInterleavePattern(grammar, pattern);
    case ',':
      return parseGroupPattern(grammar, pattern);

    default:
      _peekToken = token;
      return pattern;
    }
  }

  /**
   * Parses the rest of an {@code a & b & ...} sequence.  The leading
   * {@code &} has already been consumed; {@code pattern} is the first term.
   */
  private Pattern parseInterleavePattern(GrammarPattern grammar,
                                         Pattern pattern)
    throws IOException, SAXException, RelaxException
  {
    int token;

    do {
      // Wrap the first operand once; later iterations append to the same
      // interleave node.
      if (! (pattern instanceof InterleavePattern)) {
        Pattern child = pattern;
        pattern = new InterleavePattern();
        pattern.addChild(child);
      }

      pattern.addChild(parseTerm(grammar));
    } while ((token = parseToken()) == '&');

    _peekToken = token;

    return pattern;
  }

  /**
   * Parses the rest of an {@code a , b , ...} sequence.  The leading
   * {@code ,} has already been consumed; {@code pattern} is the first term.
   */
  private Pattern parseGroupPattern(GrammarPattern grammar, Pattern pattern)
    throws IOException, SAXException, RelaxException
  {
    int token;

    do {
      if (! (pattern instanceof GroupPattern)) {
        Pattern child = pattern;
        pattern = new GroupPattern();
        pattern.addChild(child);
      }

      pattern.addChild(parseTerm(grammar));
    } while ((token = parseToken()) == ',');

    _peekToken = token;

    return pattern;
  }

  /**
   * Parses the rest of an {@code a | b | ...} sequence.  The leading
   * {@code |} has already been consumed; {@code pattern} is the first term.
   */
  private Pattern parseChoicePattern(GrammarPattern grammar, Pattern pattern)
    throws IOException, SAXException, RelaxException
  {
    int token;

    do {
      if (! (pattern instanceof ChoicePattern)) {
        Pattern child = pattern;
        pattern = new ChoicePattern();
        pattern.addChild(child);
      }

      pattern.addChild(parseTerm(grammar));
    } while ((token = parseToken()) == '|');

    _peekToken = token;

    return pattern;
  }

  /**
   * Parses a single term: a keyword pattern, an element or attribute, a
   * parenthesized pattern, or a reference to a named definition.  Element,
   * attribute, parenthesized and reference terms may be followed by a
   * postfix {@code *}, {@code ?} or {@code +}.
   *
   * <p>NOTE(review): the keyword patterns ({@code empty}, {@code text},
   * {@code string}, {@code token}) and literals return immediately, so a
   * postfix operator after them is not consumed here.
   */
  private Pattern parseTerm(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    int token = parseToken();

    while (token == COMMENT) {
      token = parseToken();
    }

    Pattern pattern;

    switch (token) {
    case EMPTY:
      return new EmptyPattern();

    case TEXT:
      return new TextPattern();

    case STRING:
    case LITERAL:
      return new DataPattern("string");

    case TOKEN:
      return new DataPattern("token");

    case ELEMENT:
      pattern = parseElement(grammar);
      break;

    case ATTRIBUTE:
      pattern = parseAttribute(grammar);
      break;

    case '(':
      pattern = parsePattern(grammar);

      token = parseToken();
      if (token != ')') {
        throw error(L.l("expected ')' at {0}", _cb));
      }
      break;

    case IDENTIFIER:
      pattern = new RefPattern(_grammar, _lexeme);
      pattern.setLocation(getLocation());
      break;

    default:
      throw error(L.l("unknown token {0}", _cb));
    }

    token = parseToken();

    if (token == '*') {
      pattern = new ZeroOrMorePattern(pattern);
    }
    else if (token == '?') {
      // p? is built as (empty | p)
      ChoicePattern choice = new ChoicePattern();
      choice.addChild(new EmptyPattern());
      choice.addChild(pattern);
      return choice;
    }
    else if (token == '+') {
      // p+ is built as (p, p*)
      GroupPattern group = new GroupPattern();
      group.addChild(pattern);
      group.addChild(new ZeroOrMorePattern(pattern));
      return group;
    }
    else {
      _peekToken = token;
    }

    return pattern;
  }

  /**
   * Parses {@code element nameClass { pattern }}; the {@code element}
   * keyword has already been consumed.  The element is registered in the
   * grammar under a generated id before its content is parsed.
   */
  private Pattern parseElement(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    String id = generateId();
    ElementPattern elt = new ElementPattern(id);
    grammar.setDefinition(id, elt);

    elt.addNameChild(parseNameClass(grammar, true));

    int token = parseToken();
    if (token == '{') {
      elt.addChild(parsePattern(grammar));

      token = parseToken();
      if (token != '}') {
        throw error(L.l("expected `}' at {0}", _cb));
      }
    }

    return elt;
  }

  /**
   * Parses {@code attribute nameClass { pattern }}; the {@code attribute}
   * keyword has already been consumed.  An empty body {@code { }} is
   * accepted and yields an attribute with no child pattern.
   */
  private Pattern parseAttribute(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    AttributePattern elt = new AttributePattern();
    elt.addNameChild(parseNameClass(grammar, false));

    int token = parseToken();
    if (token == '{') {
      token = parseToken();

      if (token == '}') {
        return elt;
      }

      _peekToken = token;

      elt.addChild(parsePattern(grammar));

      token = parseToken();
      if (token != '}') {
        throw error(L.l("expected `}' at {0}", _cb));
      }
    }

    return elt;
  }

  /**
   * Parses a name class: one name, or several names separated by
   * {@code |}.  This works directly on characters rather than tokens.
   *
   * @param isElement true for element names, which pick up the default
   *        namespace when unprefixed
   */
  private NameClassPattern parseNameClass(GrammarPattern grammar,
                                          boolean isElement)
    throws IOException, SAXException, RelaxException
  {
    NameClassPattern left = parseName(grammar, isElement);
    ChoiceNamePattern choice = null;

    int ch;
    while ((ch = skipWhitespace()) == '|') {
      NameClassPattern right = parseName(grammar, isElement);

      if (choice == null) {
        choice = new ChoiceNamePattern();
        choice.addNameChild(left);
      }

      choice.addNameChild(right);
    }

    _peek = ch;

    if (choice != null) {
      return choice;
    }
    else {
      return left;
    }
  }

  /**
   * Parses a single name: a parenthesized name class, {@code *},
   * {@code prefix:*}, {@code prefix:local} or a plain local name.
   * Unprefixed attribute names get no namespace; unprefixed element names
   * use the default namespace.
   */
  private NameClassPattern parseName(GrammarPattern grammar, boolean isElement)
    throws IOException, SAXException, RelaxException
  {
    _cb.clear();

    int ch = skipWhitespace();
    if (ch == '(') {
      NameClassPattern name = parseNameClass(grammar, isElement);
      ch = skipWhitespace();
      if (ch != ')') {
        throw error(L.l("expected `)' at `{0}'", String.valueOf((char) ch)));
      }
      return name;
    }

    for (; XmlChar.isNameChar(ch); ch = read()) {
      _cb.append((char) ch);
    }

    // A trailing '*' is part of the name ("*" or "prefix:*"); anything
    // else belongs to whatever follows and is pushed back.
    if (ch == '*') {
      _cb.append('*');
    }
    else {
      _peek = ch;
    }

    if (_cb.length() == 0) {
      throw error(L.l("expected name at `{0}'", String.valueOf((char) ch)));
    }

    String lexeme = _cb.toString();

    int p = lexeme.lastIndexOf(':');
    String ns = _ns;
    String localName;

    if (p < 0) {
      localName = lexeme;

      if (! isElement) {
        ns = null;
      }
    }
    else {
      String prefix = lexeme.substring(0, p);
      localName = lexeme.substring(p + 1);
      ns = _nsMap.get(prefix);

      if (ns == null && localName.equals("*")) {
        throw error(L.l("`{0}' does not match a defined namespace.", lexeme));
      }

      // Unknown prefix: keep the full prefixed name with an empty
      // namespace.
      if (ns == null) {
        return new NamePattern(new QName(lexeme, ""));
      }
    }

    if (lexeme.equals("*")) {
      AnyNamePattern pattern = new AnyNamePattern();
      pattern.setExcept(parseExcept(grammar, isElement));
      return pattern;
    }
    else if (localName.equals("*")) {
      NsNamePattern pattern = new NsNamePattern(lexeme, ns);
      pattern.setExcept(parseExcept(grammar, isElement));
      return pattern;
    }
    else if ("".equals(ns) || ns == null) {
      return new NamePattern(new QName(localName, ""));
    }
    else {
      return new NamePattern(new QName(lexeme, ns));
    }
  }

  /**
   * Parses an optional {@code - name} exclusion following a wildcard name.
   *
   * @return the excluded name class, or {@code null} if no {@code -}
   *         follows
   */
  private NameClassPattern parseExcept(GrammarPattern grammar,
                                       boolean isElement)
    throws IOException, SAXException, RelaxException
  {
    int ch = skipWhitespace();

    if (ch != '-') {
      _peek = ch;
      return null;
    }

    return parseName(grammar, isElement);
  }

  /**
   * Returns the next token: the pushed-back token if there is one,
   * otherwise the next one scanned from the input.
   *
   * <p>Punctuation is returned as its character value, keywords as their
   * code from the keyword table, other names as {@code IDENTIFIER}, quoted
   * strings as {@code LITERAL}, {@code ##} comment lines as
   * {@code COMMENT}, and end of input as {@code -1}.  For identifiers,
   * literals and comments the text is left in {@code _lexeme}.
   */
  private int parseToken()
    throws IOException, SAXException, RelaxException
  {
    int ch = _peekToken;

    if (ch >= 0) {
      _peekToken = -1;
      return ch;
    }

    ch = skipWhitespace();

    _cb.clear();

    if (ch < 0) {
      _cb.append("end of file");
      return -1;
    }

    switch (ch) {
    case '?':
    case '*':
    case '+':
    case ',':
    case '|':
    case '&':
    case '{':
    case '}':
    case '(':
    case ')':
    case '=':
      _cb.append((char) ch);
      return ch;

    case '\"':
    case '\'':
      // parseLiteral() expects to read the opening quote itself.
      _peek = ch;
      _lexeme = parseLiteral();
      return LITERAL;

    case '#':
      // Each comment line must start with "##"; consecutive comment lines
      // are joined with '\n' into a single COMMENT token.
      do {
        ch = read();
        if (ch != '#') {
          throw error(L.l("expected `#' at `{0}'", String.valueOf((char) ch)));
        }

        if (_cb.length() > 0) {
          _cb.append('\n');
        }

        for (ch = read(); ch > 0 && ch != '\n' && ch != '\r'; ch = read()) {
          _cb.append((char) ch);
        }

        if (ch == '\r') {
          ch = read();
          if (ch != '\n') {
            _peek = ch;
          }
        }

        ch = read();
      } while (ch == '#');

      _peek = ch;
      _lexeme = _cb.toString();
      return COMMENT;

    default:
      if (XmlChar.isNameStart(ch)) {
        for (; XmlChar.isNameChar(ch); ch = read()) {
          _cb.append((char) ch);
        }
        _peek = ch;
        _lexeme = _cb.toString().intern();

        int token = _tokenMap.get(_lexeme);

        if (token > 0) {
          return token;
        }
        else {
          return IDENTIFIER;
        }
      }
      else {
        throw error(L.l("Unknown character `{0}'", String.valueOf((char) ch)));
      }
    }
  }

  /**
   * Reads a quoted literal delimited by {@code "} or {@code '} and returns
   * its content without the quotes.  No escape processing is performed.
   */
  private String parseLiteral()
    throws IOException, SAXException, RelaxException
  {
    int end = skipWhitespace();

    if (end != '"' && end != '\'') {
      throw error(L.l("expected `\"' at `{0}'", String.valueOf((char) end)));
    }

    _cb.clear();
    int ch = read();
    for (; ch >= 0 && ch != end; ch = read()) {
      _cb.append((char) ch);
    }

    // Hitting end of input before the closing quote.
    if (ch != end) {
      throw error(L.l("expected `\"' at `{0}'", String.valueOf((char) ch)));
    }

    return _cb.toString();
  }

  /**
   * Reads a run of name characters after skipping whitespace.
   * Not called from anywhere in this class.
   */
  private String parseIdentifier()
    throws IOException, SAXException, RelaxException
  {
    int ch = skipWhitespace();

    if (! XmlChar.isNameChar(ch)) {
      throw error(L.l("expected identifier character at `{0}'", String.valueOf((char) ch)));
    }

    _cb.clear();
    for (; XmlChar.isNameChar(ch); ch = read()) {
      _cb.append((char) ch);
    }

    return _cb.toString();
  }

  /**
   * Skips whitespace and returns the first non-whitespace character, or
   * whatever {@link #read()} returns at end of input.
   */
  private int skipWhitespace()
    throws IOException, SAXException
  {
    int ch;

    for (ch = read(); XmlChar.isWhitespace(ch); ch = read()) {
    }

    return ch;
  }

  /**
   * Creates (but does not throw) a {@code SAXException} whose message is
   * prefixed with the current {@code filename:line}.
   */
  private SAXException error(String msg)
  {
    return new SAXException(_filename + ":" + _line + ": " + msg);
  }

  /**
   * Returns the current source location as {@code filename:line}.
   */
  public String getLocation()
  {
    return _filename + ":" + _line;
  }

  /**
   * Returns the next character: the pushed-back one if present, otherwise
   * the next character from the stream.  When reading from the stream,
   * {@code \r} and {@code \r\n} are both returned as a single {@code \n}
   * and the line counter is advanced once per line ending.
   */
  private int read()
    throws IOException
  {
    int ch = _peek;

    if (ch >= 0) {
      _peek = -1;
      return ch;
    }

    ch = _is.read();

    if (ch == '\n') {
      _line++;
    }
    else if (ch == '\r') {
      _line++;
      ch = _is.read();

      // Lone '\r': keep the character that followed it for the next call
      // and report the line ending as '\n'.
      if (ch != '\n') {
        _peek = ch;
        ch = '\n';
      }
    }

    return ch;
  }

  static {
    _tokenMap.put("namespace", NAMESPACE);
    _tokenMap.put("default", DEFAULT);

    _tokenMap.put("start", START);
    _tokenMap.put("div", DIV);

    _tokenMap.put("element", ELEMENT);
    _tokenMap.put("attribute", ATTRIBUTE);

    _tokenMap.put("text", TEXT);
    _tokenMap.put("string", STRING);
    _tokenMap.put("token", TOKEN);

    _tokenMap.put("empty", EMPTY);

    _tokenMap.put("include", INCLUDE);
  }
}