KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > relaxng > CompactParser


/*
 * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
 *
 * This file is part of Resin(R) Open Source
 *
 * Each copy or derived work must preserve the copyright notice and this
 * notice unmodified.
 *
 * Resin Open Source is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Resin Open Source is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
 * of NON-INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Resin Open Source; if not, write to the
 *
 * Free Software Foundation, Inc.
 * 59 Temple Place, Suite 330
 * Boston, MA 02111-1307 USA
 *
 * @author Scott Ferguson
 */

29
package com.caucho.relaxng;

import com.caucho.relaxng.pattern.*;
import com.caucho.util.CharBuffer;
import com.caucho.util.IntMap;
import com.caucho.util.L10N;
import com.caucho.vfs.Path;
import com.caucho.vfs.ReadStream;
import com.caucho.vfs.Vfs;
import com.caucho.xml.QName;
import com.caucho.xml.XmlChar;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
50
51 /**
52  * Builder for the relax.
53  */

54 public class CompactParser {
55   private static final L10N L = new L10N(CompactParser.class);
56   private static final Logger JavaDoc log
57     = Logger.getLogger(CompactParser.class.getName());
58
59   private static final int IDENTIFIER = 256;
60   
61   private static final int NAMESPACE = IDENTIFIER + 1;
62   private static final int DEFAULT = NAMESPACE + 1;
63   
64   private static final int START = DEFAULT + 1;
65   private static final int DIV = START + 1;
66   private static final int INCLUDE = DIV + 1;
67   
68   private static final int ELEMENT = INCLUDE + 1;
69   private static final int ATTRIBUTE = ELEMENT + 1;
70   
71   private static final int TEXT = ATTRIBUTE + 1;
72   private static final int STRING = TEXT + 1;
73   private static final int TOKEN = STRING + 1;
74   private static final int LITERAL = TOKEN + 1;
75   
76   private static final int EMPTY = LITERAL + 1;
77   
78   private static final int COMMENT = EMPTY + 1;
79
80   private static final IntMap _tokenMap = new IntMap();
81
82   private GrammarPattern _grammar;
83   private Pattern _pattern;
84
85   private String JavaDoc _ns = "";
86   private HashMap JavaDoc<String JavaDoc,String JavaDoc> _nsMap;
87
88   private Path _pwd;
89   private ReadStream _is;
90   private String JavaDoc _filename;
91   private int _line;
92
93   private int _peek = -1;
94   private int _peekToken = -1;
95
96   private CharBuffer _cb = new CharBuffer();
97   private String JavaDoc _lexeme;
98
99   private int _generatedId;
100
101   CompactParser()
102   {
103   }
104
105   /**
106    * Gets the root pattern.
107    */

108   public GrammarPattern getGrammar()
109   {
110     return _grammar;
111   }
112
113   public void setGeneratedId(int id)
114   {
115     _generatedId = id;
116   }
117
118   public String JavaDoc generateId()
119   {
120     return "__caucho_" + _generatedId++;
121   }
122
123   /**
124    * Parses the relax file.
125    */

126   public void parse(InputSource JavaDoc source)
127     throws SAXException JavaDoc, IOException JavaDoc, RelaxException
128   {
129     InputStream JavaDoc is = source.getByteStream();
130
131     _pwd = null;
132
133     if (is instanceof ReadStream) {
134       _is = (ReadStream) is;
135       _filename = _is.getUserPath();
136       _pwd = _is.getPath().getParent();
137     }
138     if (is != null)
139       _is = Vfs.openRead(is);
140     else
141       _is = Vfs.openRead(source.getSystemId());
142
143     if (_filename == null)
144       _filename = source.getSystemId();
145     _line = 1;
146
147     if (_pwd == null)
148       _pwd = Vfs.lookup(_filename).getParent();
149
150     try {
151       parse();
152     } catch (RelaxException e) {
153       log.log(Level.FINER, e.toString(), e);
154       
155       // xml/1196
156
//throw new SAXException(_filename + ":" + _line + ": " + e.getMessage(), e);
157
throw new SAXException JavaDoc(_filename + ":" + _line + ": " + e.getMessage());
158     } finally {
159       _is.close();
160     }
161   }
162
163   /**
164    * Internal parser.
165    */

166   private void parse()
167     throws SAXException JavaDoc, IOException JavaDoc, RelaxException
168   {
169     _grammar = new GrammarPattern();
170     _nsMap = new HashMap JavaDoc<String JavaDoc,String JavaDoc>();
171
172     parseDeclarations();
173     
174     int token = parseToken();
175     _peekToken = token;
176
177     switch (token) {
178     case START:
179     case IDENTIFIER:
180     case INCLUDE:
181       parseGrammar(_grammar);
182       break;
183
184     case COMMENT:
185       break;
186
187     default:
188       _grammar.setStart(parsePattern(_grammar));
189       break;
190     }
191   }
192
193   /**
194    * Parses declarations.
195    */

196   private void parseDeclarations()
197     throws SAXException JavaDoc, IOException JavaDoc, RelaxException
198   {
199     while (true) {
200       int token = parseToken();
201
202       _peekToken = token;
203       
204       switch (token) {
205       case DEFAULT:
206       case NAMESPACE:
207         parseNamespace();
208         break;
209         
210       case COMMENT:
211         break;
212
213       default:
214         return;
215       }
216     }
217   }
218   
219   /**
220    * Parses the namespace declaration
221    */

222   private void parseNamespace()
223     throws SAXException JavaDoc, IOException JavaDoc, RelaxException
224   {
225     boolean isDefault = false;
226     int token = parseToken();
227
228     if (token == DEFAULT) {
229       isDefault = true;
230       token = parseToken();
231     }
232
233     if (token != NAMESPACE)
234       throw error(L.l("expected `namespace' at {0}", _cb));
235       
236     token = parseToken();
237
238     if (token != IDENTIFIER)
239       throw error(L.l("expected identifier at {0}", _cb));
240
241     String JavaDoc prefix = _lexeme;
242
243     token = parseToken();
244     
245     if (token != '=')
246       throw error(L.l("expected `=' at {0}", _cb));
247
248     String JavaDoc value = parseLiteral();
249
250     if (isDefault)
251       _ns = value;
252
253     _nsMap.put(prefix, value);
254   }
255
256   /**
257    * Parses top-level grammar stuff.
258    */

259   private void parseGrammar(GrammarPattern grammar)
260     throws IOException JavaDoc, SAXException JavaDoc, RelaxException, RelaxException
261   {
262     while (true) {
263       int token = parseToken();
264       Pattern pattern;
265
266       switch (token) {
267       case -1:
268         return;
269
270       case COMMENT:
271         break;
272
273       case START:
274         int next = parseToken();
275         if (next == '=')
276           grammar.setStart(parsePattern(grammar));
277         else
278           throw error(L.l("expected `=' at {0}", _cb));
279         break;
280
281       case IDENTIFIER:
282         String JavaDoc name = _lexeme;
283         Pattern oldPattern = grammar.getDefinition(name);
284         pattern = new GroupPattern();
285         next = parseToken();
286         if (next == '=') {
287           grammar.setDefinition(name, parsePattern(grammar));
288         }
289         else
290           throw error(L.l("expected `=' at {0}", _cb));
291         break;
292
293       case INCLUDE:
294         parseInclude(grammar);
295         break;
296
297       default:
298         throw error(L.l("unexpected token {0}", _cb));
299       }
300     }
301   }
302         
303   private void parseInclude(GrammarPattern grammar)
304     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
305   {
306     String JavaDoc uri = parseLiteral();
307
308     Path sub = _pwd.lookup(uri);
309
310     ReadStream is = null;
311     
312     try {
313       is = sub.openRead();
314
315       InputSource JavaDoc source = new InputSource JavaDoc(is);
316       source.setSystemId(uri);
317
318       CompactParser parser = new CompactParser();
319       parser.setGeneratedId(_generatedId);
320       parser.parse(source);
321
322       GrammarPattern subGrammar = parser.getGrammar();
323
324       _generatedId = parser._generatedId;
325
326       grammar.mergeInclude(subGrammar);
327     } finally {
328       if (is != null)
329         is.close();
330     }
331   }
332         
333   /**
334    * Parses a pattern.
335    */

336   private Pattern parsePattern(GrammarPattern grammar)
337     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
338   {
339     Pattern pattern = parseTerm(grammar);
340
341     int token = parseToken();
342
343     switch (token) {
344     case '|':
345       return parseChoicePattern(grammar, pattern);
346     case '&':
347       return parseInterleavePattern(grammar, pattern);
348     case ',':
349       return parseGroupPattern(grammar, pattern);
350
351     default:
352       _peekToken = token;
353       return pattern;
354     }
355   }
356
357   /**
358    * Parses a interleave pattern.
359    */

360   private Pattern parseInterleavePattern(GrammarPattern grammar,
361                                          Pattern pattern)
362     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
363   {
364     int token;
365
366     do {
367       if (! (pattern instanceof InterleavePattern)) {
368         Pattern child = pattern;
369         pattern = new InterleavePattern();
370         pattern.addChild(child);
371       }
372       
373       pattern.addChild(parseTerm(grammar));
374     } while ((token = parseToken()) == '&');
375
376     _peekToken = token;
377
378     return pattern;
379   }
380
381   /**
382    * Parses a group pattern.
383    */

384   private Pattern parseGroupPattern(GrammarPattern grammar, Pattern pattern)
385     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
386   {
387     int token;
388
389     do {
390       if (! (pattern instanceof GroupPattern)) {
391         Pattern child = pattern;
392         pattern = new GroupPattern();
393         pattern.addChild(child);
394       }
395       
396       pattern.addChild(parseTerm(grammar));
397     } while ((token = parseToken()) == ',');
398
399     _peekToken = token;
400
401     return pattern;
402   }
403
404   /**
405    * Parses a choice pattern.
406    */

407   private Pattern parseChoicePattern(GrammarPattern grammar, Pattern pattern)
408     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
409   {
410     int token;
411
412     do {
413       if (! (pattern instanceof ChoicePattern)) {
414         Pattern child = pattern;
415         pattern = new ChoicePattern();
416         pattern.addChild(child);
417       }
418       
419       pattern.addChild(parseTerm(grammar));
420     } while ((token = parseToken()) == '|');
421
422     _peekToken = token;
423
424     return pattern;
425   }
426
427   /**
428    * Parses a term
429    */

430   private Pattern parseTerm(GrammarPattern grammar)
431     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
432   {
433     int token = parseToken();
434
435     while (token == COMMENT) {
436       token = parseToken();
437     }
438
439     Pattern pattern;
440     switch (token) {
441     case EMPTY:
442       return new EmptyPattern();
443       
444     case TEXT:
445       return new TextPattern();
446       
447     case STRING:
448     case LITERAL:
449       return new DataPattern("string");
450       
451     case TOKEN:
452       return new DataPattern("token");
453       
454     case ELEMENT:
455       pattern = parseElement(grammar);
456       break;
457       
458     case ATTRIBUTE:
459       pattern = parseAttribute(grammar);
460       break;
461
462     case '(':
463       pattern = parsePattern(grammar);
464
465       token = parseToken();
466       if (token != ')')
467         throw error(L.l("expected ')' at {0}", _cb));
468       break;
469
470     case IDENTIFIER:
471       pattern = new RefPattern(_grammar, _lexeme);
472       pattern.setLocation(getLocation());
473       break;
474
475     default:
476       throw error(L.l("unknown token {0}", _cb));
477     }
478
479     token = parseToken();
480
481     if (token == '*')
482       pattern = new ZeroOrMorePattern(pattern);
483     else if (token == '?') {
484       ChoicePattern choice = new ChoicePattern();
485       choice.addChild(new EmptyPattern());
486       choice.addChild(pattern);
487       return choice;
488     }
489     else if (token == '+') {
490       GroupPattern group = new GroupPattern();
491       group.addChild(pattern);
492       group.addChild(new ZeroOrMorePattern(pattern));
493       return group;
494     }
495     else {
496       _peekToken = token;
497     }
498
499     return pattern;
500   }
501
502   /**
503    * Parses an element.
504    */

505   private Pattern parseElement(GrammarPattern grammar)
506     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
507   {
508     String JavaDoc id = generateId();
509     ElementPattern elt = new ElementPattern(id);
510     grammar.setDefinition(id, elt);
511     
512     elt.addNameChild(parseNameClass(grammar, true));
513
514     int token = parseToken();
515     if (token == '{') {
516       elt.addChild(parsePattern(grammar));
517
518       token = parseToken();
519       if (token != '}')
520         throw error(L.l("expected `}' at {0}", _cb));
521     }
522
523     return elt;
524   }
525
526   /**
527    * Parses an element.
528    */

529   private Pattern parseAttribute(GrammarPattern grammar)
530     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
531   {
532     AttributePattern elt = new AttributePattern();
533     elt.addNameChild(parseNameClass(grammar, false));
534
535     int token = parseToken();
536     if (token == '{') {
537       token = parseToken();
538
539       if (token == '}')
540         return elt;
541
542       _peekToken = token;
543       
544       elt.addChild(parsePattern(grammar));
545
546       token = parseToken();
547       if (token != '}')
548         throw error(L.l("expected `}' at {0}", _cb));
549     }
550
551     return elt;
552   }
553
554   /**
555    * Parses a name class.
556    */

557   private NameClassPattern parseNameClass(GrammarPattern grammar,
558                                           boolean isElement)
559     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
560   {
561     NameClassPattern left = parseName(grammar, isElement);
562     ChoiceNamePattern choice = null;
563
564     int ch;
565     while ((ch = skipWhitespace()) == '|') {
566       NameClassPattern right = parseName(grammar, isElement);
567
568       if (choice == null) {
569         choice = new ChoiceNamePattern();
570         choice.addNameChild(left);
571       }
572
573       choice.addNameChild(right);
574     }
575
576     _peek = ch;
577
578     if (choice != null)
579       return choice;
580     else
581       return left;
582   }
583
584   /**
585    * Parses a name class.
586    */

587   private NameClassPattern parseName(GrammarPattern grammar, boolean isElement)
588     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
589   {
590     _cb.clear();
591     
592     int ch = skipWhitespace();
593     if (ch == '(') {
594       NameClassPattern name = parseNameClass(grammar, isElement);
595       ch = skipWhitespace();
596       if (ch != ')')
597         throw error(L.l("expected `)' at `{0}'", String.valueOf((char) ch)));
598       return name;
599     }
600     
601     for (; XmlChar.isNameChar(ch); ch = read())
602       _cb.append((char) ch);
603
604     if (ch == '*')
605       _cb.append('*');
606     else
607       _peek = ch;
608
609     if (_cb.length() == 0)
610       throw error(L.l("expected name at `{0}'", String.valueOf((char) ch)));
611
612     String JavaDoc lexeme = _cb.toString();
613
614     int p = lexeme.lastIndexOf(':');
615     String JavaDoc ns = _ns;
616     String JavaDoc localName;
617     
618     if (p < 0) {
619       localName = lexeme;
620
621       if (! isElement)
622         ns = null;
623     }
624     else {
625       String JavaDoc prefix = lexeme.substring(0, p);
626       localName = lexeme.substring(p + 1);
627       ns = _nsMap.get(prefix);
628
629       if (ns == null && localName.equals("*"))
630         throw error(L.l("`{0}' does not match a defined namespace.", lexeme));
631       
632       if (ns == null) {// && isElement) {
633
return new NamePattern(new QName(lexeme, ""));
634       }
635     }
636
637     if (lexeme.equals("*")) {
638       AnyNamePattern pattern = new AnyNamePattern();
639       pattern.setExcept(parseExcept(grammar, isElement));
640       return pattern;
641     }
642     else if (localName.equals("*")) {
643       NsNamePattern pattern = new NsNamePattern(lexeme, ns);
644       pattern.setExcept(parseExcept(grammar, isElement));
645       return pattern;
646     }
647     else if ("".equals(ns) || ns == null) {
648       return new NamePattern(new QName(localName, ""));
649     }
650     else {
651       return new NamePattern(new QName(lexeme, ns));
652     }
653   }
654
655   /**
656    * Parses a name class.
657    */

658   private NameClassPattern parseExcept(GrammarPattern grammar,
659                                        boolean isElement)
660     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
661   {
662     int ch = skipWhitespace();
663
664     if (ch != '-') {
665       _peek = ch;
666       return null;
667     }
668
669     return parseName(grammar, isElement);
670   }
671
672   /**
673    * Parses a token.
674    */

675   private int parseToken()
676     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
677   {
678     int ch = _peekToken;
679
680     if (ch >= 0) {
681       _peekToken = -1;
682       return ch;
683     }
684     
685     ch = skipWhitespace();
686
687     _cb.clear();
688       
689     if (ch < 0) {
690       _cb.append("end of file");
691       return -1;
692     }
693
694     switch (ch) {
695     case '?':
696     case '*':
697     case '+':
698     case ',':
699     case '|':
700     case '&':
701     case '{':
702     case '}':
703     case '(':
704     case ')':
705     case '=':
706       _cb.append((char) ch);
707       return ch;
708
709     case '\"':
710     case '\'':
711       _peek = ch;
712       _lexeme = parseLiteral();
713       return LITERAL;
714
715     case '#':
716       do {
717         ch = read();
718         if (ch != '#')
719           throw error(L.l("expeced `#' at `{0}'", String.valueOf((char) ch)));
720         
721         if (_cb.length() > 0)
722           _cb.append('\n');
723
724         for (ch = read(); ch > 0 && ch != '\n' && ch != '\r'; ch = read())
725           _cb.append((char) ch);
726
727         if (ch == '\r') {
728           ch = read();
729           if (ch != '\n')
730             _peek = ch;
731         }
732
733         ch = read();
734       } while (ch == '#');
735
736       _peek = ch;
737       _lexeme = _cb.toString();
738       return COMMENT;
739
740     default:
741       if (XmlChar.isNameStart(ch)) {
742         for (; XmlChar.isNameChar(ch); ch = read()) {
743           _cb.append((char) ch);
744         }
745         _peek = ch;
746         _lexeme = _cb.toString().intern();
747
748         int token = _tokenMap.get(_lexeme);
749
750         if (token > 0)
751           return token;
752         else
753           return IDENTIFIER;
754       }
755       else {
756         throw error(L.l("Unknown character `{0}'", String.valueOf((char) ch)));
757       }
758     }
759   }
760
761   private String JavaDoc parseLiteral()
762     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
763   {
764     int end = skipWhitespace();
765
766     if (end != '"' && end != '\'')
767       throw error(L.l("expected `\"' at `{0}'", String.valueOf((char) end)));
768
769     _cb.clear();
770     int ch = read();
771     for (; ch >= 0 && ch != end; ch = read()) {
772       _cb.append((char) ch);
773     }
774
775     if (ch != end)
776       throw error(L.l("expected `\"' at `{0}'", String.valueOf((char) ch)));
777
778     return _cb.toString();
779   }
780
781
782   private String JavaDoc parseIdentifier()
783     throws IOException JavaDoc, SAXException JavaDoc, RelaxException
784   {
785     int ch = skipWhitespace();
786
787     if (! XmlChar.isNameChar(ch))
788       throw error(L.l("expected identifier character at `{0}'", String.valueOf((char) ch)));
789
790     _cb.clear();
791     for (; XmlChar.isNameChar(ch); ch = read()) {
792       _cb.append((char) ch);
793     }
794
795     return _cb.toString();
796   }
797
798   /**
799    * Parses whitespace.
800    */

801   private int skipWhitespace()
802     throws IOException JavaDoc, SAXException JavaDoc
803   {
804     int ch;
805       
806     for (ch = read(); XmlChar.isWhitespace(ch); ch = read()) {
807     }
808
809     return ch;
810   }
811
812   /**
813    * Creates an error.
814    */

815   private SAXException JavaDoc error(String JavaDoc msg)
816   {
817     return new SAXException JavaDoc(_filename + ":" + _line + ": " + msg);
818   }
819
820   /**
821    * Returns the current location string.
822    */

823   public String JavaDoc getLocation()
824   {
825     return _filename + ":" + _line;
826   }
827
828   /**
829    * Reads a character.
830    */

831   private int read()
832     throws IOException JavaDoc
833   {
834     int ch = _peek;
835     
836     if (ch >= 0) {
837       _peek = -1;
838       return ch;
839     }
840       
841     ch = _is.read();
842
843     if (ch == '\n')
844       _line++;
845     else if (ch == '\r') {
846       _line++;
847       ch = _is.read();
848       
849       if (ch != '\n') {
850         _peek = ch;
851         ch = '\n';
852       }
853     }
854
855     return ch;
856   }
857
858   static {
859     _tokenMap.put("namespace", NAMESPACE);
860     _tokenMap.put("default", DEFAULT);
861     
862     _tokenMap.put("start", START);
863     _tokenMap.put("div", DIV);
864     
865     _tokenMap.put("element", ELEMENT);
866     _tokenMap.put("attribute", ATTRIBUTE);
867     
868     _tokenMap.put("text", TEXT);
869     _tokenMap.put("string", STRING);
870     _tokenMap.put("token", TOKEN);
871     
872     _tokenMap.put("empty", EMPTY);
873     
874     _tokenMap.put("include", INCLUDE);
875   }
876 }
877
Popular Tags