KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > fri > patterns > interpreter > parsergenerator > examples > XmlLexer


1 package fri.patterns.interpreter.parsergenerator.examples;
2
3 import java.util.*;
4 import java.io.*;
5 import fri.util.TimeStopper;
6 import fri.util.io.UnicodeReader;
7 import fri.patterns.interpreter.parsergenerator.builder.SerializedLexer;
8 import fri.patterns.interpreter.parsergenerator.lexer.LexerImpl;
9 import fri.patterns.interpreter.parsergenerator.lexer.LexerSemantic;
10 import fri.patterns.interpreter.parsergenerator.lexer.ResultTree;
11 import fri.patterns.interpreter.parsergenerator.syntax.Rule;
12
13 /**
14     Example XML lexer. Not event-driven like SAX, but good for DOM-building.
15     
16     @author Fritz Ritzberger, 2003
17 */

18
19 public class XmlLexer
20 {
21     public static void main(String JavaDoc [] args)
22         throws Exception JavaDoc
23     {
24         if (args.length <= 0) {
25             System.err.println("SYNTAX: java "+XmlLexer.class.getName()+" file.xml [file.xml ...]");
26             System.err.println(" Example XML Parser");
27             System.exit(1);
28         }
29
30         // Standalone lexer as top-down parser.
31
TimeStopper timer = new TimeStopper();
32         // Building lexer from scratch takes 840 millis. Parsing takes 60 millis for a 70 line XML file.
33

34         // read the syntax from EBNF file
35
Reader syntaxInput = new InputStreamReader(XmlLexer.class.getResourceAsStream("Xml.syntax"));
36         boolean PRODUCTION = false; // always build from scratch at development time
37
LexerImpl lexer = (LexerImpl) new SerializedLexer(PRODUCTION).get(syntaxInput, "Xml");
38
39         System.err.println("time to build XML file parser was "+timer.getInterval());
40
41         for (int i = 0; i < args.length; i++) {
42             String JavaDoc parseFile = args[i];
43             Reader parseInput = new UnicodeReader(new FileInputStream(parseFile));
44             lexer.setInput(parseInput);
45             
46             System.err.println("======================== Parsing: "+parseFile+" ========================");
47             boolean result = lexer.lex(new PrintXmlLexerSemantic());
48             System.err.println("========================================================");
49             
50             System.err.println("Lexing took "+timer.getInterval()+" millis.");
51             System.err.println("Result was: "+result);
52         }
53     }
54
55     
56
57     static class PrintXmlLexerSemantic implements LexerSemantic
58     {
59         /**
60          * Receives evaluated lexer ruels and their result.
61          */

62         public void ruleEvaluated(Rule rule, ResultTree resultTree) {
63             System.out.println("Nonterminal="+rule.getNonterminal()+", range("+resultTree.getRange()+"), Input=\""+resultTree.toString()+"\"");
64         }
65     
66         /**
67          * Returns a Set of nonterminal Strings whose rule evaluations the Lexer should
68          * report to this semantic. Could return null to receive all rules.
69          * For XML only a subset of all tokens in the EBNF is needed. There is no other
70          * way than to hardcode those nonterminal names here. When using the SourceGenerator
71          * on the XML EBNF, the Strings could be imported from generated source to be consistent.
72          */

73         public Set getWantedNonterminals() {
74             Set considered = new HashSet();
75             considered.add("Name");
76             considered.add("Nmtoken");
77             considered.add("EntityValue");
78             considered.add("AttValue");
79             considered.add("SystemLiteral");
80             considered.add("PubidLiteral");
81             considered.add("CharData");
82             considered.add("Comment");
83             considered.add("VersionNum");
84             considered.add("PITargetContent");
85             considered.add("PITarget");
86             considered.add("CData");
87             considered.add("doctypedecl");
88             considered.add("SDDecl");
89             considered.add("STag");
90             considered.add("Attribute");
91             considered.add("ETag");
92             considered.add("EmptyElemTag");
93             considered.add("elementdecl");
94             considered.add("contentspec");
95             considered.add("cp");
96             considered.add("ChoiceList");
97             considered.add("SeqListOpt");
98             considered.add("Mixed");
99             considered.add("AttDef");
100             considered.add("StringType");
101             considered.add("TokenizedType");
102             considered.add("NotationType");
103             considered.add("Enumeration");
104             considered.add("DefaultDecl");
105             considered.add("CharRef");
106             considered.add("EntityRef");
107             considered.add("PEReference");
108             considered.add("GEDecl");
109             considered.add("PEDecl");
110             considered.add("EntityDef");
111             considered.add("PEDef");
112             considered.add("ExternalID");
113             considered.add("NDataDecl");
114             considered.add("EncName");
115             considered.add("NotationDecl");
116             return considered;
117         }
118         
119         public Set getIgnoredNonterminals() {
120             return null;
121         }
122
123     }
124
125 }
126
Popular Tags