KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > fri > patterns > interpreter > parsergenerator > Token


1 package fri.patterns.interpreter.parsergenerator;
2
/**
 * Lexer-Parser communication struct. Utility methods defining EPSILON and terminals.
 * Definition of all global constants for parsergenerator packages.
 * <p>
 * This class defines special token identifiers: "token" (<i>Token.TOKEN</i>) and
 * "ignored" (<i>Token.IGNORED</i>), which are needed when specifying a syntax with
 * mixed parser and lexer rules.
 * <p>
 * This class defines special symbols needed to define character sets with <i>Token.UPTO</i>
 * (e.g. A..Z) and intersections with <i>Token.BUTNOT</i> (e.g. `char` - `newline`).
 *
 * @author (c) 2000, Fritz Ritzberger
 */
public class Token
{
    /** This special token symbol means "end of input" to the Parser, which stops calling <i>getNextToken()</i> then. */
    public static final String EPSILON = "\"EoI\"";

    /** Delimiter for literal terminals (strings). */
    public static final char STRING_QUOTE = '"';

    /** Delimiter for literal terminals (characters). */
    public static final char CHAR_QUOTE = '\'';

    /** Delimiter for lexer rules (terminal). */
    public static final char COMMAND_QUOTE = '`';

    /** Symbol used to define lexer character sets, e.g. <i>a..z</i>. */
    public static final String UPTO = "..";

    /** Symbol used to define lexer character set intersections, e.g. <i>char - "/*" - "//"</i>. */
    public static final String BUTNOT = "-";

    /** Reserved identifier that marks nonterminals the lexer should deliver to parser. */
    public static final String TOKEN = "token";

    /** Reserved identifier that marks tokens not to deliver to parser, e.g. <i>ignored ::= spaces ;</i>. */
    public static final String IGNORED = "ignored";

    /** The character used to mark artificial nonterminal (e.g. made from "prolog?"). Every artificial rule starts with it. */
    public static final String ARTIFICIAL_NONTERMINAL_START_CHARACTER = "_";

    /** The syntax symbol this Token represents. */
    public final String symbol;
    /** The text that was scanned for this Token. */
    public final Object text;
    /** The start and end Address of this Token. */
    public final Range range;


    /**
     * Address stores input line number (1-n), column (0-n) and character/byte offset (0-n).
     * Equality, hash code and ordering are all defined by the offset alone.
     */
    public static class Address implements Comparable
    {
        /** The line number (1-n). */
        public final int line;
        /** The column number (0-n). */
        public final int column;
        /** The character/byte offset (0-n). */
        public final int offset;

        /** Creates the address of the very first input position: line 1, column 0, offset 0. */
        public Address() {
            this(1, 0, 0);
        }

        /** Creates an address from the passed line, column and offset. */
        public Address(int line, int column, int offset) {
            this.line = line;
            this.column = column;
            this.offset = offset;
        }

        /** @return the address as "line/column". */
        public String toString() {
            return line + "/" + column;
        }

        /** @return true if the passed object is an Address with the same offset, false for null or any other type. */
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof Address)) {
                return false;
            }
            return offset == ((Address) o).offset;
        }

        /** @return the offset, which is consistent with <i>equals()</i>. */
        public int hashCode() {
            return offset;
        }

        /**
         * Orders addresses by their offset.
         * @return a negative number, zero or a positive number when this offset is smaller than,
         *     equal to or bigger than the offset of the passed Address.
         * @throws ClassCastException if the passed object is not an Address.
         */
        public int compareTo(Object o) {
            int other = ((Address) o).offset;
            // explicit comparison instead of subtraction, which yields a wrong sign on int overflow
            return offset < other ? -1 : (offset == other ? 0 : 1);
        }
    }

    /** Range stores start and end Address of a token. */
    public static class Range implements Comparable
    {
        /** The start Address of this Token, pointing to the first character/byte. */
        public final Address start;
        /** The end Address of this Token, one after the last character/byte. */
        public final Address end;

        /**
         * Creates a range from the passed addresses.
         * A null address is replaced by a default <i>new Address()</i>.
         */
        public Range(Address start, Address end) {
            this.start = start != null ? start : new Address();
            this.end = end != null ? end : new Address();
        }

        /** @return true if the passed object is a Range with equal start and end, false for null or any other type. */
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof Range)) {
                return false;
            }
            Range other = (Range) o;
            return start.equals(other.start) && end.equals(other.end);
        }

        /** @return a hash code built from start and end, consistent with <i>equals()</i>. */
        public int hashCode() {
            return 31 * start.hashCode() + end.hashCode();
        }

        /** @return the range as "start-end". */
        public String toString() {
            return start + "-" + end;
        }

        /**
         * Orders ranges by their start address, ranges with equal start by their end address.
         * The result is zero only when <i>equals()</i> is true; summing both comparisons
         * would let opposite differences cancel each other out.
         * @throws ClassCastException if the passed object is not a Range.
         */
        public int compareTo(Object o) {
            Range other = (Range) o;
            int result = start.compareTo(other.start);
            return result != 0 ? result : end.compareTo(other.end);
        }
    }


    /**
     * Creates a token. The arguments are stored as passed, none of them is checked.
     * @param symbol the syntax symbol this Token represents.
     * @param text the text that was scanned for this Token.
     * @param range the start and end Address of this Token.
     */
    public Token(String symbol, Object text, Range range) {
        this.symbol = symbol;
        this.text = text;
        this.range = range;
    }

    /**
     * Epsilon means end of input, EOF, no more bytes available.
     * @return true if passed token is not null and its symbol is the EPSILON-symbol.
     */
    public static boolean isEpsilon(Token token) {
        return token != null && isEpsilon(token.symbol);
    }

    /**
     * Epsilon means end of input, no more bytes available.
     * @return true if Token symbol not null and is the EPSILON-symbol.
     */
    public static boolean isEpsilon(String symbol) {
        // compare by content: a symbol built at runtime is not the same object as the constant
        return EPSILON.equals(symbol);
    }

    /**
     * Distinction of 'terminals' and nonterminals:
     * terminals are either starting with digit or are enclosed in quotes '"` or equal to EPSILON.
     * @return true if the passed symbol is a terminal, false for a nonterminal and for a null or empty symbol.
     */
    public static boolean isTerminal(String symbol) {
        if (symbol == null || symbol.length() == 0) {
            return false;    // there is no first character to look at
        }
        char c = symbol.charAt(0);
        return
                c == STRING_QUOTE ||
                c == CHAR_QUOTE ||
                c == COMMAND_QUOTE ||
                Character.isDigit(c) ||
                Token.isEpsilon(symbol);
    }

}
Popular Tags