KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > antlr > works > ate > syntax > generic > ATESyntaxLexer


1 /*
2
3 [The "BSD licence"]
4 Copyright (c) 2005 Jean Bovet
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16 3. The name of the author may not be used to endorse or promote products
17 derived from this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 */

31
32 package org.antlr.works.ate.syntax.generic;
33
34 import org.antlr.works.ate.syntax.misc.ATELine;
35 import org.antlr.works.ate.syntax.misc.ATEToken;
36
37 import java.util.ArrayList JavaDoc;
38 import java.util.List JavaDoc;
39
40 public class ATESyntaxLexer {
41
42     public static final int TOKEN_SINGLE_QUOTE_STRING = 1;
43     public static final int TOKEN_DOUBLE_QUOTE_STRING = 2;
44     public static final int TOKEN_SINGLE_COMMENT = 3;
45     public static final int TOKEN_COMPLEX_COMMENT = 4;
46     public static final int TOKEN_ID = 5;
47     public static final int TOKEN_CHAR = 6;
48     public static final int TOKEN_LPAREN = 7;
49     public static final int TOKEN_RPAREN = 8;
50     public static final int TOKEN_LCURLY = 9;
51     public static final int TOKEN_RCURLY = 10;
52     public static final int TOKEN_LBRACK = 11;
53     public static final int TOKEN_RBRACK = 12;
54     public static final int TOKEN_COLON = 13;
55     public static final int TOKEN_SEMI = 14;
56     public static final int TOKEN_OTHER = 15;
57
58     protected List JavaDoc<ATEToken> tokens;
59     protected String JavaDoc text;
60     protected int position;
61
62     protected int lineNumber;
63     protected int lineIndex; // position of the line in characters
64
protected List JavaDoc<ATELine> lines;
65
66     /** True if the current character is a control character (that is preceeded by a \) */
67     protected boolean controlCharacter;
68
69     /** c0 and c1 are character cache for quick access to the current
70      * character (c0) and the next character (c1)
71      */

72     protected char c0;
73     protected char c1;
74
75     public ATESyntaxLexer() {
76         lines = new ArrayList JavaDoc<ATELine>();
77         tokens = new ArrayList JavaDoc<ATEToken>();
78     }
79
80     public List JavaDoc<ATEToken> getTokens() {
81         return tokens;
82     }
83
84     public List JavaDoc<ATELine> getLines() {
85         return lines;
86     }
87
88     public int getLineNumber() {
89         return lineNumber;
90     }
91
92     public void tokenize(String JavaDoc text) {
93         this.text = text;
94
95         position = -1;
96         lineNumber = 0;
97         lines.clear();
98         lines.add(new ATELine(0));
99
100         tokens.clear();
101         tokenize();
102     }
103
104     protected void tokenize() {
105         while(nextCharacter()) {
106             ATEToken token = customMatch();
107
108             if(token != null) {
109                 // custom match matched something
110
} else if(c0 == '\'')
111                 token = matchSingleQuoteString();
112             else if(c0 == '\"')
113                 token = matchDoubleQuoteString();
114             else if(c0 == '/' && c1 == '/')
115                 token = matchSingleComment();
116             else if(c0 == '/' && c1 == '*')
117                 token = matchComplexComment();
118             else if(isLetter())
119                 token = matchID();
120             else if(c0 == '(')
121                 token = createNewToken(TOKEN_LPAREN);
122             else if(c0 == ')')
123                 token = createNewToken(TOKEN_RPAREN);
124             else if(c0 == '{')
125                 token = createNewToken(TOKEN_LCURLY);
126             else if(c0 == '}')
127                 token = createNewToken(TOKEN_RCURLY);
128             else if(c0 == '[')
129                 token = createNewToken(TOKEN_LBRACK);
130             else if(c0 == ']')
131                 token = createNewToken(TOKEN_RBRACK);
132             else if(c0 == ':')
133                 token = createNewToken(TOKEN_COLON);
134             else if(c0 == ';')
135                 token = createNewToken(TOKEN_SEMI);
136             else if(!isWhitespace())
137                 token = createNewToken(TOKEN_CHAR);
138
139             addToken(token);
140         }
141     }
142
143     protected ATEToken customMatch() {
144         return null;
145     }
146
147     public void addToken(ATEToken token) {
148         if(token != null) {
149             token.index = tokens.size();
150             tokens.add(token);
151         }
152     }
153
154     protected ATEToken matchID() {
155         int sp = position;
156         while(isID(c1) && nextCharacter()) {
157         }
158         return createNewToken(TOKEN_ID, sp);
159     }
160
161     public ATEToken matchSingleQuoteString() {
162         int sp = position;
163         while(nextCharacter()) {
164             if((c0 == '\'' || matchNewLine()) && !controlCharacter) {
165                 return createNewToken(TOKEN_SINGLE_QUOTE_STRING, sp);
166             }
167         }
168         return null;
169     }
170
171     public ATEToken matchDoubleQuoteString() {
172         int sp = position;
173         while(nextCharacter()) {
174             if((c0 == '\"' || matchNewLine()) && !controlCharacter) {
175                 return createNewToken(TOKEN_DOUBLE_QUOTE_STRING, sp);
176             }
177         }
178         return null;
179     }
180
181     public ATEToken matchSingleComment() {
182         int sp = position;
183         while(nextCharacter()) {
184             if(matchNewLine()) {
185                 return createNewToken(TOKEN_SINGLE_COMMENT, sp);
186             }
187         }
188         return createNewToken(TOKEN_SINGLE_COMMENT, sp, position);
189     }
190
191     public ATEToken matchComplexComment() {
192         int sp = position;
193         while(nextCharacter()) {
194             if(c0 == '*' && c1 == '/') {
195                 // Don't forget to eat the next character ;-)
196
nextCharacter();
197                 return createNewToken(TOKEN_COMPLEX_COMMENT, sp, Math.min(position+1, text.length()));
198             }
199         }
200         // Complex comment terminates also at the end of the text
201
return createNewToken(TOKEN_COMPLEX_COMMENT, sp, position);
202     }
203
204     public boolean nextCharacter() {
205         boolean valid = false;
206         final int length = text.length();
207         controlCharacter = false;
208
209         c0 = c1 = 0;
210         position++;
211         if(position < length) {
212             // Skip control character
213
if(text.charAt(position) == '\\') {
214                 controlCharacter = true;
215                 position += 1;
216             }
217
218             valid = position < length;
219             if(valid) {
220                 c0 = text.charAt(position);
221                 if(position + 1 < length)
222                     c1 = text.charAt(position+1);
223             }
224
225             if(matchNewLine()) {
226                 lineNumber++;
227                 lineIndex = position+1;
228                 lines.add(new ATELine(lineIndex));
229             }
230         }
231         return valid;
232     }
233
234     public boolean matchNewLine() {
235         if(c0 == '\n') {
236             // Unix
237
return true;
238         } else if(c0 == '\r' && c1 == '\n') {
239             // Windows
240
return true;
241         } else if(c0 == '\r') {
242             // Mac
243
return true;
244         } else {
245             return false;
246         }
247     }
248
249     public boolean isWhitespace() {
250         return Character.isWhitespace(c0);
251     }
252
253     public boolean isLetter() {
254         return Character.isLetter(c0);
255     }
256
257     public boolean isLetterOrDigit() {
258         return isLetterOrDigit(c0);
259     }
260
261     public boolean isLetterOrDigit(char c) {
262         return Character.isLetterOrDigit(c);
263     }
264
265     public boolean isID(char c) {
266         if(Character.isLetterOrDigit(c))
267             return true;
268
269         return c == '_' || c == '$';
270     }
271
272     public ATEToken createNewToken(int type) {
273         return createNewToken(type, position);
274     }
275
276     public ATEToken createNewToken(int type, int start) {
277         return createNewToken(type, start, position+1);
278     }
279
280     public ATEToken createNewToken(int type, int start, int end) {
281         return createNewToken(type, start, end, lineNumber, lineNumber, lineIndex, lineIndex);
282     }
283
284     public ATEToken createNewToken(int type, int start, int end,
285                                    int startLineNumber, int endLineNumber,
286                                    int startLineIndex, int endLineIndex) {
287         return new ATEToken(type, start, end, startLineNumber, endLineNumber, startLineIndex, endLineIndex, text);
288     }
289
290 }
291
Popular Tags