KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > ruby > lexer > RubyStringLexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19 package org.netbeans.modules.ruby.lexer;
20
21 import org.netbeans.modules.ruby.lexer.RubyStringTokenId;
22 import org.netbeans.api.lexer.Token;
23 import org.netbeans.spi.lexer.Lexer;
24 import org.netbeans.spi.lexer.LexerInput;
25 import org.netbeans.spi.lexer.LexerRestartInfo;
26 import org.netbeans.spi.lexer.TokenFactory;
27
28
29 /**
30  * Lexical analyzer for Ruby quoted Strings
31  *
32  * @author Tor Norbye
33  * @version 1.00
34  */

35 public final class RubyStringLexer implements Lexer<RubyStringTokenId> {
36     private static final int EOF = LexerInput.EOF;
37     private LexerInput input;
38     private TokenFactory<RubyStringTokenId> tokenFactory;
39     private boolean substituting;
40
41     /**
42      * A Lexer for ruby strings
43      * @param substituting If true, handle substitution rules for double quoted strings, otherwise
44      * single quoted strings.
45      */

46     public RubyStringLexer(LexerRestartInfo<RubyStringTokenId> info, boolean substituting) {
47         this.input = info.input();
48         this.tokenFactory = info.tokenFactory();
49         this.substituting = substituting;
50         assert (info.state() == null); // passed argument always null
51
}
52
53     public Object JavaDoc state() {
54         return null;
55     }
56
57     public Token<RubyStringTokenId> nextToken() {
58         return substituting ? nextTokenDoubleQuotes() : nextTokenSingleQuotes();
59     }
60
61     public Token<RubyStringTokenId> nextTokenSingleQuotes() {
62         while (true) {
63             int ch = input.read();
64
65             switch (ch) {
66             case EOF:
67
68                 if (input.readLength() > 0) {
69                     return token(RubyStringTokenId.STRING_TEXT);
70                 } else {
71                     return null;
72                 }
73
74             case '\\':
75
76                 if (input.readLength() > 1) { // already read some text
77
input.backup(1);
78
79                     return tokenFactory.createToken(RubyStringTokenId.STRING_TEXT,
80                         input.readLength());
81                 }
82
83                 switch (ch = input.read()) {
84                 case '\\':
85                 case '"':
86                     return token(RubyStringTokenId.STRING_ESCAPE);
87
88                 default:
89                     return token(RubyStringTokenId.STRING_INVALID);
90                 }
91             }
92         }
93     }
94
95     public Token<RubyStringTokenId> nextTokenDoubleQuotes() {
96         while (true) {
97             int ch = input.read();
98
99             switch (ch) {
100             case EOF:
101
102                 if (input.readLength() > 0) {
103                     return token(RubyStringTokenId.STRING_TEXT);
104                 } else {
105                     return null;
106                 }
107
108             // #{code} = Value of code
109
case '#':
110
111                 int f = input.read();
112                 if (f == '{') {
113                 //if (input.read() == '{') {
114
if (input.readLength() > 2) { // already read some text
115
input.backup(2);
116
117                         return tokenFactory.createToken(RubyStringTokenId.STRING_TEXT,
118                             input.readLength());
119                     }
120
121                     // Look for matching }...
122
// TODO: Figure out if I need to do anything else here,
123
// e.g. avoid escapes and such
124
int c;
125
126                     while (true) {
127                         c = input.read();
128
129                         if ((c == EOF) || (c == '}')) {
130                             break;
131                         }
132                     }
133
134                     return token(RubyStringTokenId.EMBEDDED_RUBY);
135                 } else {
136                     continue;
137                 }
138
139             case '\\':
140
141                 if (input.readLength() > 1) { // already read some text
142
input.backup(1);
143
144                     return tokenFactory.createToken(RubyStringTokenId.STRING_TEXT,
145                         input.readLength());
146                 }
147
148                 switch (ch = input.read()) {
149                 // In general, \x = x
150
// Thus, just special case out the exceptions
151

152                 // Hex escape: \xnn = Hex nn
153
case 'x':
154
155                 // Octal escape: \nnn = Octal nnn
156
case '0':
157                 case '1':
158                 case '2':
159                 case '3':
160
161                     switch (input.read()) {
162                     case '0':
163                     case '1':
164                     case '2':
165                     case '3':
166                     case '4':
167                     case '5':
168                     case '6':
169                     case '7':
170
171                         switch (input.read()) {
172                         case '0':
173                         case '1':
174                         case '2':
175                         case '3':
176                         case '4':
177                         case '5':
178                         case '6':
179                         case '7':
180                             return token(RubyStringTokenId.STRING_ESCAPE); // valid octal escape
181
}
182
183                         input.backup(1);
184
185                         continue;
186                     }
187
188                     input.backup(1);
189
190                     continue; // Just a \0 etc -> 0
191

192                 // \cx = Control-x
193
case 'c': {
194                     // If the next character is x, or -x, then it's a single sequence
195
int next = input.read();
196
197                     if (next == 'x') {
198                         return token(RubyStringTokenId.STRING_ESCAPE);
199                     } else {
200                         input.backup(1);
201                     }
202
203                     continue;
204                 }
205
206                 // \C-x = Control-x
207
case 'C': {
208                     int next = input.read();
209
210                     if (next == '-') {
211                         next = input.read();
212
213                         if (next == 'x') {
214                             return token(RubyStringTokenId.STRING_ESCAPE);
215                         } else {
216                             input.backup(2);
217                         }
218                     } else {
219                         input.backup(1);
220                     }
221
222                     continue;
223                 }
224
225                 // \M-x = Meta-x
226
case 'M': {
227                     int next = input.read();
228
229                     if (next == '-') {
230                         next = input.read();
231
232                         if (next == 'x') {
233                             return token(RubyStringTokenId.STRING_ESCAPE);
234                         } else {
235                             input.backup(2);
236                         }
237                     } else {
238                         input.backup(1);
239                     }
240
241                     continue;
242                 }
243
244                 // TODO
245
// Meta-control-x: \M-\C-x
246
//case 'M':
247
// return;
248
default:
249
250                     // There are lots of special escapes: \a, \b, \e, etc.
251
// but we don't need to actually substitute these, since
252
// lexically they have the same form as \x (which is = x),
253
// so treat these all the same:
254
return token(RubyStringTokenId.STRING_ESCAPE);
255                 }
256             }
257         }
258     }
259
260     private Token<RubyStringTokenId> token(RubyStringTokenId id) {
261         return tokenFactory.createToken(id);
262     }
263
264     public void release() {
265     }
266 }
267
Popular Tags