KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > codehaus > groovy > syntax > lexer > StringLexer


1 package org.codehaus.groovy.syntax.lexer;
2
3 //{{{ imports
4
import org.codehaus.groovy.syntax.ReadException;
5 import org.codehaus.groovy.syntax.Token;
6 import org.codehaus.groovy.GroovyBugError;
7 //}}}
8

9 /**
10  * A Lexer for processing standard strings.
11  *
12  * @author Chris Poirier
13  */

14
15 public class StringLexer extends TextLexerBase
16 {
17
18     protected String JavaDoc delimiter = null;
19     protected char watchFor;
20     protected boolean allowGStrings = false;
21     protected boolean emptyString = true; // If set, we need to send an empty string
22

23
24    /**
25     * If set true, the filter will allow \\ and \$ to pass through unchanged.
26     * You should set this appropriately BEFORE setting source!
27     */

28
29     public void allowGStrings( boolean allow )
30     {
31         allowGStrings = allow;
32     }
33
34
35
36    /**
37     * Returns a single STRING, then null. The STRING is all of the processed
38     * input. Backslashes are stripped, with the \r, \n, and \t converted
39     * appropriately.
40     */

41
42     public Token undelegatedNextToken( ) throws ReadException, LexerException
43     {
44         if( emptyString )
45         {
46             emptyString = false;
47             return Token.newString( "", getStartLine(), getStartColumn() );
48         }
49         else if( finished )
50         {
51             return null;
52         }
53         else
54         {
55             StringBuffer JavaDoc string = new StringBuffer JavaDoc();
56
57             while( la(1) != CharStream.EOS )
58             {
59                 string.append( consume() );
60             }
61             
62             if( la(1) == CharStream.EOS && string.length() == 0 )
63             {
64                 finished = true;
65             }
66
67             return Token.newString( string.toString(), getStartLine(), getStartColumn() );
68         }
69     }
70
71
72
73    /**
74     * Controls delimiter search. When turned on, the first thing we do
75     * is check for and eat our delimiter.
76     */

77
78     public void delimit( boolean delimit )
79     {
80         super.delimit( delimit );
81
82         if( delimit )
83         {
84             try
85             {
86                 if( !finished && la(1) == CharStream.EOS )
87                 {
88                     finishUp();
89
90                     //
91
// The GStringLexer will correctly handle the empty string.
92
// We don't. In order to ensure that an empty string is
93
// supplied, we set a flag that is checked during
94
// undelegatedNextToken().
95

96                     if( !allowGStrings )
97                     {
98                         emptyString = true;
99                     }
100                 }
101             }
102             catch( Exception JavaDoc e )
103             {
104                 finished = true;
105             }
106         }
107     }
108
109
110
111
112    /**
113     * Sets the source lexer and identifies and consumes the opening delimiter.
114     */

115
116     public void setSource( Lexer source )
117     {
118         super.setSource( source );
119
120         emptyString = false;
121
122         try
123         {
124             char c = source.la();
125             switch( c )
126             {
127                 case '\'':
128                 case '"':
129                     mark();
130                     source.consume();
131
132                     if( source.la() == c && source.la(2) == c )
133                     {
134                         source.consume(); source.consume();
135                         delimiter = new StringBuffer JavaDoc().append(c).append(c).append(c).toString();
136                     }
137                     else
138                     {
139                         delimiter = new StringBuffer JavaDoc().append(c).toString();
140                     }
141
142                     watchFor = delimiter.charAt(0);
143                     break;
144
145
146                 default:
147                 {
148                     throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
149                 }
150             }
151
152             restart();
153             delimit( true );
154         }
155         catch( Exception JavaDoc e )
156         {
157             //
158
// If we couldn't read our delimiter, we'll just
159
// cancel our source. nextToken() will return null.
160

161             e.printStackTrace();
162             unsetSource( );
163         }
164     }
165
166
167
168    /**
169     * Unsets our source.
170     */

171
172     public void unsetSource()
173     {
174         super.unsetSource();
175         delimiter = null;
176         finished = true;
177         emptyString = false;
178     }
179
180
181
182
183   //---------------------------------------------------------------------------
184
// STREAM ROUTINES
185

186     private int lookahead = 0; // the number of characters identified
187
private char[] characters = new char[3]; // the next characters identified by la()
188
private int[] widths = new int[3]; // the source widths of the next characters
189

190
191
192    /**
193     * Returns the next <code>k</code>th character, without consuming any.
194     */

195
196     public char la(int k) throws LexerException, ReadException
197     {
198
199         if( !finished && source != null )
200         {
201
202             if( delimited )
203             {
204
205                 if( k > characters.length )
206                 {
207                     throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
208                 }
209
210                 if( lookahead >= k )
211                 {
212                     return characters[k-1];
213                 }
214
215                 lookahead = 0;
216
217                 char c = ' ', c1 = ' ', c2 = ' ';
218                 int offset = 1, width = 0;
219                 for( int i = 1; i <= k; i++ )
220                 {
221                     c1 = source.la(offset);
222                     C1_SWITCH: switch( c1 )
223                     {
224                         case CharStream.EOS:
225                         {
226                             return c1;
227                         }
228
229                         case '\\':
230                         {
231                             c2 = source.la( offset + 1 );
232
233                             ESCAPE_SWITCH: switch( c2 )
234                             {
235
236                                 case CharStream.EOS:
237                                     return c2;
238
239                                 case '\\':
240                                     c = '\\';
241                                     width = 2;
242                                     break ESCAPE_SWITCH;
243                                 case '$':
244                                 {
245                                     if( allowGStrings )
246                                     {
247                                         c = c1;
248                                         width = 1;
249                                     }
250                                     else
251                                     {
252                                         c = c2;
253                                         width = 2;
254                                     }
255                                     break ESCAPE_SWITCH;
256                                 }
257
258                                 case 'r':
259                                     c = '\r';
260                                     width = 2;
261                                     break ESCAPE_SWITCH;
262
263                                 case 't':
264                                     c = '\t';
265                                     width = 2;
266                                     break ESCAPE_SWITCH;
267
268                                 case 'n':
269                                     c = '\n';
270                                     width = 2;
271                                     break ESCAPE_SWITCH;
272
273
274                                 default:
275                                     c = c2;
276                                     width = 2;
277                                     break ESCAPE_SWITCH;
278                             }
279                             break C1_SWITCH;
280                         }
281
282                         default:
283                         {
284                             if( c1 == watchFor )
285                             {
286                                 boolean atEnd = true;
287                                 for( int j = 1; j < delimiter.length(); j++ )
288                                 {
289                                     if( source.la(offset+j) != delimiter.charAt(j) )
290                                     {
291                                         atEnd = false;
292                                         break;
293                                     }
294                                 }
295
296                                 if( atEnd )
297                                 {
298                                     return CharStream.EOS;
299                                 }
300                             }
301
302                             c = c1;
303                             width = 1;
304                             break C1_SWITCH;
305                         }
306                     }
307
308
309                     characters[lookahead] = c;
310                     widths[lookahead] = width;
311
312                     offset += width;
313                     lookahead += 1;
314                 }
315
316                 return c; // <<< FLOW CONTROL <<<<<<<<<
317
}
318
319             lookahead = 0;
320             return source.la(k);
321         }
322
323         return CharStream.EOS;
324
325     }
326
327
328
329    /**
330     * Eats a character from the input stream. Searches for the delimiter if
331     * delimited. Note that turning delimiting on also checks if we are at the
332     * delimiter, so if we aren't finished, there is something to consume.
333     */

334
335     public char consume() throws LexerException, ReadException
336     {
337         if( !finished && source != null )
338         {
339             char c = CharStream.EOS;
340
341             if( delimited )
342             {
343                 if( lookahead < 1 )
344                 {
345                     la( 1 );
346                 }
347
348                 if( lookahead >= 1 )
349                 {
350                     c = characters[0];
351                     for( int i = 0; i < widths[0]; i++ )
352                     {
353                         source.consume();
354                     }
355
356                     lookahead = 0;
357                 }
358
359                 if( la(1) == CharStream.EOS )
360                 {
361                     finishUp();
362                 }
363             }
364             else
365             {
366                 c = source.consume();
367             }
368
369             lookahead = 0;
370             return c;
371         }
372
373         return CharStream.EOS;
374     }
375
376
377
378    /**
379     * Eats our delimiter from the stream and marks us finished.
380     */

381
382     protected void finishUp() throws LexerException, ReadException
383     {
384         for( int i = 0; i < delimiter.length(); i++ )
385         {
386             char c = source.la(1);
387             if( c == CharStream.EOS )
388             {
389                 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
390             }
391             else if( c == delimiter.charAt(i) )
392             {
393                 source.consume();
394             }
395             else
396             {
397                 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
398             }
399         }
400
401         finish();
402     }
403
404 }
405
Popular Tags