KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > codehaus > groovy > syntax > lexer > GroovyLexerBase


1 package org.codehaus.groovy.syntax.lexer;
2
3 import org.codehaus.groovy.syntax.ReadException;
4 import org.codehaus.groovy.syntax.Numbers;
5 import org.codehaus.groovy.syntax.Types;
6 import org.codehaus.groovy.syntax.Token;
7
/**
 * The core code used in lexing Groovy.
 *
 * @author Bob Mcwhirter
 * @author James Strachan
 * @author John Wilson
 * @author Chris Poirier
 */

16
17 public class GroovyLexerBase extends LexerBase
18 {
19
20     protected StringLexer stringLexer = new StringLexer(); // support lexer for processing strings
21
protected GStringLexer gstringLexer = new GStringLexer(); // support lexer for processing GStrings
22

23
24    /**
25     * Finds and returns (and consumes) the next token from the underlying stream.
26     * Returns null when out of tokens.
27     */

28
29     public Token nextToken() throws ReadException, LexerException
30     {
31         // System.out.println( "entering GroovyLexerBase.nextToken() on " + this );
32

33         Token token = null;
34         OUTER_LOOP : while (token == null)
35         {
36
37             //
38
// Get from the delegate, if available
39

40             if( delegate != null )
41             {
42                 token = delegate.nextToken();
43
44                 if( token == null )
45                 {
46                     undelegate();
47                 }
48                 else
49                 {
50                     break OUTER_LOOP;
51                 }
52             }
53
54
55             //
56
// Otherwise, do it the hard way.
57

58             char c = la();
59
60             ROOT_SWITCH : switch (c)
61             {
62                 case (CharStream.EOS) :
63                 {
64                     break OUTER_LOOP;
65                 }
66                 case (' ') :
67                 case ('\t') :
68                 {
69                     consume();
70                     token = null;
71                     break ROOT_SWITCH;
72                 }
73                 case ('\r') :
74                 case ('\n') :
75                 {
76                     mark();
77                     token = tokenizeEOL();
78                     break ROOT_SWITCH;
79                 }
80                 case ('{') :
81                 {
82                     mark();
83                     consume();
84                     token = symbol( Types.LEFT_CURLY_BRACE );
85                     break ROOT_SWITCH;
86                 }
87                 case ('}') :
88                 {
89                     mark();
90                     consume();
91                     token = symbol( Types.RIGHT_CURLY_BRACE );
92                     break ROOT_SWITCH;
93                 }
94                 case ('[') :
95                 {
96                     mark();
97                     consume();
98                     token = symbol( Types.LEFT_SQUARE_BRACKET );
99                     break ROOT_SWITCH;
100                 }
101                 case (']') :
102                 {
103                     mark();
104                     consume();
105                     token = symbol( Types.RIGHT_SQUARE_BRACKET );
106                     break ROOT_SWITCH;
107                 }
108                 case ('(') :
109                 {
110                     mark();
111                     consume();
112                     token = symbol( Types.LEFT_PARENTHESIS );
113                     break ROOT_SWITCH;
114                 }
115                 case (')') :
116                 {
117                     mark();
118                     consume();
119                     token = symbol( Types.RIGHT_PARENTHESIS );
120                     break ROOT_SWITCH;
121                 }
122                 case ('#') :
123                 {
124                     consume();
125
126                     token = symbol( Types.NEWLINE, -1 );
127
128                     CONSUME_LOOP : while( true )
129                     {
130                         switch (c = la())
131                         {
132                             case ('\r') :
133                             case ('\n') :
134                             {
135                                 readEOL();
136                                 break CONSUME_LOOP;
137                             }
138                             case CharStream.EOS :
139                             {
140                                 break CONSUME_LOOP;
141                             }
142                             default :
143                             {
144                                 consume();
145                             }
146                         }
147                     }
148                     break ROOT_SWITCH;
149                 }
150                 case ('/') :
151                 {
152                     mark();
153                     consume();
154
155                     c = la();
156
157                     MULTICHAR_SWITCH : switch (c)
158                     {
159                         case ('=') :
160                         {
161                             consume();
162                             token = symbol( Types.DIVIDE_EQUAL );
163                             break MULTICHAR_SWITCH;
164                         }
165                         case ('/') :
166                         {
167                             consume();
168                             token = symbol( Types.NEWLINE, -2 );
169
170                             CONSUME_LOOP : while (true)
171                             {
172                                 switch (c = la())
173                                 {
174                                     case ('\r') :
175                                     case ('\n') :
176                                     {
177                                         readEOL();
178                                         break CONSUME_LOOP;
179                                     }
180                                     case CharStream.EOS :
181                                     {
182                                         break CONSUME_LOOP;
183                                     }
184                                     default :
185                                     {
186                                         consume();
187                                     }
188                                 }
189                             }
190                             break MULTICHAR_SWITCH;
191                         }
192                         case ('*') :
193                         {
194                             CONSUME_LOOP : while (true)
195                             {
196                                 CONSUME_SWITCH : switch (c = la())
197                                 {
198                                     case ('*') :
199                                     {
200                                         consume();
201                                         if (la() == '/')
202                                         {
203                                             consume();
204                                             break CONSUME_LOOP;
205                                         }
206                                         break CONSUME_SWITCH;
207                                     }
208                                     case ('\r') :
209                                     case ('\n') :
210                                     {
211                                         readEOL();
212                                         break CONSUME_SWITCH;
213                                     }
214                                     case CharStream.EOS :
215                                     {
216                                         break CONSUME_LOOP;
217                                     }
218                                     default :
219                                     {
220                                         consume();
221                                     }
222                                 }
223                             }
224                             token = null;
225                             break MULTICHAR_SWITCH;
226                         }
227                         default :
228                         {
229                             token = symbol( Types.DIVIDE );
230                             break MULTICHAR_SWITCH;
231                         }
232                     }
233                     break ROOT_SWITCH;
234                 }
235                 case ('%') :
236                 {
237                     mark();
238                     consume();
239
240                     c = la();
241
242                     MULTICHAR_SWITCH : switch (c)
243                     {
244                         case ('=') :
245                         {
246                             consume();
247                             token = symbol( Types.MOD_EQUAL );
248                             break MULTICHAR_SWITCH;
249                         }
250                         default :
251                         {
252                             token = symbol( Types.MOD );
253                             break MULTICHAR_SWITCH;
254                         }
255                     }
256                     break ROOT_SWITCH;
257                 }
258                 case ('\\') :
259                 {
260                     mark();
261                     consume();
262
263                     c = la();
264
265                     MULTICHAR_SWITCH : switch (c)
266                     {
267                         case ('=') :
268                         {
269                             consume();
270                             token = symbol( Types.INTDIV_EQUAL );
271                             break MULTICHAR_SWITCH;
272                         }
273                         default :
274                         {
275                             token = symbol( Types.INTDIV );
276                             break MULTICHAR_SWITCH;
277                         }
278                     }
279                     break ROOT_SWITCH;
280                 }
281                 case ('~') :
282                 {
283                     mark();
284                     consume();
285
286                     token = symbol( Types.REGEX_PATTERN );
287                     break ROOT_SWITCH;
288                 }
289                 case ('!') :
290                 {
291                     mark();
292                     consume();
293
294                     c = la();
295
296                     MULTICHAR_SWITCH : switch (c)
297                     {
298                         case ('=') :
299                         {
300                             consume();
301                             if( la() == '=' )
302                             {
303                                 consume();
304                                 token = symbol( Types.COMPARE_NOT_IDENTICAL );
305                             }
306                             else
307                             {
308                                 token = symbol( Types.COMPARE_NOT_EQUAL );
309                             }
310                             break MULTICHAR_SWITCH;
311                         }
312                         default :
313                         {
314                             token = symbol( Types.NOT );
315                             break MULTICHAR_SWITCH;
316                         }
317                     }
318                     break ROOT_SWITCH;
319                 }
320                 case ('=') :
321                 {
322                     mark();
323                     consume();
324
325                     c = la();
326
327                     MULTICHAR_SWITCH : switch (c)
328                     {
329                         case ('=') :
330                         {
331                             consume();
332                             c = la();
333
334                             switch (c)
335                             {
336                                 case '=' :
337                                 {
338                                     consume();
339                                     token = symbol( Types.COMPARE_IDENTICAL );
340                                     break;
341                                 }
342                                 case '~' :
343                                 {
344                                     consume();
345                                     token = symbol( Types.MATCH_REGEX );
346                                     break;
347                                 }
348                                 default :
349                                 {
350                                     token = symbol( Types.COMPARE_EQUAL );
351                                 }
352                             }
353                             break MULTICHAR_SWITCH;
354                         }
355                         case '~' :
356                         {
357                             consume();
358                             token = symbol( Types.FIND_REGEX );
359                             break MULTICHAR_SWITCH;
360                         }
361                         default :
362                         {
363                             token = symbol( Types.EQUAL );
364                             break MULTICHAR_SWITCH;
365                         }
366                     }
367                     break ROOT_SWITCH;
368                 }
369                 case ('&') :
370                 {
371                     mark();
372                     consume();
373
374                     c = la();
375
376                     MULTICHAR_SWITCH : switch (c)
377                     {
378                         case ('&') :
379                         {
380                             consume();
381
382                             if( la() == '=' )
383                             {
384                                 consume();
385                                 token = symbol( Types.LOGICAL_AND_EQUAL );
386                             }
387                             else
388                             {
389                                 token = symbol( Types.LOGICAL_AND );
390                             }
391
392                             break MULTICHAR_SWITCH;
393                         }
394                         default :
395                         {
396                             unexpected( c, new char[] { '&' }, 1 );
397                         }
398                     }
399                     break ROOT_SWITCH;
400                 }
401                 case ('|') :
402                 {
403                     mark();
404                     consume();
405                     c = la();
406
407                     MULTICHAR_SWITCH : switch (c)
408                     {
409                         case ('|') :
410                         {
411                             consume();
412
413                             if( la() == '=' )
414                             {
415                                 consume();
416                                 token = symbol( Types.LOGICAL_OR_EQUAL );
417                             }
418                             else
419                             {
420                                 token = symbol( Types.LOGICAL_OR );
421                             }
422
423                             break MULTICHAR_SWITCH;
424                         }
425                         default :
426                         {
427                             token = symbol( Types.PIPE );
428                             break MULTICHAR_SWITCH;
429                         }
430                     }
431                     break ROOT_SWITCH;
432                 }
433                 case ('+') :
434                 {
435                     mark();
436                     consume();
437
438                     c = la();
439
440                     MULTICHAR_SWITCH : switch (c)
441                     {
442                         case ('+') :
443                         {
444                             consume();
445                             token = symbol( Types.PLUS_PLUS );
446                             break MULTICHAR_SWITCH;
447                         }
448                         case ('=') :
449                         {
450                             consume();
451                             token = symbol( Types.PLUS_EQUAL );
452                             break MULTICHAR_SWITCH;
453                         }
454                         default :
455                         {
456                             token = symbol( Types.PLUS );
457                             break MULTICHAR_SWITCH;
458                         }
459                     }
460                     break ROOT_SWITCH;
461                 }
462                 case ('-') :
463                 {
464                     mark();
465                     consume();
466
467                     c = la();
468
469                     MULTICHAR_SWITCH : switch (c)
470                     {
471                         case ('-') :
472                         {
473                             consume();
474                             token = symbol( Types.MINUS_MINUS );
475                             break MULTICHAR_SWITCH;
476                         }
477                         case ('=') :
478                         {
479                             consume();
480                             token = symbol( Types.MINUS_EQUAL );
481                             break MULTICHAR_SWITCH;
482                         }
483                         case ('>') :
484                         {
485                             consume();
486                             token = symbol( Types.NAVIGATE );
487                             break MULTICHAR_SWITCH;
488                         }
489                         default :
490                         {
491                             token = symbol( Types.MINUS );
492                             break MULTICHAR_SWITCH;
493                         }
494                     }
495                     break ROOT_SWITCH;
496                 }
497                 case ('*') :
498                 {
499                     mark();
500                     consume();
501
502                     c = la();
503
504                     MULTICHAR_SWITCH : switch (c)
505                     {
506                         case ('=') :
507                         {
508                             consume();
509                             token = symbol( Types.MULTIPLY_EQUAL );
510                             break MULTICHAR_SWITCH;
511                         }
512                         default :
513                         {
514                             token = symbol( Types.MULTIPLY );
515                             break MULTICHAR_SWITCH;
516                         }
517                     }
518                     break ROOT_SWITCH;
519                 }
520                 case (':') :
521                 {
522                     mark();
523                     consume();
524
525                     token = symbol( Types.COLON );
526                     break ROOT_SWITCH;
527                 }
528                 case (',') :
529                 {
530                     mark();
531                     consume();
532                     token = symbol( Types.COMMA );
533                     break ROOT_SWITCH;
534                 }
535                 case (';') :
536                 {
537                     mark();
538                     consume();
539                     token = symbol( Types.SEMICOLON );
540                     break ROOT_SWITCH;
541                 }
542                 case ('?') :
543                 {
544                     mark();
545                     consume();
546                     token = symbol( Types.QUESTION );
547                     break ROOT_SWITCH;
548                 }
549                 case ('<') :
550                 {
551                     mark();
552                     consume();
553
554                     c = la();
555
556                     MULTICHAR_SWITCH : switch (c)
557                     {
558                         case ('=') :
559                         {
560                             consume();
561                             c = la();
562                             if (c == '>')
563                             {
564                                 consume();
565                                 token = symbol( Types.COMPARE_TO );
566                             }
567                             else
568                             {
569                                 token = symbol( Types.COMPARE_LESS_THAN_EQUAL );
570                             }
571                             break MULTICHAR_SWITCH;
572                         }
573                         case ('<') :
574                         {
575                             consume();
576                             c = la();
577
578                             //
579
// It's a "here-doc", created using <<<TOK ... \nTOK. The terminator
580
// runs from the <<< to the end of the line. The marker is then used
581
// to create a HereDocLexer which becomes our delegate until the heredoc
582
// is finished.
583

584                             if (c == '<')
585                             {
586                                 consume();
587
588                                 StringBuffer JavaDoc marker = new StringBuffer JavaDoc();
589                                 while( (c = la()) != '\n' && c != '\r' && c != CharStream.EOS )
590                                 {
591                                     marker.append( consume() );
592                                 }
593
594                                 readEOL();
595
596                                 Lexer child = new HereDocLexer( marker.toString() );
597                                 delegate( child );
598
599                                 gstringLexer.reset();
600                                 child.delegate( gstringLexer );
601
602                                 break ROOT_SWITCH;
603                             }
604                             else
605                             {
606                                 token = symbol( Types.LEFT_SHIFT );
607                                 break ROOT_SWITCH;
608                             }
609                         }
610                         default :
611                         {
612                             token = symbol( Types.COMPARE_LESS_THAN );
613                             break MULTICHAR_SWITCH;
614                         }
615                     }
616                     break ROOT_SWITCH;
617                 }
618                 case ('>') :
619                 {
620                     mark();
621                     consume();
622
623                     c = la();
624
625                     MULTICHAR_SWITCH : switch (c)
626                     {
627                         case ('=') :
628                         {
629                             consume();
630                             token = symbol( Types.COMPARE_GREATER_THAN_EQUAL );
631                             break MULTICHAR_SWITCH;
632                         }
633                         case ('>') :
634                         {
635                             consume();
636                             if( la() == '>' )
637                             {
638                                 consume();
639                                 token = symbol( Types.RIGHT_SHIFT_UNSIGNED );
640                             }
641                             else
642                             {
643                                 token = symbol( Types.RIGHT_SHIFT );
644                             }
645                             break MULTICHAR_SWITCH;
646                         }
647                         default :
648                         {
649                             token = symbol( Types.COMPARE_GREATER_THAN );
650                             break MULTICHAR_SWITCH;
651                         }
652                     }
653                     break ROOT_SWITCH;
654                 }
655                 case ('\'') :
656                 {
657                     mark();
658
659                     stringLexer.reset();
660                     stringLexer.allowGStrings(false);
661                     delegate( stringLexer );
662
663                     break ROOT_SWITCH;
664                 }
665                 case ('"') :
666                 {
667                     mark();
668
669                     stringLexer.reset();
670                     stringLexer.allowGStrings(true);
671                     delegate( stringLexer );
672
673                     gstringLexer.reset();
674                     stringLexer.delegate( gstringLexer );
675
676                     break ROOT_SWITCH;
677                 }
678                 case ('0') :
679                 case ('1') :
680                 case ('2') :
681                 case ('3') :
682                 case ('4') :
683                 case ('5') :
684                 case ('6') :
685                 case ('7') :
686                 case ('8') :
687                 case ('9') :
688                 case ('.') :
689                 {
690                     mark();
691
692                     //
693
// If it is a '.' and not followed by a digit,
694
// it's an operator.
695

696                     if( c == '.' && !Numbers.isDigit(la(2)) )
697                     {
698                         consume();
699                         if( la() == '.' )
700                         {
701                             consume();
702                             if( la() == '.' )
703                             {
704                                 consume();
705                                 token = symbol( Types.DOT_DOT_DOT );
706                             }
707                             else
708                             {
709                                 token = symbol( Types.DOT_DOT );
710                             }
711                         }
712                         else
713                         {
714                             token = symbol( Types.DOT );
715                         }
716                         break ROOT_SWITCH;
717                     }
718
719
720                     //
721
// Otherwise, we are processing a number (integer or decimal).
722

723                     StringBuffer JavaDoc numericLiteral = new StringBuffer JavaDoc();
724                     boolean isDecimal = false;
725
726
727                     //
728
// If it starts 0 and isn't a decimal number, we give
729
// special handling for hexadecimal or octal notation.
730

731                     char c2 = la(2);
732                     if( c == '0' && (c2 == 'X' || c2 == 'x' || Numbers.isDigit(c2)) )
733                     {
734                         numericLiteral.append( consume() );
735
736                         if( (c = la()) == 'X' || c == 'x' )
737                         {
738                             numericLiteral.append( consume() );
739                             if( Numbers.isHexDigit(la()) )
740                             {
741                                 while( Numbers.isHexDigit(la()) )
742                                 {
743                                     numericLiteral.append( consume() );
744                                 }
745                             }
746                             else
747                             {
748                                 unexpected( la(), numericLiteral.length(), "expected hexadecimal digit" );
749                             }
750                         }
751                         else
752                         {
753                             while( Numbers.isOctalDigit(la()) )
754                             {
755                                 numericLiteral.append( consume() );
756                             }
757
758                             if( Numbers.isDigit(la()) )
759                             {
760                                 unexpected( la(), numericLiteral.length(), "expected octal digit" );
761                             }
762                         }
763                     }
764
765
766                     //
767
// Otherwise, it's in base 10, integer or decimal.
768

769                     else
770                     {
771                         while( Numbers.isDigit(la()) )
772                         {
773                             numericLiteral.append( consume() );
774                         }
775
776
777                         //
778
// Next, check for a decimal point
779

780                         if( la() == '.' && Numbers.isDigit(la(2)) )
781                         {
782                             isDecimal = true;
783
784                             numericLiteral.append( consume() );
785                             while( Numbers.isDigit(la()) )
786                             {
787                                 numericLiteral.append( consume() );
788                             }
789
790                             //
791
// Check for an exponent
792

793                             if( (c = la()) == 'e' || c == 'E' )
794                             {
795                                 numericLiteral.append( consume() );
796
797                                 if (la() == '+' || la() == '-')
798                                 {
799                                     numericLiteral.append(consume());
800                                 }
801
802                                 if( Numbers.isDigit(la()) )
803                                 {
804                                     while( Numbers.isDigit(la()) )
805                                     {
806                                         numericLiteral.append( consume() );
807                                     }
808                                 }
809                                 else
810                                 {
811                                     unexpected( la(), numericLiteral.length(), "expected exponent" );
812                                 }
813                             }
814                         }
815                     }
816
817
818                     //
819
// If there is a type suffix, include it.
820

821                     if( Numbers.isNumericTypeSpecifier(la(), isDecimal) )
822                     {
823                         numericLiteral.append( consume() );
824                     }
825
826
827                     //
828
// For good error reporting, make sure there is nothing invalid next.
829

830                     if( Character.isJavaIdentifierPart(c = la()) )
831                     {
832                         unexpected( c, numericLiteral.length(), "expected end of numeric literal" );
833                     }
834
835
836                     //
837
// Finally, create the token.
838

839                     if( isDecimal )
840                     {
841                         token = Token.newDecimal( numericLiteral.toString(), getStartLine(), getStartColumn() );
842                     }
843                     else
844                     {
845                         token = Token.newInteger( numericLiteral.toString(), getStartLine(), getStartColumn() );
846                     }
847
848                     break ROOT_SWITCH;
849                 }
850                 default :
851                 {
852                     mark();
853                     if (Character.isJavaIdentifierStart(c))
854                     {
855                         StringBuffer JavaDoc identifier = new StringBuffer JavaDoc();
856
857                         IDENTIFIER_LOOP : while (true)
858                         {
859                             c = la();
860
861                             if (Character.isJavaIdentifierPart(c))
862                             {
863                                 identifier.append(consume());
864                             }
865                             else
866                             {
867                                 break IDENTIFIER_LOOP;
868                             }
869                         }
870
871                         String JavaDoc text = identifier.toString();
872                         token = Token.newKeyword( text, getStartLine(), getStartColumn() );
873
874                         if (token == null)
875                         {
876                             token = Token.newIdentifier( text, getStartLine(), getStartColumn() );
877                         }
878                     }
879                     else
880                     {
881                         unexpected( c, 1 );
882                     }
883
884                     break ROOT_SWITCH;
885                 }
886             }
887         }
888
889         // System.out.println( "" + this + ".nextToken() returning [" + token + "]" );
890

891         return token;
892     }
893
894 }
895
Popular Tags