KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > ruby > rhtml > RhtmlLexer


/*
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
 * or http://www.netbeans.org/cddl.txt.
 *
 * When distributing Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://www.netbeans.org/cddl.txt.
 * If applicable, add the following below the CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
 * Microsystems, Inc. All Rights Reserved.
 */

19
20 package org.netbeans.modules.ruby.rhtml;
21
22 import org.netbeans.api.lexer.Token;
23 import org.netbeans.spi.lexer.Lexer;
24 import org.netbeans.spi.lexer.LexerInput;
25 import org.netbeans.spi.lexer.LexerRestartInfo;
26 import org.netbeans.spi.lexer.TokenFactory;
27
/**
 * Syntax class for RHTML tags, based on the JSP Lexer by Petr Jiricka and Marek Fukala.
 *
 * @author Petr Jiricka
 * @author Marek Fukala
 * @author Tor Norbye
 *
 * @version 1.00
 */

37
38 public final class RhtmlLexer implements Lexer<RhtmlTokenId> {
39     
40     private static final int EOF = LexerInput.EOF;
41     
42     private LexerInput input;
43     
44     private TokenFactory<RhtmlTokenId> tokenFactory;
45     
46     public Object JavaDoc state() {
47         return state + before_el_state * 1000;
48     }
49     
50     //main internal lexer state
51
private int state = INIT;
52     
53     //secondary internal state for EL expressions in JSP
54
//is it used to eliminate a number of lexer states when EL is found -
55
//we have 8 states just in attribute value so I would have to copy the EL
56
//recognition code eight-times.
57
private int before_el_state = INIT;
58     
59     // Internal analyzer states
60
// general
61
private static final int INIT = 0; // initial lexer state = content language
62
private static final int ISI_ERROR = 1; // when the fragment does not start with <
63
private static final int ISA_LT = 2; // after '<' char
64
// tags and directives
65
private static final int ISI_TAGNAME = 3; // inside JSP tag name
66
private static final int ISI_DIRNAME = 4; // inside JSP directive name
67
private static final int ISP_TAG = 5; // after JSP tag name
68
private static final int ISP_DIR = 6; // after JSP directive name
69
private static final int ISI_TAG_I_WS = 7; // inside JSP tag after whitespace
70
private static final int ISI_DIR_I_WS = 8; // inside JSP directive after whitespace
71
private static final int ISI_ENDTAG = 9; // inside end JSP tag
72
private static final int ISI_TAG_ATTR = 10; // inside tag attribute
73
private static final int ISI_DIR_ATTR = 11; // inside directive attribute
74
private static final int ISP_TAG_EQ = 12; // just after '=' in tag
75
private static final int ISP_DIR_EQ = 13; // just after '=' in directive
76
private static final int ISI_TAG_STRING = 14; // inside string (value - "") in tag
77
private static final int ISI_DIR_STRING = 15; // inside string (value - "") in directive
78
private static final int ISI_TAG_STRING_B = 16; // inside string (value - "") after backslash in tag
79
private static final int ISI_DIR_STRING_B = 17; // inside string (value - "") after backslash in directive
80
private static final int ISI_TAG_STRING2 = 18; // inside string (value - '') in tag
81
private static final int ISI_DIR_STRING2 = 19; // inside string (value - '') in directive
82
private static final int ISI_TAG_STRING2_B = 20; // inside string (value - '') after backslash in tag
83
private static final int ISI_DIR_STRING2_B = 21; // inside string (value - '') after backslash in directive
84
private static final int ISA_ENDSLASH = 22; // after ending '/' in JSP tag
85
private static final int ISA_ENDPC = 23; // after ending '%' in JSP directive
86
// comments (+directives)
87
private static final int ISA_LT_PC = 24; // after '<%' - comment or directive or scriptlet
88
private static final int ISI_JSP_COMMENT = 25; // after <%-
89

90     private static final int ISI_JSP_COMMENT_M = 26; // inside JSP comment after -
91
private static final int ISI_JSP_COMMENT_MM = 27; // inside JSP comment after --
92
private static final int ISI_JSP_COMMENT_MMP = 28; // inside JSP comment after --%
93
// end state
94
// static final int ISA_END_JSP = 29; // JSP fragment has finished and control
95
// should be returned to master syntax
96
// more errors
97
private static final int ISI_TAG_ERROR = 30; // error in tag, can be cleared by > or \n
98
private static final int ISI_DIR_ERROR = 31; // error in directive, can be cleared by %>, \n, \t or space
99
private static final int ISI_DIR_ERROR_P = 32; // error in directive after %, can be cleared by > or \n
100

101     private static final int ISA_LT_PC_AT = 33; // after '<%@' (directive)
102
private static final int ISA_LT_SLASH = 34; // after '</' sequence
103
private static final int ISA_LT_PC_DASH = 35; // after <%- ;not comment yet
104

105     private static final int ISI_SCRIPTLET = 36; // inside java scriptlet/declaration/expression
106
private static final int ISP_SCRIPTLET_PC = 37; // just after % in scriptlet
107

108     //expression language
109

110     //EL in content language
111
private static final int ISA_EL_DELIM = 38; //after $ or # in content language
112
private static final int ISI_EL = 39; //expression language in content (after ${ or #{ )
113

114     public RhtmlLexer(LexerRestartInfo<RhtmlTokenId> info) {
115         this.input = info.input();
116         this.tokenFactory = info.tokenFactory();
117         if (info.state() == null) {
118             this.state = INIT;
119         } else {
120             int encoded = ((Integer JavaDoc) info.state()).intValue();
121             before_el_state = encoded / 1000;
122             state = encoded % 1000;
123         }
124     }
125     
126     public boolean isIdentifierPart(char ch) {
127         return Character.isJavaIdentifierPart(ch);
128     }
129     
130     private Token<RhtmlTokenId> token(RhtmlTokenId id) {
131 // System.out.print("JSP token(" + id + "; '" + input.readText().toString() + "')");
132
if(input.readLength() == 0) {
133             new Exception JavaDoc("Error - token length is zero!; state = " + state).printStackTrace();
134         }
135         Token<RhtmlTokenId> t = tokenFactory.createToken(id);
136 // System.out.println(t.id() + "; " + t.length());
137
return t;
138     }
139     
140     /** Determines whether a given string is a JSP tag. */
141     protected boolean isJspTag(String JavaDoc tagName) {
142         boolean canBeJsp = tagName.startsWith("jsp:"); // NOI18N
143
//TODO handle custom tags from JSP parser here
144
return canBeJsp;
145     }
146     
147     /** Looks ahead into the character buffer and checks if a jsp tag name follows. */
148     private boolean followsJspTag() {
149         int actChar;
150         int prev_read = input.readLength(); //remember the size of the read sequence
151
int read = 0;
152         while(true) {
153             actChar = input.read();
154             read++;
155             if(!(Character.isLetter(actChar) ||
156                     Character.isDigit(actChar) ||
157                     (actChar == '_') ||
158                     (actChar == '-') ||
159                     (actChar == ':') ||
160                     (actChar == '.')) ||
161                     (actChar == EOF)) { // EOL or not alpha
162
//end of tagname
163
String JavaDoc tagName = input.readText().toString().substring(prev_read);
164                 input.backup(read); //put the lookahead text back to the buffer
165
return isJspTag(tagName);
166             }
167         }
168     }
169     
170     public Token<RhtmlTokenId> nextToken() {
171         int actChar;
172         while (true) {
173             actChar = input.read();
174             
175             if (actChar == EOF) {
176                 if(input.readLengthEOF() == 1) {
177                     return null; //just EOL is read
178
} else {
179                     //there is something else in the buffer except EOL
180
//we will return last token now
181
input.backup(1); //backup the EOL, we will return null in next nextToken() call
182
break;
183                 }
184             }
185             
186             switch (state) {
187                 case INIT:
188                     switch (actChar) {
189 // case '\n':
190
// return token(RhtmlTokenId.EOL);
191
case '<':
192                             state = ISA_LT;
193                             break;
194 // default:
195
// state = ISI_ERROR;
196
// break;
197
case '$':
198                         case '#': //maybe expression language
199
before_el_state = state; //remember main state
200
state = ISA_EL_DELIM;
201                             break;
202                     }
203                     break;
204                     
205                 case ISA_EL_DELIM:
206                     switch(actChar) {
207                         case '{':
208                             if(input.readLength() > 2) {
209                                 //we have something read except the '${' or '#{' => it's content language
210
input.backup(2); //backup the '$/#{'
211
state = before_el_state; //we will read the '$/#{' again
212
before_el_state = INIT;
213                                 return token(RhtmlTokenId.TEXT); //return the content language token
214
}
215                             state = ISI_EL;
216                             break;
217                         default:
218                             state = before_el_state;
219                             before_el_state = INIT;
220                     }
221                     break;
222                     
223                 case ISI_EL:
224                     if(actChar == '}') {
225                         //return EL token
226
state = before_el_state;
227                         before_el_state = INIT;
228                         return token(RhtmlTokenId.EL);
229                     }
230                     //stay in EL
231
break;
232                     
233                 case ISA_LT:
234                     if (Character.isLetter(actChar) ||
235                             (actChar == '_')
236                             ) { // possible tag begining
237
input.backup(1); //backup the read letter
238
if(followsJspTag()) { //test if a jsp tag follows
239
if(input.readLength() > 1) {
240                                 //we have something read except the '<' => it's content language
241
input.backup(1); //backup the '<'
242
state = INIT; //we will read the '<' again
243
return token(RhtmlTokenId.TEXT); //return the content language token
244
}
245                             state = ISI_TAGNAME;
246                             break;
247                         } else {
248                             //just a content language
249
state = INIT;
250                             break;
251                         }
252 // input.backup(1);
253
// return token(RhtmlTokenId.SYMBOL);
254
}
255                     
256                     switch (actChar) {
257                         case '/':
258                             state = ISA_LT_SLASH;
259                             break;
260 // case '\n':
261
// state = ISI_TAG_ERROR;
262
// input.backup(1);
263
// return token(RhtmlTokenId.SYMBOL);
264
case '%':
265                             state = ISA_LT_PC;
266                             break;
267                         default:
268                             state = INIT; //just content
269
// state = ISI_TAG_ERROR;
270
// break;
271
}
272                     break;
273                     
274                 case ISA_LT_SLASH:
275                     if (Character.isLetter(actChar) ||
276                             (actChar == '_')) {
277                         //possible end tag beginning
278
input.backup(1); //backup the first letter
279
if(followsJspTag()) {
280                             if(input.readLength() > 2) {
281                                 //we have something read except the '</' symbol
282
input.backup(2);
283                                 state = INIT;
284                                 return token(RhtmlTokenId.TEXT);
285                             } else {
286                                 state = ISI_ENDTAG;
287                             }
288                             break;
289                         } else {
290                             //just a content language
291
state = INIT;
292                             break;
293                         }
294                     }
295                     
296                     //everyting alse is an error
297
state = ISI_TAG_ERROR;
298                     break;
299                     
300                 case ISI_TAGNAME:
301                 case ISI_DIRNAME:
302                     
303                     if (!(Character.isLetter(actChar) ||
304                             Character.isDigit(actChar) ||
305                             (actChar == '_') ||
306                             (actChar == '-') ||
307                             (actChar == ':') ||
308                             (actChar == '.'))) { // not alpha
309
switch(actChar) {
310                             case '<':
311                                 state = INIT;
312                                 input.backup(1);
313                                 break;
314                             case '/':
315                                 input.backup(1);
316                                 state = ((state == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
317                                 break;
318                             case '>':
319                                 state = INIT;
320                                 break;
321                             case ' ':
322                                 input.backup(1);
323                                 state = ((state == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
324                                 break;
325                             default:
326                                 state = ((state == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
327                         }
328                         return token(RhtmlTokenId.TAG);
329                     }
330                     break;
331                     
332                 case ISP_TAG:
333                 case ISP_DIR:
334                     if (Character.isLetter(actChar) ||
335                             (actChar == '_')
336                             ) {
337                         state = ((state == ISP_TAG) ? ISI_TAG_ATTR : ISI_DIR_ATTR);
338                         break;
339                     }
340                     switch (actChar) {
341                         case '\n':
342 // if (input.readLength() == 1) { // no char
343
return token(RhtmlTokenId.EOL);
344 // } else { // return string first
345
// input.backup(1);
346
// return decide_jsp_tag_token();
347
// }
348
case '>': // for tags
349
if (state == ISP_TAG) {
350 // if (input.readLength() == 1) { // no char
351
// state = ISA_END_JSP;
352
state = INIT;
353                                 return token(RhtmlTokenId.SYMBOL);
354 // } else { // return string first
355
// input.backup(1);
356
// return decide_jsp_tag_token();
357
// }
358
} else { // directive
359
//state = ISI_DIR_ERROR;
360
//commented out to minimize errors during the process of writing directives
361
break;
362                             }
363                         case '/': // for tags
364
if (state == ISP_TAG) {
365 // if (input.readLength() == 1) { // no char
366
state = ISA_ENDSLASH;
367                                 break;
368 // } else { // return string first
369
// input.backup(1);
370
// return decide_jsp_tag_token();
371
// }
372
} else { // directive
373
//state = ISI_DIR_ERROR;
374
//commented out to minimize errors during the process of writing directives
375
break;
376                             }
377                         case '%': // for directives
378
if (state == ISP_DIR) {
379 // if (input.readLength() == 1) { // no char
380
state = ISA_ENDPC;
381                                 break;
382 // } else { // return string first
383
// input.backup(1);
384
// return decide_jsp_tag_token();
385
// }
386
} else { // tag
387
state = ISI_TAG_ERROR;
388                                 break;
389                             }
390                         case '=':
391                             state = ((state == ISP_TAG) ? ISP_TAG_EQ : ISP_DIR_EQ);
392                             return token(RhtmlTokenId.SYMBOL);
393                         case ' ':
394                         case '\t':
395                             state = ((state == ISP_TAG) ? ISI_TAG_I_WS : ISI_DIR_I_WS);
396                             break;
397                         case '<': // assume that this is the start of the next tag
398
// state=ISA_END_JSP;
399
state = INIT;
400                             input.backup(1);
401                             return token(RhtmlTokenId.TAG);
402                         default: //numbers or illegal symbols
403
state = ((state == ISP_TAG) ? ISI_TAG_ERROR : ISI_DIR_ERROR);
404                             break;
405                     }
406                     break;
407                     
408                 case ISI_TAG_I_WS:
409                 case ISI_DIR_I_WS:
410                     switch (actChar) {
411                         case ' ':
412                         case '\t':
413                             break;
414                         case '<': //start of the next tag
415
// state = ISA_END_JSP;
416
state = INIT;
417                             input.backup(1);
418                             return token(RhtmlTokenId.TAG);
419                         default:
420                             state = ((state == ISI_TAG_I_WS) ? ISP_TAG : ISP_DIR);
421                             input.backup(1);
422                             return token(RhtmlTokenId.WHITESPACE);
423                     }
424                     break;
425                     
426                 case ISI_ENDTAG:
427                     if (!(Character.isLetter(actChar) ||
428                             Character.isDigit(actChar) ||
429                             (actChar == '_') ||
430                             (actChar == '-') ||
431                             (actChar == ':'))
432                             ) { // not alpha
433
state = ISP_TAG;
434                         input.backup(1);
435                         return token(RhtmlTokenId.TAG);
436                     }
437                     break;
438                     
439                 case ISI_TAG_ATTR:
440                 case ISI_DIR_ATTR:
441                     if (!(Character.isLetter(actChar) ||
442                             Character.isDigit(actChar) ||
443                             (actChar == '_') ||
444                             (actChar == ':') ||
445                             (actChar == '-'))
446                             ) { // not alpha or '-' (http-equiv)
447
state = ((state == ISI_TAG_ATTR) ? ISP_TAG : ISP_DIR);
448                         input.backup(1);
449                         return token(RhtmlTokenId.ATTRIBUTE);
450                     }
451                     break;
452                     
453                 case ISP_TAG_EQ:
454                 case ISP_DIR_EQ:
455                     switch (actChar) {
456                         case '\n':
457 // if (input.readLength() == 1) { // no char
458
return token(RhtmlTokenId.EOL);
459 // } else { // return string first
460
// input.backup(1);
461
// return token(RhtmlTokenId.ATTR_VALUE);
462
// }
463
case '"':
464                             state = ((state == ISP_TAG_EQ) ? ISI_TAG_STRING : ISI_DIR_STRING);
465                             break;
466                         case '\'':
467                             state = ((state == ISP_TAG_EQ) ? ISI_TAG_STRING2 : ISI_DIR_STRING2);
468                             break;
469                         case ' ':
470                         case '\t':
471                             // don't change the state
472
break;
473                         default:
474                             state = ((state == ISP_TAG_EQ) ? ISP_TAG : ISP_DIR);
475                             input.backup(1);
476                             //return token(RhtmlTokenId.ATTR_VALUE);
477
break;
478                     }
479                     break;
480                     
481                 case ISI_TAG_STRING:
482                 case ISI_DIR_STRING:
483                 case ISI_TAG_STRING2:
484                 case ISI_DIR_STRING2:
485                     if ((actChar == '"') && ((state == ISI_TAG_STRING) || (state == ISI_DIR_STRING))) {
486                         state = ((state == ISI_TAG_STRING) ? ISP_TAG : ISP_DIR);
487                         return token(RhtmlTokenId.ATTR_VALUE);
488                     }
489                     
490                     if ((actChar == '\'') && ((state == ISI_TAG_STRING2) || (state == ISI_DIR_STRING2))) {
491                         state = ((state == ISI_TAG_STRING2) ? ISP_TAG : ISP_DIR);
492                         return token(RhtmlTokenId.ATTR_VALUE);
493                     }
494                     
495                     switch (actChar) {
496                         case '\\':
497                             switch (state) {
498                                 case ISI_TAG_STRING:
499                                     state = ISI_TAG_STRING_B;
500                                     break;
501                                 case ISI_DIR_STRING:
502                                     state = ISI_DIR_STRING_B;
503                                     break;
504                                 case ISI_TAG_STRING2:
505                                     state = ISI_TAG_STRING2_B;
506                                     break;
507                                 case ISI_DIR_STRING2:
508                                     state = ISI_DIR_STRING2_B;
509                                     break;
510                             }
511                             break;
512                         case '\n':
513 // if (input.readLength() == 1) { // no char
514
return token(RhtmlTokenId.EOL);
515 //
516
// } else { // return string first
517
// input.backup(1);
518
// return token(RhtmlTokenId.ATTR_VALUE);
519
// }
520
case '$':
521                         case '#':
522                             before_el_state = state; //remember main state
523
state = ISA_EL_DELIM;
524                             break;
525                             
526                         default:
527                             //stay in ISI_TAG_STRING/2;
528
}
529                     break;
530                     
531                 case ISI_TAG_STRING_B:
532                 case ISI_DIR_STRING_B:
533                 case ISI_TAG_STRING2_B:
534                 case ISI_DIR_STRING2_B:
535                     switch (actChar) {
536                         case '"':
537                         case '\'':
538                         case '\\':
539                             break;
540                         default:
541                             input.backup(1);
542                             break;
543                     }
544                     switch (state) {
545                         case ISI_TAG_STRING_B:
546                             state = ISI_TAG_STRING;
547                             break;
548                         case ISI_DIR_STRING_B:
549                             state = ISI_DIR_STRING;
550                             break;
551                         case ISI_TAG_STRING2_B:
552                             state = ISI_TAG_STRING2;
553                             break;
554                         case ISI_DIR_STRING2_B:
555                             state = ISI_DIR_STRING2;
556                             break;
557                     }
558                     break;
559                     
560                 case ISA_ENDSLASH:
561                     switch (actChar) {
562                         case '>':
563 // state = ISA_END_JSP;
564
state = INIT;
565                             return token(RhtmlTokenId.SYMBOL);
566                         case '\n':
567                             state = ISI_TAG_ERROR;
568                             input.backup(1);
569                             return token(RhtmlTokenId.SYMBOL);
570                         default:
571                             state = ISP_TAG;
572                             input.backup(1);
573                             return token(RhtmlTokenId.SYMBOL);
574                     }
575                     //break; not reached
576

577                 case ISA_ENDPC:
578                     switch (actChar) {
579                         case '>':
580 // state = ISA_END_JSP;
581
state = INIT;
582                             return token(RhtmlTokenId.SYMBOL);
583                         case '\n':
584                             state = ISI_DIR_ERROR;
585                             input.backup(1);
586                             return token(RhtmlTokenId.SYMBOL);
587                         default:
588                             state = ISP_DIR;
589                             input.backup(1);
590                             return token(RhtmlTokenId.SYMBOL);
591                     }
592                     //break; not reached
593

594                 case ISA_LT_PC:
595                     switch (actChar) {
596                         case '@':
597                             if(input.readLength() == 3) {
598                                 // just <%@ read
599
state = ISA_LT_PC_AT;
600                                 return token(RhtmlTokenId.SYMBOL);
601                             } else {
602                                 //jsp symbol, but we also have content language in the buffer
603
input.backup(3); //backup <%@
604
state = INIT;
605                                 return token(RhtmlTokenId.TEXT); //return CL token
606
}
607                         case '-': //may be JSP comment
608
state = ISA_LT_PC_DASH;
609                             break;
610                         case '!': // java declaration
611
case '=': // java expression
612
if(input.readLength() == 3) {
613                                 // just <%! or <%= read
614
state = ISI_SCRIPTLET;
615                                 return token(RhtmlTokenId.SYMBOL2);
616                             } else {
617                                 //jsp symbol, but we also have content language in the buffer
618
input.backup(3); //backup <%! or <%=
619
state = INIT;
620                                 return token(RhtmlTokenId.TEXT); //return CL token
621
}
622                         default: //java scriptlet delimiter '<%'
623
if(input.readLength() == 3) {
624                                 // just <% + something != [-,!,=,@] read
625
state = ISI_SCRIPTLET;
626                                 input.backup(1); //backup the third character, it is a part of the java scriptlet
627
return token(RhtmlTokenId.SYMBOL2);
628                             } else {
629                                 //jsp symbol, but we also have content language in the buffer
630
input.backup(3); //backup <%@
631
state = INIT;
632                                 return token(RhtmlTokenId.TEXT); //return CL token
633
}
634                     }
635                     break;
636                     
637                 case ISI_SCRIPTLET:
638                     switch(actChar) {
639                         case '%':
640                             state = ISP_SCRIPTLET_PC;
641                             break;
642                     }
643                     break;
644                     
645                 case ISP_SCRIPTLET_PC:
646                     switch(actChar) {
647                         case '>':
648                             if(input.readLength() == 2) {
649                                 //just the '%>' symbol read
650
state = INIT;
651                                 return token(RhtmlTokenId.SYMBOL2);
652                             } else {
653                                 //return the scriptlet content
654
input.backup(2); // backup '%>' we will read JUST them again
655
state = ISI_SCRIPTLET;
656                                 return token(RhtmlTokenId.SCRIPTLET);
657                             }
658                         default:
659                             state = ISI_SCRIPTLET;
660                             break;
661                     }
662                     break;
663                     
664                 case ISA_LT_PC_DASH:
665                     switch(actChar) {
666                         case '-':
667                             if(input.readLength() == 4) {
668                                 //just the '<%--' symbol read
669
state = ISI_JSP_COMMENT;
670                             } else {
671                                 //return the scriptlet content
672
input.backup(4); // backup '<%--', we will read it again
673
state = INIT;
674                                 return token(RhtmlTokenId.TEXT);
675                             }
676                             break;
677                         default:
678 // state = ISA_END_JSP;
679
state = INIT; //XXX how to handle content language?
680
return token(RhtmlTokenId.TEXT); //marek: should I token here????
681
}
682                     
683                     // JSP states
684
case ISI_JSP_COMMENT:
685                     switch (actChar) {
686                         case '\n':
687                             if (input.readLength() == 1) { // no char
688
return token(RhtmlTokenId.EOL);
689                             } else { // return block comment first
690
input.backup(1);
691                                 return token(RhtmlTokenId.COMMENT);
692                             }
693                         case '-':
694                             state = ISI_JSP_COMMENT_M;
695                             break;
696                     }
697                     break;
698                     
699                 case ISI_JSP_COMMENT_M:
700                     switch (actChar) {
701                         case '\n':
702                             state = ISI_JSP_COMMENT;
703                             if (input.readLength() == 1) { // no char
704
return token(RhtmlTokenId.EOL);
705                             } else { // return block comment first
706
input.backup(1);
707                                 return token(RhtmlTokenId.COMMENT);
708                             }
709                         case '-':
710                             state = ISI_JSP_COMMENT_MM;
711                             break;
712                         default:
713                             state = ISI_JSP_COMMENT;
714                             break;
715                     }
716                     break;
717                     
718                 case ISI_JSP_COMMENT_MM:
719                     switch (actChar) {
720                         case '\n':
721                             state = ISI_JSP_COMMENT;
722                             if (input.readLength() == 1) { // no char
723
return token(RhtmlTokenId.EOL);
724                             } else { // return block comment first
725
input.backup(1);
726                                 return token(RhtmlTokenId.COMMENT);
727                             }
728                         case '%':
729                             state = ISI_JSP_COMMENT_MMP;
730                             break;
731                         case '-':
732                             state = ISI_JSP_COMMENT_MM;
733                             break;
734                         default:
735                             state = ISI_JSP_COMMENT;
736                             break;
737                     }
738                     break;
739                     
740                 case ISI_JSP_COMMENT_MMP:
741                     switch (actChar) {
742                         case '\n':
743                             state = ISI_JSP_COMMENT;
744                             if (input.readLength() == 1) { // no char
745
return token(RhtmlTokenId.EOL);
746                             } else { // return block comment first
747
input.backup(1);
748                                 return token(RhtmlTokenId.COMMENT);
749                             }
750                         case '>':
751 // state = ISA_END_JSP;
752
state = INIT;
753                             return token(RhtmlTokenId.COMMENT);
754                         default:
755                             state = ISI_JSP_COMMENT;
756                             break;
757                     }
758                     break;
759                     
760                 case ISI_ERROR:
761                     switch (actChar) {
762                         case '\n':
763                             state = INIT;
764                             input.backup(1);
765                             return token(RhtmlTokenId.ERROR);
766                         case '<':
767                             state = ISA_LT;
768                             input.backup(1);
769                             return token(RhtmlTokenId.ERROR);
770                     }
771                     break;
772                     
773                 case ISI_TAG_ERROR:
774                     switch (actChar) {
775                         case '\n':
776                             if (input.readLength() == 1) { // no char
777
state = ISI_TAG_I_WS;
778                                 return token(RhtmlTokenId.EOL);
779                             } else { // return error first
780
// input.backup(1);
781
return token(RhtmlTokenId.ERROR);
782                             }
783                         case '>':
784                         case ' ':
785                         case '\t':
786                             state = ISP_TAG;
787                             input.backup(1);
788                             return token(RhtmlTokenId.ERROR);
789                     }
790                     break;
791                     
792                 case ISI_DIR_ERROR:
793                     switch (actChar) {
794                         case '\n':
795                             if (input.readLength() == 1) { // no char
796
state = ISI_DIR_I_WS;
797                                 return token(RhtmlTokenId.EOL);
798                             } else { // return error first
799
// input.backup(1);
800
return token(RhtmlTokenId.ERROR);
801                             }
802                         case '%':
803                         case '\t':
804                         case ' ':
805                             state = ISP_DIR;
806                             input.backup(1);
807                             return token(RhtmlTokenId.ERROR);
808                     }
809                     break;
810                     
811                 case ISI_DIR_ERROR_P:
812                     switch (actChar) {
813                         case '\n':
814                             if (input.readLength() == 1) { // no char
815
state = ISI_DIR_I_WS;
816                                 return token(RhtmlTokenId.EOL);
817                             } else { // return error first
818
input.backup(1);
819                                 return token(RhtmlTokenId.ERROR);
820                             }
821                         case '>':
822                             input.backup(2);
823                             state = ISI_DIR_I_WS;
824                             return token(RhtmlTokenId.ERROR);
825                     }
826                     break;
827                     
828 // case ISA_END_JSP:
829
// if (input.readLength() == 1) {
830
// offset++;
831
// return RhtmlTokenId.AFTER_UNEXPECTED_LT;
832
// }
833
// else {
834
// return RhtmlTokenId.TEXT;
835
// }
836
// //break;
837

838                     // added states
839
case ISA_LT_PC_AT:
840                     if (Character.isLetter(actChar) ||
841                             (actChar == '_')
842                             ) { // the directive starts
843
state = ISI_DIRNAME;
844 // marek: why to create an empty tag token????
845
// input.backup(1);
846
// return decide_jsp_tag_token();
847
}
848                     
849                     switch (actChar) {
850                         case '\n':
851                             if (input.readLength() == 1) { // no char
852
return token(RhtmlTokenId.EOL);
853                             } else {
854                                 input.backup(1);
855                                 return token(RhtmlTokenId.TAG);
856                             }
857                     }
858                     break;
859                     
860             }
861             
862         }
863         
864         // At this stage there's no more text in the scanned buffer.
865
// Scanner first checks whether this is completely the last
866
// available buffer.
867

868         switch(state) {
869             case INIT:
870                 if (input.readLength() == 0) {
871                     return null;
872                 } else {
873                     return token(RhtmlTokenId.TEXT);
874                 }
875             case ISI_ERROR:
876             case ISI_TAG_ERROR:
877                 state = INIT;
878                 return token(RhtmlTokenId.ERROR);
879             case ISI_DIR_ERROR:
880             case ISI_DIR_ERROR_P:
881                 state = INIT;
882                 return token(RhtmlTokenId.ERROR);
883             case ISA_LT:
884             case ISA_LT_SLASH:
885             case ISA_ENDSLASH:
886             case ISP_TAG_EQ:
887                 state = INIT;
888                 return token(RhtmlTokenId.SYMBOL);
889             case ISA_LT_PC:
890             case ISA_LT_PC_DASH:
891             case ISA_ENDPC:
892             case ISP_DIR_EQ:
893                 state = INIT;
894                 return token(RhtmlTokenId.SYMBOL);
895             case ISI_TAGNAME:
896             case ISI_ENDTAG:
897                 state = INIT;
898                 return token(RhtmlTokenId.TAG);
899             case ISI_DIRNAME:
900                 state = INIT;
901                 return token(RhtmlTokenId.TAG);
902             case ISP_TAG:
903             case ISI_TAG_I_WS:
904                 state = INIT;
905                 return token(RhtmlTokenId.TAG);
906             case ISP_DIR:
907             case ISI_DIR_I_WS:
908             case ISA_LT_PC_AT:
909                 state = INIT;
910                 return token(RhtmlTokenId.TAG);
911             case ISI_TAG_ATTR:
912                 state = INIT;
913                 return token(RhtmlTokenId.ATTRIBUTE);
914             case ISI_DIR_ATTR:
915                 state = INIT;
916                 return token(RhtmlTokenId.ATTRIBUTE);
917             case ISI_TAG_STRING:
918             case ISI_TAG_STRING_B:
919             case ISI_TAG_STRING2:
920             case ISI_TAG_STRING2_B:
921                 state = INIT;
922                 return token(RhtmlTokenId.ATTR_VALUE);
923             case ISI_DIR_STRING:
924             case ISI_DIR_STRING_B:
925             case ISI_DIR_STRING2:
926             case ISI_DIR_STRING2_B:
927                 state = INIT;
928                 return token(RhtmlTokenId.ATTR_VALUE);
929             case ISI_JSP_COMMENT:
930             case ISI_JSP_COMMENT_M:
931             case ISI_JSP_COMMENT_MM:
932             case ISI_JSP_COMMENT_MMP:
933                 state = INIT;
934                 return token(RhtmlTokenId.COMMENT);
935             case ISA_EL_DELIM:
936                 state = INIT;
937                 return token(RhtmlTokenId.TEXT);
938             case ISI_EL:
939                 state = INIT;
940                 return token(RhtmlTokenId.EL);
941             case ISP_SCRIPTLET_PC:
942                 state = INIT;
943                 return token(RhtmlTokenId.SYMBOL2);
944             case ISI_SCRIPTLET:
945                 state = INIT;
946                 return token(RhtmlTokenId.SCRIPTLET);
947                 
948             default:
949                 System.out.println("RhtmlLexer - unhandled state : " + state); // NOI18N
950
}
951         
952         return null;
953         
954     }
955     
956     public void release() {
            // Intentionally a no-op: the body is empty, so calling this method
            // changes none of the lexer's fields (input, tokenFactory, state).
            // NOTE(review): presumably this is the release callback required by the
            // Lexer<RhtmlTokenId> interface this class implements -- confirm against the SPI.
957     }
958 }
959
960
Popular Tags