KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > lib > jsp > lexer > JspLexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.lib.jsp.lexer;
21
22 import java.util.logging.Level JavaDoc;
23 import java.util.logging.Logger JavaDoc;
24 import org.netbeans.api.jsp.lexer.JspTokenId;
25 import org.netbeans.api.lexer.InputAttributes;
26 import org.netbeans.api.lexer.LanguagePath;
27 import org.netbeans.api.lexer.PartType;
28 import org.netbeans.api.lexer.Token;
29 import org.netbeans.spi.jsp.lexer.JspParseData;
30 import org.netbeans.spi.lexer.Lexer;
31 import org.netbeans.spi.lexer.LexerInput;
32 import org.netbeans.spi.lexer.LexerRestartInfo;
33 import org.netbeans.spi.lexer.TokenFactory;
34 import org.netbeans.spi.lexer.TokenPropertyProvider;
35
36 /**
37  * Syntax class for JSP tags.
38  *
39  * @author Petr Jiricka
40  * @author Marek Fukala
41  *
42  * @version 1.00
43  */

44
45 public class JspLexer implements Lexer<JspTokenId> {
46     
47     private static final Logger JavaDoc LOGGER = Logger.getLogger(JspLexer.class.getName());
48     private static final boolean LOG = Boolean.getBoolean("j2ee_lexer_debug"); //NOI18N
49

50     
51     private static final int EOF = LexerInput.EOF;
52     
53     private final LexerInput input;
54     
55     private final InputAttributes inputAttributes;
56     private final JspParseData jspParseData;
57     
58     private final TokenFactory<JspTokenId> tokenFactory;
59     
60     public Object JavaDoc state() {
61         return lexerState + lexerStateBeforeEL * 1000 + lexerStateJspScriptlet * 1000000;
62     }
63     
64     //main internal lexer state
65
private int lexerState = INIT;
66     
67     //secondary internal state for EL expressions in JSP
68
//is it used to eliminate a number of lexer states when EL is found -
69
//we have 8 states just in attribute value so I would have to copy the EL
70
//recognition code eight-times.
71
private int lexerStateBeforeEL = INIT;
72     
73     //internal state signalling whether the lexer is in <jsp:scriptlet> tag
74
private int lexerStateJspScriptlet = INIT;
75     
76     // Internal analyzer states
77
// general
78
private static final int INIT = 0; // initial lexer state = content language
79
private static final int ISI_ERROR = 1; // when the fragment does not start with <
80
private static final int ISA_LT = 2; // after '<' char
81
// tags and directives
82
private static final int ISI_TAGNAME = 3; // inside JSP tag name
83
private static final int ISI_DIRNAME = 4; // inside JSP directive name
84
private static final int ISP_TAG = 5; // after JSP tag name
85
private static final int ISP_DIR = 6; // after JSP directive name
86
private static final int ISI_TAG_I_WS = 7; // inside JSP tag after whitespace
87
private static final int ISI_DIR_I_WS = 8; // inside JSP directive after whitespace
88
private static final int ISI_ENDTAG = 9; // inside end JSP tag
89
private static final int ISI_TAG_ATTR = 10; // inside tag attribute
90
private static final int ISI_DIR_ATTR = 11; // inside directive attribute
91
private static final int ISP_TAG_EQ = 12; // just after '=' in tag
92
private static final int ISP_DIR_EQ = 13; // just after '=' in directive
93
private static final int ISI_TAG_STRING = 14; // inside string (value - "") in tag
94
private static final int ISI_DIR_STRING = 15; // inside string (value - "") in directive
95
private static final int ISI_TAG_STRING_B = 16; // inside string (value - "") after backslash in tag
96
private static final int ISI_DIR_STRING_B = 17; // inside string (value - "") after backslash in directive
97
private static final int ISI_TAG_STRING2 = 18; // inside string (value - '') in tag
98
private static final int ISI_DIR_STRING2 = 19; // inside string (value - '') in directive
99
private static final int ISI_TAG_STRING2_B = 20; // inside string (value - '') after backslash in tag
100
private static final int ISI_DIR_STRING2_B = 21; // inside string (value - '') after backslash in directive
101
private static final int ISA_ENDSLASH = 22; // after ending '/' in JSP tag
102
private static final int ISA_ENDPC = 23; // after ending '%' in JSP directive
103
// comments (+directives)
104
private static final int ISA_LT_PC = 24; // after '<%' - comment or directive or scriptlet
105
private static final int ISI_JSP_COMMENT = 25; // after <%-
106

107     private static final int ISI_JSP_COMMENT_M = 26; // inside JSP comment after -
108
private static final int ISI_JSP_COMMENT_MM = 27; // inside JSP comment after --
109
private static final int ISI_JSP_COMMENT_MMP = 28; // inside JSP comment after --%
110
// end state
111
// static final int ISA_END_JSP = 29; // JSP fragment has finished and control
112
// should be returned to master syntax
113
// more errors
114
private static final int ISI_TAG_ERROR = 30; // error in tag, can be cleared by > or \n
115
private static final int ISI_DIR_ERROR = 31; // error in directive, can be cleared by %>, \n, \t or space
116
private static final int ISI_DIR_ERROR_P = 32; // error in directive after %, can be cleared by > or \n
117

118     private static final int ISA_LT_PC_AT = 33; // after '<%@' (directive)
119
private static final int ISA_LT_SLASH = 34; // after '</' sequence
120
private static final int ISA_LT_PC_DASH = 35; // after <%- ;not comment yet
121

122     private static final int ISI_SCRIPTLET = 36; // inside java scriptlet/declaration/expression
123
private static final int ISP_SCRIPTLET_PC = 37; // just after % in scriptlet
124

125     //expression language
126

127     //EL in content language
128
private static final int ISA_EL_DELIM = 38; //after $ or # in content language
129
private static final int ISI_EL = 39; //expression language in content (after ${ or #{ )
130

131     private static final int ISA_BS = 40; //after backslash in text - needed to disable EL by scaping # or $
132

133     //scriptlet substate states
134
//in standart syntax jsp
135
private static final int JAVA_SCRITPLET = 1; //java scriptlet
136
private static final int JAVA_DECLARATION = 2; //java declaration
137
private static final int JAVA_EXPRESSION = 3; //java expression
138
//in xml syntax jsp (jsp document)
139
private static final int JAVA_SCRITPLET_DOCUMENT = 4; //java scriptlet in JSP document
140
private static final int JAVA_DECLARATION_DOCUMENT = 5; //java declaration in JSP document
141
private static final int JAVA_EXPRESSION_DOCUMENT = 6; //java expression in JSP document
142

143    
144     public JspLexer(LexerRestartInfo<JspTokenId> info) {
145         this.input = info.input();
146         this.inputAttributes = info.inputAttributes();
147         this.tokenFactory = info.tokenFactory();
148         if (info.state() == null) {
149             lexerState = INIT;
150             lexerStateBeforeEL = INIT;
151             lexerStateJspScriptlet = INIT;
152         } else {
153             int encoded = ((Integer JavaDoc) info.state()).intValue();
154             lexerStateJspScriptlet = encoded / 1000000;
155             int reminder = encoded % 1000000;
156             lexerStateBeforeEL = reminder / 1000;
157             lexerState = encoded % 1000;
158         }
159         if(inputAttributes != null) {
160             jspParseData = (JspParseData)inputAttributes.getValue(LanguagePath.get(JspTokenId.language()), JspParseData.class);
161         } else {
162             jspParseData = null;
163         }
164     }
165     
166     public boolean isIdentifierPart(char character) {
167         return Character.isJavaIdentifierPart(character);
168     }
169     
170     /** Determines whether a given string is a JSP tag. */
171     private boolean isJspTag(String JavaDoc tagName) {
172         if(tagName.startsWith("jsp:")) { // NOI18N
173
return true;
174         }
175         
176         //TODO handle custom tags from JSP parser here
177
if(jspParseData != null) {
178             int colonIndex = tagName.indexOf(':');//NOI18N
179
if(colonIndex != -1) {
180                 String JavaDoc prefix = tagName.substring(0, colonIndex);
181                 return jspParseData.isTagLibRegistered(prefix);
182             }
183         }
184         
185         return false;
186     }
187     
188     private boolean isELIgnored() {
189         return jspParseData == null ? false : jspParseData.isELIgnored();
190     }
191     
192     private boolean isXMLSyntax() {
193         return jspParseData == null ? false: jspParseData.isXMLSyntax();
194     }
195     
196     private String JavaDoc getPossibleTagName() {
197         int actChar;
198         int prev_read = input.readLength(); //remember the size of the read sequence
199
int read = 0;
200         while(true) {
201             actChar = input.read();
202             read++;
203             if(!(Character.isLetter(actChar) ||
204                     Character.isDigit(actChar) ||
205                     (actChar == '_') ||
206                     (actChar == '-') ||
207                     (actChar == ':') ||
208                     (actChar == '.') ||
209                     (actChar == '/')) ||
210                     (actChar == EOF)) { // EOL or not alpha
211
//end of tagname
212
String JavaDoc tagName = input.readText().toString().substring(prev_read, prev_read + read - 1);
213                 input.backup(read); //put the lookahead text back to the buffer
214
return tagName;
215             }
216         }
217     }
218     
219     /** Looks ahead into the character buffer and checks if a jsp tag name follows. */
220     private boolean followsJspTag() {
221         return isJspTag(getPossibleTagName());
222     }
223     
224     public Token<JspTokenId> nextToken() {
225         int actChar;
226         while (true) {
227             actChar = input.read();
228             
229             if (actChar == EOF) {
230                 if(input.readLengthEOF() == 1) {
231                     return null; //just EOL is read
232
} else {
233                     //there is something else in the buffer except EOL
234
//we will return last token now
235
input.backup(1); //backup the EOL, we will return null in next nextToken() call
236
break;
237                 }
238             }
239             
240             switch (lexerState) {
241                 case INIT:
242                     switch (actChar) {
243                         // case '\n':
244
// return token(JspTokenId.EOL);
245
case '<':
246                             lexerState = ISA_LT;
247                             break;
248                             // default:
249
// state = ISI_ERROR;
250
// break;
251
case '\\':
252                             lexerState = ISA_BS;
253                             break;
254                         case '$':
255                         case '#': //maybe expression language
256
lexerStateBeforeEL = lexerState; //remember main state
257
lexerState = ISA_EL_DELIM;
258                             break;
259                     }
260                     break;
261                     
262                 case ISA_BS:
263                     if(actChar != '\\') {
264                         lexerState = INIT; //prevent scaped EL in text being recognized
265
}
266                     break;
267                     
268                 case ISA_EL_DELIM:
269                     if(isELIgnored()) {
270                         //reset to previous state - do not recognize EL
271
lexerState = lexerStateBeforeEL;
272                         lexerStateBeforeEL = INIT;
273                     } else {
274                         switch(actChar) {
275                             case '{':
276                                 if(input.readLength() > 2) {
277                                     //we have something read except the '${' or '#{' => it's content language
278
input.backup(2); //backup the '$/#{'
279
lexerState = lexerStateBeforeEL; //we will read the '$/#{' again
280
lexerStateBeforeEL = INIT;
281                                     return token(JspTokenId.TEXT); //return the content language token
282
}
283                                 lexerState = ISI_EL;
284                                 break;
285                             default:
286                                 lexerState = lexerStateBeforeEL;
287                                 lexerStateBeforeEL = INIT;
288                         }
289                     }
290                     break;
291                     
292                 case ISI_EL:
293                     if(actChar == '}') {
294                         //return EL token
295
lexerState = lexerStateBeforeEL;
296                         lexerStateBeforeEL = INIT;
297                         return token(JspTokenId.EL);
298                     }
299                     //stay in EL
300
break;
301                     
302                 case ISA_LT:
303                     if (Character.isLetter(actChar) ||
304                             (actChar == '_')
305                             ) { // possible tag begining
306
input.backup(1); //backup the read letter
307
String JavaDoc tagName = getPossibleTagName();
308                         if(isJspTag(tagName)) { //test if a jsp tag follows
309
if(input.readLength() > 1) {
310                                 //we have something read except the '<' => it's content language
311
input.backup(1); //backup the '<'
312
lexerState = INIT; //we will read the '<' again
313
return token(JspTokenId.TEXT); //return the content language token
314
}
315                             //possibly switch to scriptlet when <jsp:scriptlet> found
316

317                             if("jsp:scriptlet".equals(tagName)) { //NOI18N
318
lexerStateJspScriptlet = JAVA_SCRITPLET_DOCUMENT;
319                             } else if("jsp:declaration".equals(tagName)) { //NOI18N
320
lexerStateJspScriptlet = JAVA_DECLARATION_DOCUMENT;
321                             } else if("jsp:expression".equals(tagName)) { //NOI18N
322
lexerStateJspScriptlet = JAVA_EXPRESSION_DOCUMENT;
323                             }
324                             
325                             lexerState = ISI_TAGNAME;
326                             break;
327                         } else {
328                             //just a content language
329
lexerState = INIT;
330                             break;
331                         }
332                         // input.backup(1);
333
// return token(JspTokenId.SYMBOL);
334
}
335                     
336                     switch (actChar) {
337                         case '/':
338                             lexerState = ISA_LT_SLASH;
339                             break;
340                             // case '\n':
341
// state = ISI_TAG_ERROR;
342
// input.backup(1);
343
// return token(JspTokenId.SYMBOL);
344
case '%':
345                             lexerState = ISA_LT_PC;
346                             break;
347                         default:
348                             lexerState = INIT; //just content
349
// state = ISI_TAG_ERROR;
350
// break;
351
}
352                     break;
353                     
354                 case ISA_LT_SLASH:
355                     if (Character.isLetter(actChar) ||
356                             (actChar == '_')) {
357                         //possible end tag beginning
358
input.backup(1); //backup the first letter
359
if(followsJspTag()) {
360                             if(input.readLength() > 2) {
361                                 //we have something read except the '</' symbol
362
input.backup(2);
363                                 lexerState = INIT;
364                                 return token(JspTokenId.TEXT);
365                             } else {
366                                 lexerState = ISI_ENDTAG;
367                             }
368                             break;
369                         } else {
370                             //just a content language
371
lexerState = INIT;
372                             break;
373                         }
374                     }
375                     
376                     //not jsp end tag -> just content -> switch to init state
377
lexerState = INIT;
378                     break;
379                     
380                 case ISI_TAGNAME:
381                 case ISI_DIRNAME:
382                     
383                     if (!(Character.isLetter(actChar) ||
384                             Character.isDigit(actChar) ||
385                             (actChar == '_') ||
386                             (actChar == '-') ||
387                             (actChar == ':') ||
388                             (actChar == '.'))) { // not alpha
389
switch(actChar) {
390                             case '<':
391                                 lexerState = INIT;
392                                 input.backup(1);
393                                 break;
394                             case '/':
395                                 input.backup(1);
396                                 lexerState = ((lexerState == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
397                                 break;
398                             case '>':
399                                 if(lexerStateJspScriptlet != INIT) {
400                                     //switch to java scriptlet
401
lexerState = ISI_SCRIPTLET;
402                                 } else {
403                                     lexerState = INIT;
404                                 }
405                                 break;
406                             case ' ':
407                                 input.backup(1);
408                                 lexerState = ((lexerState == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
409                                 break;
410                             default:
411                                 lexerState = ((lexerState == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
412                         }
413                         return token(JspTokenId.TAG);
414                     }
415                     break;
416                     
417                 case ISP_TAG:
418                 case ISP_DIR:
419                     if (Character.isLetter(actChar) ||
420                             (actChar == '_')
421                             ) {
422                         lexerState = ((lexerState == ISP_TAG) ? ISI_TAG_ATTR : ISI_DIR_ATTR);
423                         break;
424                     }
425                     switch (actChar) {
426                         case '\n':
427                             // if (input.readLength() == 1) { // no char
428
return token(JspTokenId.EOL);
429                             // } else { // return string first
430
// input.backup(1);
431
// return decide_jsp_tag_token();
432
// }
433
case '>': // for tags
434
if (lexerState == ISP_TAG) {
435                                 // if (input.readLength() == 1) { // no char
436
// state = ISA_END_JSP;
437
lexerState = INIT;
438                                 return token(JspTokenId.SYMBOL);
439                                 // } else { // return string first
440
// input.backup(1);
441
// return decide_jsp_tag_token();
442
// }
443
} else { // directive
444
lexerState = ISI_DIR_ERROR;
445                                 break;
446                             }
447                         case '/': // for tags
448
if (lexerState == ISP_TAG) {
449                                 // if (input.readLength() == 1) { // no char
450
lexerState = ISA_ENDSLASH;
451                                 break;
452                                 // } else { // return string first
453
// input.backup(1);
454
// return decide_jsp_tag_token();
455
// }
456
} else { // directive
457
lexerState = ISI_DIR_ERROR;
458                                 break;
459                             }
460                         case '%': // for directives
461
if (lexerState == ISP_DIR) {
462                                 // if (input.readLength() == 1) { // no char
463
lexerState = ISA_ENDPC;
464                                 break;
465                                 // } else { // return string first
466
// input.backup(1);
467
// return decide_jsp_tag_token();
468
// }
469
} else { // tag
470
lexerState = ISI_TAG_ERROR;
471                                 break;
472                             }
473                         case '=':
474                             lexerState = ((lexerState == ISP_TAG) ? ISP_TAG_EQ : ISP_DIR_EQ);
475                             return token(JspTokenId.SYMBOL);
476                         case ' ':
477                         case '\t':
478                             lexerState = ((lexerState == ISP_TAG) ? ISI_TAG_I_WS : ISI_DIR_I_WS);
479                             break;
480                         case '<':
481                             // assume that this is the start of the next tag
482
//we shouldn't have anything else than then the < char in buffer
483
assert input.readLength() == 1 : "There is something more than '<' char in the read text: '" + input.readText() + "'"; //NOI18N
484
input.backup(1);
485                             lexerState = INIT;
486                         default: //numbers or illegal symbols
487
lexerState = ((lexerState == ISP_TAG) ? ISI_TAG_ERROR : ISI_DIR_ERROR);
488                             break;
489                     }
490                     break;
491                     
492                 case ISI_TAG_I_WS:
493                 case ISI_DIR_I_WS:
494                     switch (actChar) {
495                         case ' ':
496                         case '\t':
497                             break;
498                         case '<': //start of the next tag
499
// state = ISA_END_JSP;
500
lexerState = INIT;
501                             input.backup(1);
502                             return token(JspTokenId.TAG);
503                         default:
504                             lexerState = ((lexerState == ISI_TAG_I_WS) ? ISP_TAG : ISP_DIR);
505                             input.backup(1);
506                             return token(JspTokenId.WHITESPACE);
507                     }
508                     break;
509                     
510                 case ISI_ENDTAG:
511                     if (!(Character.isLetter(actChar) ||
512                             Character.isDigit(actChar) ||
513                             (actChar == '_') ||
514                             (actChar == '-') ||
515                             (actChar == ':'))
516                             ) { // not alpha
517
lexerState = ISP_TAG;
518                         input.backup(1);
519                         return token(JspTokenId.TAG);
520                     }
521                     break;
522                     
523                 case ISI_TAG_ATTR:
524                 case ISI_DIR_ATTR:
525                     if (!(Character.isLetter(actChar) ||
526                             Character.isDigit(actChar) ||
527                             (actChar == '_') ||
528                             (actChar == ':') ||
529                             (actChar == '-'))
530                             ) { // not alpha or '-' (http-equiv)
531
lexerState = ((lexerState == ISI_TAG_ATTR) ? ISP_TAG : ISP_DIR);
532                         input.backup(1);
533                         return token(JspTokenId.ATTRIBUTE);
534                     }
535                     break;
536                     
537                 case ISP_TAG_EQ:
538                 case ISP_DIR_EQ:
539                     switch (actChar) {
540                         case '\n':
541                             // if (input.readLength() == 1) { // no char
542
return token(JspTokenId.EOL);
543                             // } else { // return string first
544
// input.backup(1);
545
// return token(JspTokenId.ATTR_VALUE);
546
// }
547
case '"':
548                             lexerState = ((lexerState == ISP_TAG_EQ) ? ISI_TAG_STRING : ISI_DIR_STRING);
549                             break;
550                         case '\'':
551                             lexerState = ((lexerState == ISP_TAG_EQ) ? ISI_TAG_STRING2 : ISI_DIR_STRING2);
552                             break;
553                         case ' ':
554                         case '\t':
555                             // don't change the state
556
break;
557                         default:
558                             //invalid value - lets backup it and swith to tag content
559
lexerState = ((lexerState == ISP_TAG_EQ) ? ISP_TAG : ISP_DIR);
560                             input.backup(input.readLength());
561                             break;
562                     }
563                     break;
564                     
565                 case ISI_TAG_STRING:
566                 case ISI_DIR_STRING:
567                 case ISI_TAG_STRING2:
568                 case ISI_DIR_STRING2:
569                     if ((actChar == '"') && ((lexerState == ISI_TAG_STRING) || (lexerState == ISI_DIR_STRING))) {
570                         lexerState = ((lexerState == ISI_TAG_STRING) ? ISP_TAG : ISP_DIR);
571                         return token(JspTokenId.ATTR_VALUE);
572                     }
573                     
574                     if ((actChar == '\'') && ((lexerState == ISI_TAG_STRING2) || (lexerState == ISI_DIR_STRING2))) {
575                         lexerState = ((lexerState == ISI_TAG_STRING2) ? ISP_TAG : ISP_DIR);
576                         return token(JspTokenId.ATTR_VALUE);
577                     }
578                     
579                     switch (actChar) {
580                         case '\\':
581                             switch (lexerState) {
582                                 case ISI_TAG_STRING:
583                                     lexerState = ISI_TAG_STRING_B;
584                                     break;
585                                 case ISI_DIR_STRING:
586                                     lexerState = ISI_DIR_STRING_B;
587                                     break;
588                                 case ISI_TAG_STRING2:
589                                     lexerState = ISI_TAG_STRING2_B;
590                                     break;
591                                 case ISI_DIR_STRING2:
592                                     lexerState = ISI_DIR_STRING2_B;
593                                     break;
594                             }
595                             break;
596                         case '\n':
597                             if (input.readLength() == 1) { // no char
598
return token(JspTokenId.EOL);
599                             } else { // return string first
600
input.backup(1);
601                                 return token(JspTokenId.ATTR_VALUE);
602                             }
603                         case '$':
604                         case '#':
605                             if(input.readLength() > 1) {
606                                 //return part of the attribute value before EL
607
input.backup(1); //backup $ or #
608
return token(JspTokenId.ATTR_VALUE);
609                             } else {
610                                 lexerStateBeforeEL = lexerState; //remember main state
611
lexerState = ISA_EL_DELIM;
612                             }
613                             break;
614                             
615                         default:
616                             break;//stay in ISI_TAG_STRING/2;
617

618                     }
619                     break;
620                     
621                 case ISI_TAG_STRING_B:
622                 case ISI_DIR_STRING_B:
623                 case ISI_TAG_STRING2_B:
624                 case ISI_DIR_STRING2_B:
625                     switch (actChar) {
626                         case '"':
627                         case '\'':
628                         case '\\':
629                         case '$':
630                         case '#':
631                             break;
632                         default:
633                             input.backup(1);
634                             break;
635                     }
636                     switch (lexerState) {
637                         case ISI_TAG_STRING_B:
638                             lexerState = ISI_TAG_STRING;
639                             break;
640                         case ISI_DIR_STRING_B:
641                             lexerState = ISI_DIR_STRING;
642                             break;
643                         case ISI_TAG_STRING2_B:
644                             lexerState = ISI_TAG_STRING2;
645                             break;
646                         case ISI_DIR_STRING2_B:
647                             lexerState = ISI_DIR_STRING2;
648                             break;
649                     }
650                     break;
651                     
652                 case ISA_ENDSLASH:
653                     switch (actChar) {
654                         case '>':
655                             // state = ISA_END_JSP;
656
lexerState = INIT;
657                             return token(JspTokenId.SYMBOL);
658                         case '\n':
659                             lexerState = ISI_TAG_ERROR;
660                             input.backup(1);
661                             return token(JspTokenId.SYMBOL);
662                         default:
663                             lexerState = ISP_TAG;
664                             input.backup(1);
665                             return token(JspTokenId.SYMBOL);
666                     }
667                     //break; not reached
668

669                 case ISA_ENDPC:
670                     switch (actChar) {
671                         case '>':
672                             // state = ISA_END_JSP;
673
lexerState = INIT;
674                             return token(JspTokenId.SYMBOL);
675                         case '\n':
676                             lexerState = ISI_DIR_ERROR;
677                             input.backup(1);
678                             return token(JspTokenId.SYMBOL);
679                         default:
680                             lexerState = ISP_DIR;
681                             input.backup(1);
682                             return token(JspTokenId.SYMBOL);
683                     }
684                     //break; not reached
685

686                 case ISA_LT_PC:
687                     switch (actChar) {
688                         case '@':
689                             if(input.readLength() == 3) {
690                                 // just <%@ read
691
lexerState = ISA_LT_PC_AT;
692                                 return token(JspTokenId.SYMBOL);
693                             } else {
694                                 //jsp symbol, but we also have content language in the buffer
695
input.backup(3); //backup <%@
696
lexerState = INIT;
697                                 return token(JspTokenId.TEXT); //return CL token
698
}
699                         case '-': //may be JSP comment
700
lexerState = ISA_LT_PC_DASH;
701                             break;
702                         case '!': // java declaration
703
case '=': // java expression
704
if(input.readLength() == 3) {
705                                 // just <%! or <%= read
706
lexerStateJspScriptlet = actChar == '!' ? JAVA_DECLARATION : JAVA_EXPRESSION;
707                                 lexerState = ISI_SCRIPTLET;
708                                 return token(JspTokenId.SYMBOL2);
709                             } else {
710                                 //jsp symbol, but we also have content language in the buffer
711
input.backup(3); //backup <%! or <%=
712
lexerState = INIT;
713                                 return token(JspTokenId.TEXT); //return CL token
714
}
715                         default: //java scriptlet delimiter '<%'
716
if(input.readLength() == 3) {
717                                 // just <% + something != [-,!,=,@] read
718
lexerStateJspScriptlet = JAVA_SCRITPLET;
719                                 lexerState = ISI_SCRIPTLET;
720                                 input.backup(1); //backup the third character, it is a part of the java scriptlet
721
return token(JspTokenId.SYMBOL2);
722                             } else {
723                                 //jsp symbol, but we also have content language in the buffer
724
input.backup(3); //backup <%@
725
lexerState = INIT;
726                                 return token(JspTokenId.TEXT); //return CL token
727
}
728                     }
729                     break;
730                     
731                 case ISI_SCRIPTLET:
732                     switch(actChar) {
733                         case '%':
734                             lexerState = ISP_SCRIPTLET_PC;
735                             break;
736                         case '<':
737                             //may be end of scriptlet section in JSP document
738
String JavaDoc tagName = getPossibleTagName();
739                             if("/jsp:scriptlet".equals(tagName) || //NOI18N
740
"/jsp:declaration".equals(tagName) || //NOI18N
741
"/jsp:expression".equals(tagName)) { //NOI18N
742
if(input.readLength() == 1) {
743                                     //just the '<' symbol read
744
input.backup(1);
745                                     lexerState = INIT;
746                                 } else {
747                                     //return the scriptlet content
748
input.backup(1); // backup '<' we will read it again
749
int lxs = lexerStateJspScriptlet;
750                                     lexerStateJspScriptlet = INIT;
751                                     return scriptletToken(JspTokenId.SCRIPTLET, lxs);
752                                 }
753                             }
754                     }
755                     break;
756                     
757                 case ISP_SCRIPTLET_PC:
758                     switch(actChar) {
759                         case '>':
760                             if(input.readLength() == 2) {
761                                 //just the '%>' symbol read
762
lexerState = INIT;
763                                 return token(JspTokenId.SYMBOL2);
764                             } else {
765                                 //return the scriptlet content
766
input.backup(2); // backup '%>' we will read JUST them again
767
lexerState = ISI_SCRIPTLET;
768                                 int lxs = lexerStateJspScriptlet;
769                                 lexerStateJspScriptlet = INIT;
770                                 return scriptletToken(JspTokenId.SCRIPTLET, lxs);
771                             }
772                         default:
773                             lexerState = ISI_SCRIPTLET;
774                             break;
775                     }
776                     break;
777                     
778                 case ISA_LT_PC_DASH:
779                     switch(actChar) {
780                         case '-':
781                             if(input.readLength() == 4) {
782                                 //just the '<%--' symbol read
783
lexerState = ISI_JSP_COMMENT;
784                             } else {
785                                 //return the scriptlet content
786
input.backup(4); // backup '<%--', we will read it again
787
lexerState = INIT;
788                                 return token(JspTokenId.TEXT);
789                             }
790                             break;
791                         default:
792                             // state = ISA_END_JSP;
793
lexerState = INIT; //XXX how to handle content language?
794
return token(JspTokenId.TEXT); //marek: should I token here????
795
}
796                     
797                     // JSP states
798
case ISI_JSP_COMMENT:
799                     switch (actChar) {
800                         case '\n':
801                             if (input.readLength() == 1) { // no char
802
return token(JspTokenId.EOL);
803                             } else { // return block comment first
804
input.backup(1);
805                                 return token(JspTokenId.COMMENT);
806                             }
807                         case '-':
808                             lexerState = ISI_JSP_COMMENT_M;
809                             break;
810                     }
811                     break;
812                     
813                 case ISI_JSP_COMMENT_M:
814                     switch (actChar) {
815                         case '\n':
816                             lexerState = ISI_JSP_COMMENT;
817                             if (input.readLength() == 1) { // no char
818
return token(JspTokenId.EOL);
819                             } else { // return block comment first
820
input.backup(1);
821                                 return token(JspTokenId.COMMENT);
822                             }
823                         case '-':
824                             lexerState = ISI_JSP_COMMENT_MM;
825                             break;
826                         default:
827                             lexerState = ISI_JSP_COMMENT;
828                             break;
829                     }
830                     break;
831                     
832                 case ISI_JSP_COMMENT_MM:
833                     switch (actChar) {
834                         case '\n':
835                             lexerState = ISI_JSP_COMMENT;
836                             if (input.readLength() == 1) { // no char
837
return token(JspTokenId.EOL);
838                             } else { // return block comment first
839
input.backup(1);
840                                 return token(JspTokenId.COMMENT);
841                             }
842                         case '%':
843                             lexerState = ISI_JSP_COMMENT_MMP;
844                             break;
845                         case '-':
846                             lexerState = ISI_JSP_COMMENT_MM;
847                             break;
848                         default:
849                             lexerState = ISI_JSP_COMMENT;
850                             break;
851                     }
852                     break;
853                     
854                 case ISI_JSP_COMMENT_MMP:
855                     switch (actChar) {
856                         case '\n':
857                             lexerState = ISI_JSP_COMMENT;
858                             if (input.readLength() == 1) { // no char
859
return token(JspTokenId.EOL);
860                             } else { // return block comment first
861
input.backup(1);
862                                 return token(JspTokenId.COMMENT);
863                             }
864                         case '>':
865                             // state = ISA_END_JSP;
866
lexerState = INIT;
867                             return token(JspTokenId.COMMENT);
868                         default:
869                             lexerState = ISI_JSP_COMMENT;
870                             break;
871                     }
872                     break;
873                     
874                 case ISI_ERROR:
875                     switch (actChar) {
876                         case '\n':
877                             lexerState = INIT;
878                             input.backup(1);
879                             return token(JspTokenId.ERROR);
880                         case '<':
881                             lexerState = ISA_LT;
882                             input.backup(1);
883                             return token(JspTokenId.ERROR);
884                     }
885                     break;
886                     
887                 case ISI_TAG_ERROR:
888                     switch (actChar) {
889                         case '\n':
890                             if (input.readLength() == 1) { // no char
891
lexerState = ISP_TAG;
892                                 return token(JspTokenId.EOL);
893                             } else { // return error first
894
input.backup(1);
895                                 return token(JspTokenId.ERROR);
896                             }
897                         case '>':
898                         case ' ':
899                         case '\t':
900                             lexerState = ISP_TAG;
901                             input.backup(1);
902                             return token(JspTokenId.ERROR);
903                         default:
904                             break;
905                     }
906                     break;
907                     
908                 case ISI_DIR_ERROR:
909                     switch (actChar) {
910                         case '\n':
911                             if (input.readLength() == 1) { // no char
912
lexerState = ISP_DIR;
913                                 return token(JspTokenId.EOL);
914                             } else { // return error first
915
input.backup(1);
916                                 return token(JspTokenId.ERROR);
917                             }
918                             // case '%':
919
case '\t':
920                         case ' ':
921                             lexerState = ISP_DIR;
922                             if(input.readLength() > 1) {
923                                 input.backup(1);
924                                 return token(JspTokenId.ERROR);
925                             }
926                         default:
927                             break;
928                     }
929                     break;
930                     
931                 case ISI_DIR_ERROR_P:
932                     switch (actChar) {
933                         case '\n':
934                             if (input.readLength() == 1) { // no char
935
lexerState = ISI_DIR_I_WS;
936                                 return token(JspTokenId.EOL);
937                             } else { // return error first
938
input.backup(1);
939                                 return token(JspTokenId.ERROR);
940                             }
941                         case '>':
942                             input.backup(2);
943                             lexerState = ISI_DIR_I_WS;
944                             return token(JspTokenId.ERROR);
945                         default:
946                             break;
947                     }
948                     break;
949                     
950                     // case ISA_END_JSP:
951
// if (input.readLength() == 1) {
952
// offset++;
953
// return JspTokenId.AFTER_UNEXPECTED_LT;
954
// }
955
// else {
956
// return JspTokenId.TEXT;
957
// }
958
// //break;
959

960                     // added states
961
case ISA_LT_PC_AT:
962                     if (Character.isLetter(actChar) ||
963                             (actChar == '_')
964                             ) {
965                         // the directive starts
966
lexerState = ISI_DIRNAME;
967                         break;
968                     }
969                     
970                     switch (actChar) {
971                         case '\n':
972                             if (input.readLength() == 1) { // no char
973
return token(JspTokenId.EOL);
974                             } else {
975                                 input.backup(1);
976                                 return token(JspTokenId.SYMBOL);
977                             }
978                         case ' ':
979                         case '\t':
980                             break;
981                         default:
982                             //error
983
lexerState = ISI_DIR_ERROR;
984                             if(input.readLength() > 1) {
985                                 input.backup(1); //backup the error char if there is something more in the buffer
986
return token(JspTokenId.SYMBOL);
987                             }
988                             break;
989                     }
990                     break;
991                     
992             }
993             
994         }
995         
996         // At this stage there's no more text in the scanned buffer.
997
// Scanner first checks whether this is completely the last
998
// available buffer.
999

1000        switch(lexerState) {
1001            case INIT:
1002            case ISA_LT:
1003            case ISA_LT_SLASH:
1004                if (input.readLength() == 0) {
1005                    return null;
1006                } else {
1007                    return token(JspTokenId.TEXT);
1008                }
1009            case ISI_ERROR:
1010            case ISI_TAG_ERROR:
1011                lexerState = INIT;
1012                return token(JspTokenId.ERROR);
1013            case ISI_DIR_ERROR:
1014            case ISI_DIR_ERROR_P:
1015                lexerState = INIT;
1016                return token(JspTokenId.ERROR);
1017            case ISA_ENDSLASH:
1018            case ISP_TAG_EQ:
1019                lexerState = INIT;
1020                return token(JspTokenId.SYMBOL);
1021            case ISA_LT_PC:
1022            case ISA_LT_PC_DASH:
1023            case ISA_ENDPC:
1024            case ISP_DIR_EQ:
1025                lexerState = INIT;
1026                return token(JspTokenId.SYMBOL);
1027            case ISI_TAGNAME:
1028            case ISI_ENDTAG:
1029                lexerState = INIT;
1030                return token(JspTokenId.TAG);
1031            case ISI_DIRNAME:
1032                lexerState = INIT;
1033                return token(JspTokenId.TAG);
1034            case ISP_TAG:
1035            case ISI_TAG_I_WS:
1036                lexerState = INIT;
1037                return token(JspTokenId.TAG);
1038            case ISP_DIR:
1039            case ISI_DIR_I_WS:
1040            case ISA_LT_PC_AT:
1041                lexerState = INIT;
1042                return token(JspTokenId.TAG);
1043            case ISI_TAG_ATTR:
1044                lexerState = INIT;
1045                return token(JspTokenId.ATTRIBUTE);
1046            case ISI_DIR_ATTR:
1047                lexerState = INIT;
1048                return token(JspTokenId.ATTRIBUTE);
1049            case ISI_TAG_STRING:
1050            case ISI_TAG_STRING_B:
1051            case ISI_TAG_STRING2:
1052            case ISI_TAG_STRING2_B:
1053                lexerState = INIT;
1054                return token(JspTokenId.ATTR_VALUE);
1055            case ISI_DIR_STRING:
1056            case ISI_DIR_STRING_B:
1057            case ISI_DIR_STRING2:
1058            case ISI_DIR_STRING2_B:
1059                lexerState = INIT;
1060                return token(JspTokenId.ATTR_VALUE);
1061            case ISI_JSP_COMMENT:
1062            case ISI_JSP_COMMENT_M:
1063            case ISI_JSP_COMMENT_MM:
1064            case ISI_JSP_COMMENT_MMP:
1065                lexerState = INIT;
1066                return token(JspTokenId.COMMENT);
1067            case ISA_EL_DELIM:
1068                lexerState = INIT;
1069                return token(JspTokenId.TEXT);
1070            case ISI_EL:
1071                lexerState = INIT;
1072                return token(JspTokenId.EL);
1073            case ISP_SCRIPTLET_PC:
1074                lexerState = INIT;
1075                return token(JspTokenId.SYMBOL2);
1076            case ISI_SCRIPTLET:
1077                lexerState = INIT;
1078                return scriptletToken(JspTokenId.SCRIPTLET, lexerStateJspScriptlet);
1079            default:
1080                break;
1081        }
1082        
1083        return null;
1084        
1085    }
1086    
1087    private Token<JspTokenId> token(JspTokenId tokenId) {
1088        if(LOG) {
1089            checkToken(tokenId);
1090        }
1091        return tokenFactory.createToken(tokenId);
1092    }
1093    
1094    private Token<JspTokenId> scriptletToken(JspTokenId tokenId, int javaCodeType) {
1095        if(LOG) {
1096            checkToken(tokenId);
1097        }
1098        JspTokenId.JavaCodeType scriptletType;
1099        switch(javaCodeType) {
1100            case JAVA_SCRITPLET:
1101            case JAVA_SCRITPLET_DOCUMENT:
1102                scriptletType = JspTokenId.JavaCodeType.SCRIPTLET;
1103                break;
1104            case JAVA_DECLARATION:
1105            case JAVA_DECLARATION_DOCUMENT:
1106                scriptletType = JspTokenId.JavaCodeType.DECLARATION;
1107                break;
1108            case JAVA_EXPRESSION:
1109            case JAVA_EXPRESSION_DOCUMENT:
1110                scriptletType = JspTokenId.JavaCodeType.EXPRESSION;
1111                break;
1112            default:
1113                throw new IllegalStateException JavaDoc("Unsupported scriptlet type " + lexerStateJspScriptlet);
1114        }
1115        
1116        return tokenFactory.createPropertyToken(tokenId, input.readLength(),
1117                new JspTokenPropertyProvider(scriptletType), PartType.COMPLETE);
1118    }
1119    
1120    private void checkToken(JspTokenId tokenId) {
1121            if(input.readLength() == 0) {
1122                LOGGER.log(Level.INFO, "Found zero length token: ");
1123            }
1124            LOGGER.log(Level.INFO, "[" + this.getClass().getSimpleName() + "] token ('" + input.readText().toString() + "'; id=" + tokenId + "; state=" + state() + ")\n");
1125    }
1126    
1127    public void release() {
1128    }
1129    
1130    private static class JspTokenPropertyProvider implements TokenPropertyProvider {
1131        
1132        private final JspTokenId.JavaCodeType scriptletType;
1133        
1134        JspTokenPropertyProvider(JspTokenId.JavaCodeType scriptletType) {
1135            this.scriptletType = scriptletType;
1136        }
1137
1138        public Object JavaDoc getValue(Token token, Object JavaDoc key) {
1139            if (JspTokenId.SCRIPTLET_TOKEN_TYPE_PROPERTY.equals(key))
1140                return scriptletType;
1141            return null;
1142        }
1143
1144    }
1145    
1146}
1147
1148
Popular Tags