JspLexer


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
20  package org.netbeans.lib.jsp.lexer;
21  
22  import java.util.logging.Level  ;
23  import java.util.logging.Logger  ;
24  import org.netbeans.api.jsp.lexer.JspTokenId;
25  import org.netbeans.api.lexer.InputAttributes;
26  import org.netbeans.api.lexer.LanguagePath;
27  import org.netbeans.api.lexer.PartType;
28  import org.netbeans.api.lexer.Token;
29  import org.netbeans.spi.jsp.lexer.JspParseData;
30  import org.netbeans.spi.lexer.Lexer;
31  import org.netbeans.spi.lexer.LexerInput;
32  import org.netbeans.spi.lexer.LexerRestartInfo;
33  import org.netbeans.spi.lexer.TokenFactory;
34  import org.netbeans.spi.lexer.TokenPropertyProvider;
35  
36  /**
37   * Syntax class for JSP tags.
38   *
39   * @author Petr Jiricka
40   * @author Marek Fukala
41   *
42   * @version 1.00
43   */
44  
45  public class JspLexer implements Lexer<JspTokenId> {
46      
47      private static final Logger   LOGGER = Logger.getLogger(JspLexer.class.getName());
48      private static final boolean LOG = Boolean.getBoolean("j2ee_lexer_debug"); //NOI18N
49      
50      
51      private static final int EOF = LexerInput.EOF;
52      
53      private final LexerInput input;
54      
55      private final InputAttributes inputAttributes;
56      private final JspParseData jspParseData;
57      
58      private final TokenFactory<JspTokenId> tokenFactory;
59      
60      public Object   state() {
61          return lexerState + lexerStateBeforeEL * 1000 + lexerStateJspScriptlet * 1000000;
62      }
63      
64      //main internal lexer state
65      private int lexerState = INIT;
66      
67      //secondary internal state for EL expressions in JSP
68      //it is used to eliminate a number of lexer states when EL is found -
69      //we have 8 states just in attribute value so I would have to copy the EL
70      //recognition code eight times.
71      private int lexerStateBeforeEL = INIT;
72      
73      //internal state signalling whether the lexer is in <jsp:scriptlet> tag
74      private int lexerStateJspScriptlet = INIT;
75      
76      // Internal analyzer states
77      // general
78      private static final int INIT                =  0;  // initial lexer state = content language
79      private static final int ISI_ERROR           =  1; // when the fragment does not start with <
80      private static final int ISA_LT              =  2; // after '<' char
81      // tags and directives
82      private static final int ISI_TAGNAME         =  3; // inside JSP tag name
83      private static final int ISI_DIRNAME         =  4; // inside JSP directive name
84      private static final int ISP_TAG             =  5; // after JSP tag name
85      private static final int ISP_DIR             =  6; // after JSP directive name
86      private static final int ISI_TAG_I_WS        =  7; // inside JSP tag after whitespace
87      private static final int ISI_DIR_I_WS        =  8; // inside JSP directive after whitespace
88      private static final int ISI_ENDTAG          =  9; // inside end JSP tag
89      private static final int ISI_TAG_ATTR        = 10; // inside tag attribute
90      private static final int ISI_DIR_ATTR        = 11; // inside directive attribute
91      private static final int ISP_TAG_EQ          = 12; // just after '=' in tag
92      private static final int ISP_DIR_EQ          = 13; // just after '=' in directive
93      private static final int ISI_TAG_STRING      = 14; // inside string (value - "") in tag
94      private static final int ISI_DIR_STRING      = 15; // inside string (value - "") in directive
95      private static final int ISI_TAG_STRING_B    = 16; // inside string (value - "") after backslash in tag
96      private static final int ISI_DIR_STRING_B    = 17; // inside string (value - "") after backslash in directive
97      private static final int ISI_TAG_STRING2     = 18; // inside string (value - '') in tag
98      private static final int ISI_DIR_STRING2     = 19; // inside string (value - '') in directive
99      private static final int ISI_TAG_STRING2_B   = 20; // inside string (value - '') after backslash in tag
100     private static final int ISI_DIR_STRING2_B   = 21; // inside string (value - '') after backslash in directive
101     private static final int ISA_ENDSLASH        = 22; // after ending '/' in JSP tag
102     private static final int ISA_ENDPC           = 23; // after ending '%' in JSP directive
103     // comments (+directives)
104     private static final int ISA_LT_PC           = 24; // after '<%' - comment or directive or scriptlet
105     private static final int ISI_JSP_COMMENT     = 25; // after <%-
106     
107     private static final int ISI_JSP_COMMENT_M   = 26; // inside JSP comment after -
108     private static final int ISI_JSP_COMMENT_MM  = 27; // inside JSP comment after --
109     private static final int ISI_JSP_COMMENT_MMP = 28; // inside JSP comment after --%
110     // end state
111     //    static final int ISA_END_JSP                 = 29; // JSP fragment has finished and control
112     // should be returned to master syntax
113     // more errors
114     private static final int ISI_TAG_ERROR       = 30; // error in tag, can be cleared by > or \n
115     private static final int ISI_DIR_ERROR       = 31; // error in directive, can be cleared by %>, \n, \t or space
116     private static final int ISI_DIR_ERROR_P     = 32; // error in directive after %, can be cleared by > or \n
117     
118     private static final int ISA_LT_PC_AT        = 33; // after '<%@' (directive)
119     private static final int ISA_LT_SLASH        = 34; // after '</' sequence
120     private static final int ISA_LT_PC_DASH      = 35; // after <%- ;not comment yet
121     
122     private static final int ISI_SCRIPTLET       = 36; // inside java scriptlet/declaration/expression
123     private static final int ISP_SCRIPTLET_PC   = 37; // just after % in scriptlet
124 
125     //expression language
126     
127     //EL in content language
128     private static final int ISA_EL_DELIM        = 38; //after $ or # in content language
129     private static final int ISI_EL              = 39; //expression language in content (after ${ or #{ )
130     
131     private static final int ISA_BS             = 40; //after backslash in text - needed to disable EL by scaping # or $
132     
133     //scriptlet substate states
134     //in standard syntax jsp
135     private static final int JAVA_SCRITPLET = 1; //java scriptlet
136     private static final int JAVA_DECLARATION = 2; //java declaration
137     private static final int JAVA_EXPRESSION = 3; //java expression
138     //in xml syntax jsp (jsp document)
139     private static final int JAVA_SCRITPLET_DOCUMENT = 4; //java scriptlet in JSP document
140     private static final int JAVA_DECLARATION_DOCUMENT = 5; //java declaration in JSP document
141     private static final int JAVA_EXPRESSION_DOCUMENT = 6; //java expression in JSP document
142     
143    
144     public JspLexer(LexerRestartInfo<JspTokenId> info) {
145         this.input = info.input();
146         this.inputAttributes = info.inputAttributes();
147         this.tokenFactory = info.tokenFactory();
148         if (info.state() == null) {
149             lexerState = INIT;
150             lexerStateBeforeEL = INIT;
151             lexerStateJspScriptlet = INIT;
152         } else {
153             int encoded = ((Integer  ) info.state()).intValue();
154             lexerStateJspScriptlet = encoded / 1000000;
155             int reminder = encoded % 1000000;
156             lexerStateBeforeEL = reminder / 1000;
157             lexerState = encoded % 1000;
158         }
159         if(inputAttributes != null) {
160             jspParseData = (JspParseData)inputAttributes.getValue(LanguagePath.get(JspTokenId.language()), JspParseData.class);
161         } else {
162             jspParseData = null;
163         }
164     }
165     
166     public boolean isIdentifierPart(char character) {
167         return Character.isJavaIdentifierPart(character);
168     }
169     
170     /** Determines whether a given string is a JSP tag. */
171     private boolean isJspTag(String   tagName) {
172         if(tagName.startsWith("jsp:")) { // NOI18N
173             return true;
174         }
175         
176         //TODO handle custom tags from JSP parser here
177         if(jspParseData != null) {
178             int colonIndex = tagName.indexOf(':');//NOI18N
179             if(colonIndex != -1) {
180                 String   prefix = tagName.substring(0, colonIndex);
181                 return jspParseData.isTagLibRegistered(prefix);
182             }
183         }
184         
185         return false;
186     }
187     
188     private boolean isELIgnored() {
189         return jspParseData == null ? false : jspParseData.isELIgnored();
190     }
191     
192     private boolean isXMLSyntax() {
193         return jspParseData == null ? false: jspParseData.isXMLSyntax();
194     }
195     
196     private String   getPossibleTagName() {
197         int actChar;
198         int prev_read = input.readLength(); //remember the size of the read sequence
199         int read = 0;
200         while(true) {
201             actChar = input.read();
202             read++;
203             if(!(Character.isLetter(actChar) ||
204                     Character.isDigit(actChar) ||
205                     (actChar == '_') ||
206                     (actChar == '-') ||
207                     (actChar == ':') ||
208                     (actChar == '.') ||
209                     (actChar == '/')) ||
210                     (actChar == EOF)) { // EOL or not alpha
211                 //end of tagname
212                 String   tagName = input.readText().toString().substring(prev_read, prev_read + read - 1);
213                 input.backup(read); //put the lookahead text back to the buffer
214                 return tagName;
215             }
216         }
217     }
218     
219     /** Looks ahead into the character buffer and checks if a jsp tag name follows. */
220     private boolean followsJspTag() {
221         return isJspTag(getPossibleTagName());
222     }
223     
224     public Token<JspTokenId> nextToken() {
225         int actChar;
226         while (true) {
227             actChar = input.read();
228             
229             if (actChar == EOF) {
230                 if(input.readLengthEOF() == 1) {
231                     return null; //just EOL is read
232                 } else {
233                     //there is something else in the buffer except EOL
234                     //we will return last token now
235                     input.backup(1); //backup the EOL, we will return null in next nextToken() call
236                     break;
237                 }
238             }
239             
240             switch (lexerState) {
241                 case INIT:
242                     switch (actChar) {
243                         //                        case '\n':
244                         //                            return token(JspTokenId.EOL);
245                         case '<':
246                             lexerState = ISA_LT;
247                             break;
248                             //                        default:
249                             //                            state = ISI_ERROR;
250                             //                            break;
251                         case '\\':
252                             lexerState = ISA_BS;
253                             break;
254                         case '$':
255                         case '#': //maybe expression language
256                             lexerStateBeforeEL = lexerState; //remember main state
257                             lexerState = ISA_EL_DELIM;
258                             break;
259                     }
260                     break;
261                     
262                 case ISA_BS:
263                     if(actChar != '\\') {
264                         lexerState = INIT; //prevent scaped EL in text being recognized
265                     }
266                     break;
267                     
268                 case ISA_EL_DELIM:
269                     if(isELIgnored()) {
270                         //reset to previous state - do not recognize EL
271                         lexerState = lexerStateBeforeEL;
272                         lexerStateBeforeEL = INIT;
273                     } else {
274                         switch(actChar) {
275                             case '{':
276                                 if(input.readLength() > 2) {
277                                     //we have something read except the '${' or '#{' => it's content language
278                                     input.backup(2); //backup the '$/#{'
279                                     lexerState = lexerStateBeforeEL; //we will read the '$/#{' again
280                                     lexerStateBeforeEL = INIT;
281                                     return token(JspTokenId.TEXT); //return the content language token
282                                 }
283                                 lexerState = ISI_EL;
284                                 break;
285                             default:
286                                 lexerState = lexerStateBeforeEL;
287                                 lexerStateBeforeEL = INIT;
288                         }
289                     }
290                     break;
291                     
292                 case ISI_EL:
293                     if(actChar == '}') {
294                         //return EL token
295                         lexerState = lexerStateBeforeEL;
296                         lexerStateBeforeEL = INIT;
297                         return token(JspTokenId.EL);
298                     }
299                     //stay in EL
300                     break;
301                     
302                 case ISA_LT:
303                     if (Character.isLetter(actChar) ||
304                             (actChar == '_')
305                             ) { // possible tag begining
306                         input.backup(1); //backup the read letter
307                         String   tagName = getPossibleTagName();
308                         if(isJspTag(tagName)) { //test if a jsp tag follows
309                             if(input.readLength() > 1) {
310                                 //we have something read except the '<' => it's content language
311                                 input.backup(1); //backup the '<'
312                                 lexerState = INIT; //we will read the '<' again
313                                 return token(JspTokenId.TEXT); //return the content language token
314                             }
315                             //possibly switch to scriptlet when <jsp:scriptlet> found
316                             
317                             if("jsp:scriptlet".equals(tagName)) { //NOI18N
318                                 lexerStateJspScriptlet = JAVA_SCRITPLET_DOCUMENT;
319                             } else if("jsp:declaration".equals(tagName)) { //NOI18N
320                                 lexerStateJspScriptlet = JAVA_DECLARATION_DOCUMENT;
321                             } else if("jsp:expression".equals(tagName)) { //NOI18N
322                                 lexerStateJspScriptlet = JAVA_EXPRESSION_DOCUMENT;
323                             }
324                             
325                             lexerState = ISI_TAGNAME;
326                             break;
327                         } else {
328                             //just a content language
329                             lexerState = INIT;
330                             break;
331                         }
332                         //                        input.backup(1);
333                         //                        return token(JspTokenId.SYMBOL);
334                     }
335                     
336                     switch (actChar) {
337                         case '/':
338                             lexerState = ISA_LT_SLASH;
339                             break;
340                             //                        case '\n':
341                             //                            state = ISI_TAG_ERROR;
342                             //                            input.backup(1);
343                             //                            return token(JspTokenId.SYMBOL);
344                         case '%':
345                             lexerState = ISA_LT_PC;
346                             break;
347                         default:
348                             lexerState = INIT; //just content
349                             //                            state = ISI_TAG_ERROR;
350                             //                            break;
351                     }
352                     break;
353                     
354                 case ISA_LT_SLASH:
355                     if (Character.isLetter(actChar) ||
356                             (actChar == '_')) {
357                         //possible end tag beginning
358                         input.backup(1); //backup the first letter
359                         if(followsJspTag()) {
360                             if(input.readLength() > 2) {
361                                 //we have something read except the '</' symbol
362                                 input.backup(2);
363                                 lexerState = INIT;
364                                 return token(JspTokenId.TEXT);
365                             } else {
366                                 lexerState = ISI_ENDTAG;
367                             }
368                             break;
369                         } else {
370                             //just a content language
371                             lexerState = INIT;
372                             break;
373                         }
374                     }
375                     
376                     //not jsp end tag -> just content -> switch to init state
377                     lexerState = INIT;
378                     break;
379                     
380                 case ISI_TAGNAME:
381                 case ISI_DIRNAME:
382                     
383                     if (!(Character.isLetter(actChar) ||
384                             Character.isDigit(actChar) ||
385                             (actChar == '_') ||
386                             (actChar == '-') ||
387                             (actChar == ':') ||
388                             (actChar == '.'))) { // not alpha
389                         switch(actChar) {
390                             case '<':
391                                 lexerState = INIT;
392                                 input.backup(1);
393                                 break;
394                             case '/':
395                                 input.backup(1);
396                                 lexerState = ((lexerState == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
397                                 break;
398                             case '>':
399                                 if(lexerStateJspScriptlet != INIT) {
400                                     //switch to java scriptlet
401                                     lexerState = ISI_SCRIPTLET;
402                                 } else {
403                                     lexerState = INIT;
404                                 }
405                                 break;
406                             case ' ':
407                                 input.backup(1);
408                                 lexerState = ((lexerState == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
409                                 break;
410                             default:
411                                 lexerState = ((lexerState == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
412                         }
413                         return token(JspTokenId.TAG);
414                     }
415                     break;
416                     
417                 case ISP_TAG:
418                 case ISP_DIR:
419                     if (Character.isLetter(actChar) ||
420                             (actChar == '_')
421                             ) {
422                         lexerState = ((lexerState == ISP_TAG) ? ISI_TAG_ATTR : ISI_DIR_ATTR);
423                         break;
424                     }
425                     switch (actChar) {
426                         case '\n':
427                             //                            if (input.readLength() == 1) { // no char
428                             return token(JspTokenId.EOL);
429                             //                            } else { // return string first
430                             //                                input.backup(1);
431                             //                                return decide_jsp_tag_token();
432                             //                            }
433                         case '>': // for tags
434                             if (lexerState == ISP_TAG) {
435                                 //                                if (input.readLength() == 1) {  // no char
436                                 //                                    state = ISA_END_JSP;
437                                 lexerState = INIT;
438                                 return token(JspTokenId.SYMBOL);
439                                 //                                } else { // return string first
440                                 //                                    input.backup(1);
441                                 //                                    return decide_jsp_tag_token();
442                                 //                                }
443                             } else { // directive
444                                 lexerState = ISI_DIR_ERROR;
445                                 break;
446                             }
447                         case '/': // for tags
448                             if (lexerState == ISP_TAG) {
449                                 //                                if (input.readLength() == 1) {  // no char
450                                 lexerState = ISA_ENDSLASH;
451                                 break;
452                                 //                                } else { // return string first
453                                 //                                    input.backup(1);
454                                 //                                    return decide_jsp_tag_token();
455                                 //                                }
456                             } else { // directive
457                                 lexerState = ISI_DIR_ERROR;
458                                 break;
459                             }
460                         case '%': // for directives
461                             if (lexerState == ISP_DIR) {
462                                 //                                if (input.readLength() == 1) {  // no char
463                                 lexerState = ISA_ENDPC;
464                                 break;
465                                 //                                } else { // return string first
466                                 //                                    input.backup(1);
467                                 //                                    return decide_jsp_tag_token();
468                                 //                                }
469                             } else { // tag
470                                 lexerState = ISI_TAG_ERROR;
471                                 break;
472                             }
473                         case '=':
474                             lexerState = ((lexerState == ISP_TAG) ? ISP_TAG_EQ : ISP_DIR_EQ);
475                             return token(JspTokenId.SYMBOL);
476                         case ' ':
477                         case '\t':
478                             lexerState = ((lexerState == ISP_TAG) ? ISI_TAG_I_WS : ISI_DIR_I_WS);
479                             break;
480                         case '<':
481                             // assume that this is the start of the next tag
482                             //we shouldn't have anything else than then the < char in buffer
483                             assert input.readLength() == 1 : "There is something more than '<' char in the read text: '" + input.readText() + "'"; //NOI18N
484                             input.backup(1);
485                             lexerState = INIT;
486                         default: //numbers or illegal symbols
487                             lexerState = ((lexerState == ISP_TAG) ? ISI_TAG_ERROR : ISI_DIR_ERROR);
488                             break;
489                     }
490                     break;
491                     
492                 case ISI_TAG_I_WS:
493                 case ISI_DIR_I_WS:
494                     switch (actChar) {
495                         case ' ':
496                         case '\t':
497                             break;
498                         case '<': //start of the next tag
499                             //                            state = ISA_END_JSP;
500                             lexerState = INIT;
501                             input.backup(1);
502                             return token(JspTokenId.TAG);
503                         default:
504                             lexerState = ((lexerState == ISI_TAG_I_WS) ? ISP_TAG : ISP_DIR);
505                             input.backup(1);
506                             return token(JspTokenId.WHITESPACE);
507                     }
508                     break;
509                     
510                 case ISI_ENDTAG:
511                     if (!(Character.isLetter(actChar) ||
512                             Character.isDigit(actChar) ||
513                             (actChar == '_') ||
514                             (actChar == '-') ||
515                             (actChar == ':'))
516                             ) { // not alpha
517                         lexerState = ISP_TAG;
518                         input.backup(1);
519                         return token(JspTokenId.TAG);
520                     }
521                     break;
522                     
523                 case ISI_TAG_ATTR:
524                 case ISI_DIR_ATTR:
525                     if (!(Character.isLetter(actChar) ||
526                             Character.isDigit(actChar) ||
527                             (actChar == '_') ||
528                             (actChar == ':') ||
529                             (actChar == '-'))
530                             ) { // not alpha or '-' (http-equiv)
531                         lexerState = ((lexerState == ISI_TAG_ATTR) ? ISP_TAG : ISP_DIR);
532                         input.backup(1);
533                         return token(JspTokenId.ATTRIBUTE);
534                     }
535                     break;
536                     
537                 case ISP_TAG_EQ:
538                 case ISP_DIR_EQ:
539                     switch (actChar) {
540                         case '\n':
541                             //                            if (input.readLength() == 1) { // no char
542                             return token(JspTokenId.EOL);
543                             //                            } else { // return string first
544                             //                                input.backup(1);
545                             //                                return token(JspTokenId.ATTR_VALUE);
546                             //                            }
547                         case '"':
548                             lexerState = ((lexerState == ISP_TAG_EQ) ? ISI_TAG_STRING : ISI_DIR_STRING);
549                             break;
550                         case '\'':
551                             lexerState = ((lexerState == ISP_TAG_EQ) ? ISI_TAG_STRING2 : ISI_DIR_STRING2);
552                             break;
553                         case ' ':
554                         case '\t':
555                             // don't change the state
556                             break;
557                         default:
558                             //invalid value - lets backup it and swith to tag content
559                             lexerState = ((lexerState == ISP_TAG_EQ) ? ISP_TAG : ISP_DIR);
560                             input.backup(input.readLength());
561                             break;
562                     }
563                     break;
564                     
565                 case ISI_TAG_STRING:
566                 case ISI_DIR_STRING:
567                 case ISI_TAG_STRING2:
568                 case ISI_DIR_STRING2:
569                     if ((actChar == '"') && ((lexerState == ISI_TAG_STRING) || (lexerState == ISI_DIR_STRING))) {
570                         lexerState = ((lexerState == ISI_TAG_STRING) ? ISP_TAG : ISP_DIR);
571                         return token(JspTokenId.ATTR_VALUE);
572                     }
573                     
574                     if ((actChar == '\'') && ((lexerState == ISI_TAG_STRING2) || (lexerState == ISI_DIR_STRING2))) {
575                         lexerState = ((lexerState == ISI_TAG_STRING2) ? ISP_TAG : ISP_DIR);
576                         return token(JspTokenId.ATTR_VALUE);
577                     }
578                     
579                     switch (actChar) {
580                         case '\\':
581                             switch (lexerState) {
582                                 case ISI_TAG_STRING:
583                                     lexerState = ISI_TAG_STRING_B;
584                                     break;
585                                 case ISI_DIR_STRING:
586                                     lexerState = ISI_DIR_STRING_B;
587                                     break;
588                                 case ISI_TAG_STRING2:
589                                     lexerState = ISI_TAG_STRING2_B;
590                                     break;
591                                 case ISI_DIR_STRING2:
592                                     lexerState = ISI_DIR_STRING2_B;
593                                     break;
594                             }
595                             break;
596                         case '\n':
597                             if (input.readLength() == 1) { // no char
598                                 return token(JspTokenId.EOL);
599                             } else { // return string first
600                                 input.backup(1);
601                                 return token(JspTokenId.ATTR_VALUE);
602                             }
603                         case '$':
604                         case '#':
605                             if(input.readLength() > 1) {
606                                 //return part of the attribute value before EL
607                                 input.backup(1); //backup $ or #
608                                 return token(JspTokenId.ATTR_VALUE);
609                             } else {
610                                 lexerStateBeforeEL = lexerState; //remember main state
611                                 lexerState = ISA_EL_DELIM;
612                             }
613                             break;
614                             
615                         default:
616                             break;//stay in ISI_TAG_STRING/2;
617                             
618                     }
619                     break;
620                     
621                 case ISI_TAG_STRING_B:
622                 case ISI_DIR_STRING_B:
623                 case ISI_TAG_STRING2_B:
624                 case ISI_DIR_STRING2_B:
625                     switch (actChar) {
626                         case '"':
627                         case '\'':
628                         case '\\':
629                         case '$':
630                         case '#':
631                             break;
632                         default:
633                             input.backup(1);
634                             break;
635                     }
636                     switch (lexerState) {
637                         case ISI_TAG_STRING_B:
638                             lexerState = ISI_TAG_STRING;
639                             break;
640                         case ISI_DIR_STRING_B:
641                             lexerState = ISI_DIR_STRING;
642                             break;
643                         case ISI_TAG_STRING2_B:
644                             lexerState = ISI_TAG_STRING2;
645                             break;
646                         case ISI_DIR_STRING2_B:
647                             lexerState = ISI_DIR_STRING2;
648                             break;
649                     }
650                     break;
651                     
652                 case ISA_ENDSLASH:
653                     switch (actChar) {
654                         case '>':
655                             //                            state = ISA_END_JSP;
656                             lexerState = INIT;
657                             return token(JspTokenId.SYMBOL);
658                         case '\n':
659                             lexerState = ISI_TAG_ERROR;
660                             input.backup(1);
661                             return token(JspTokenId.SYMBOL);
662                         default:
663                             lexerState = ISP_TAG;
664                             input.backup(1);
665                             return token(JspTokenId.SYMBOL);
666                     }
667                     //break; not reached
668                     
669                 case ISA_ENDPC:
670                     switch (actChar) {
671                         case '>':
672                             //                            state = ISA_END_JSP;
673                             lexerState = INIT;
674                             return token(JspTokenId.SYMBOL);
675                         case '\n':
676                             lexerState = ISI_DIR_ERROR;
677                             input.backup(1);
678                             return token(JspTokenId.SYMBOL);
679                         default:
680                             lexerState = ISP_DIR;
681                             input.backup(1);
682                             return token(JspTokenId.SYMBOL);
683                     }
684                     //break; not reached
685                     
686                 case ISA_LT_PC:
687                     switch (actChar) {
688                         case '@':
689                             if(input.readLength() == 3) {
690                                 // just <%@ read
691                                 lexerState = ISA_LT_PC_AT;
692                                 return token(JspTokenId.SYMBOL);
693                             } else {
694                                 //jsp symbol, but we also have content language in the buffer
695                                 input.backup(3); //backup <%@
696                                 lexerState = INIT;
697                                 return token(JspTokenId.TEXT); //return CL token
698                             }
699                         case '-': //may be JSP comment
700                             lexerState = ISA_LT_PC_DASH;
701                             break;
702                         case '!': // java declaration
703                         case '=': // java expression
704                             if(input.readLength() == 3) {
705                                 // just <%! or <%= read
706                                 lexerStateJspScriptlet = actChar == '!' ? JAVA_DECLARATION : JAVA_EXPRESSION;
707                                 lexerState = ISI_SCRIPTLET;
708                                 return token(JspTokenId.SYMBOL2);
709                             } else {
710                                 //jsp symbol, but we also have content language in the buffer
711                                 input.backup(3); //backup <%! or <%=
712                                 lexerState = INIT;
713                                 return token(JspTokenId.TEXT); //return CL token
714                             }
715                         default:  //java scriptlet delimiter '<%'
716                             if(input.readLength() == 3) {
717                                 // just <% + something != [-,!,=,@] read
718                                 lexerStateJspScriptlet = JAVA_SCRITPLET;
719                                 lexerState = ISI_SCRIPTLET;
720                                 input.backup(1); //backup the third character, it is a part of the java scriptlet
721                                 return token(JspTokenId.SYMBOL2);
722                             } else {
723                                 //jsp symbol, but we also have content language in the buffer
724                                 input.backup(3); //backup <%@
725                                 lexerState = INIT;
726                                 return token(JspTokenId.TEXT); //return CL token
727                             }
728                     }
729                     break;
730                     
731                 case ISI_SCRIPTLET:
732                     switch(actChar) {
733                         case '%':
734                             lexerState = ISP_SCRIPTLET_PC;
735                             break;
736                         case '<':
737                             //may be end of scriptlet section in JSP document
738                             String   tagName = getPossibleTagName();
739                             if("/jsp:scriptlet".equals(tagName) || //NOI18N
740                                     "/jsp:declaration".equals(tagName) || //NOI18N
741                                     "/jsp:expression".equals(tagName)) { //NOI18N
742                                 if(input.readLength() == 1) {
743                                     //just the '<' symbol read
744                                     input.backup(1);
745                                     lexerState = INIT;
746                                 } else {
747                                     //return the scriptlet content
748                                     input.backup(1); // backup '<' we will read it again
749                                     int lxs = lexerStateJspScriptlet;
750                                     lexerStateJspScriptlet = INIT;
751                                     return scriptletToken(JspTokenId.SCRIPTLET, lxs);
752                                 }
753                             }
754                     }
755                     break;
756                     
757                 case ISP_SCRIPTLET_PC:
758                     switch(actChar) {
759                         case '>':
760                             if(input.readLength() == 2) {
761                                 //just the '%>' symbol read
762                                 lexerState = INIT;
763                                 return token(JspTokenId.SYMBOL2);
764                             } else {
765                                 //return the scriptlet content
766                                 input.backup(2); // backup '%>' we will read JUST them again
767                                 lexerState = ISI_SCRIPTLET;
768                                 int lxs = lexerStateJspScriptlet;
769                                 lexerStateJspScriptlet = INIT;
770                                 return scriptletToken(JspTokenId.SCRIPTLET, lxs);
771                             }
772                         default:
773                             lexerState = ISI_SCRIPTLET;
774                             break;
775                     }
776                     break;
777                     
778                 case ISA_LT_PC_DASH:
779                     switch(actChar) {
780                         case '-':
781                             if(input.readLength() == 4) {
782                                 //just the '<%--' symbol read
783                                 lexerState = ISI_JSP_COMMENT;
784                             } else {
785                                 //return the scriptlet content
786                                 input.backup(4); // backup '<%--', we will read it again
787                                 lexerState = INIT;
788                                 return token(JspTokenId.TEXT);
789                             }
790                             break;
791                         default:
792                             //                            state = ISA_END_JSP;
793                             lexerState = INIT; //XXX how to handle content language?
794                             return token(JspTokenId.TEXT); //marek: should I token here????
795                     }
796                     
797                     // JSP states
798                 case ISI_JSP_COMMENT:
799                     switch (actChar) {
800                         case '\n':
801                             if (input.readLength() == 1) { // no char
802                                 return token(JspTokenId.EOL);
803                             } else { // return block comment first
804                                 input.backup(1);
805                                 return token(JspTokenId.COMMENT);
806                             }
807                         case '-':
808                             lexerState = ISI_JSP_COMMENT_M;
809                             break;
810                     }
811                     break;
812                     
813                 case ISI_JSP_COMMENT_M:
814                     switch (actChar) {
815                         case '\n':
816                             lexerState = ISI_JSP_COMMENT;
817                             if (input.readLength() == 1) { // no char
818                                 return token(JspTokenId.EOL);
819                             } else { // return block comment first
820                                 input.backup(1);
821                                 return token(JspTokenId.COMMENT);
822                             }
823                         case '-':
824                             lexerState = ISI_JSP_COMMENT_MM;
825                             break;
826                         default:
827                             lexerState = ISI_JSP_COMMENT;
828                             break;
829                     }
830                     break;
831                     
832                 case ISI_JSP_COMMENT_MM:
833                     switch (actChar) {
834                         case '\n':
835                             lexerState = ISI_JSP_COMMENT;
836                             if (input.readLength() == 1) { // no char
837                                 return token(JspTokenId.EOL);
838                             } else { // return block comment first
839                                 input.backup(1);
840                                 return token(JspTokenId.COMMENT);
841                             }
842                         case '%':
843                             lexerState = ISI_JSP_COMMENT_MMP;
844                             break;
845                         case '-':
846                             lexerState = ISI_JSP_COMMENT_MM;
847                             break;
848                         default:
849                             lexerState = ISI_JSP_COMMENT;
850                             break;
851                     }
852                     break;
853                     
854                 case ISI_JSP_COMMENT_MMP:
855                     switch (actChar) {
856                         case '\n':
857                             lexerState = ISI_JSP_COMMENT;
858                             if (input.readLength() == 1) { // no char
859                                 return token(JspTokenId.EOL);
860                             } else { // return block comment first
861                                 input.backup(1);
862                                 return token(JspTokenId.COMMENT);
863                             }
864                         case '>':
865                             //                            state = ISA_END_JSP;
866                             lexerState = INIT;
867                             return token(JspTokenId.COMMENT);
868                         default:
869                             lexerState = ISI_JSP_COMMENT;
870                             break;
871                     }
872                     break;
873                     
874                 case ISI_ERROR:
875                     switch (actChar) {
876                         case '\n':
877                             lexerState = INIT;
878                             input.backup(1);
879                             return token(JspTokenId.ERROR);
880                         case '<':
881                             lexerState = ISA_LT;
882                             input.backup(1);
883                             return token(JspTokenId.ERROR);
884                     }
885                     break;
886                     
887                 case ISI_TAG_ERROR:
888                     switch (actChar) {
889                         case '\n':
890                             if (input.readLength() == 1) { // no char
891                                 lexerState = ISP_TAG;
892                                 return token(JspTokenId.EOL);
893                             } else { // return error first
894                                 input.backup(1);
895                                 return token(JspTokenId.ERROR);
896                             }
897                         case '>':
898                         case ' ':
899                         case '\t':
900                             lexerState = ISP_TAG;
901                             input.backup(1);
902                             return token(JspTokenId.ERROR);
903                         default:
904                             break;
905                     }
906                     break;
907                     
908                 case ISI_DIR_ERROR:
909                     switch (actChar) {
910                         case '\n':
911                             if (input.readLength() == 1) { // no char
912                                 lexerState = ISP_DIR;
913                                 return token(JspTokenId.EOL);
914                             } else { // return error first
915                                 input.backup(1);
916                                 return token(JspTokenId.ERROR);
917                             }
918                             //                        case '%':
919                         case '\t':
920                         case ' ':
921                             lexerState = ISP_DIR;
922                             if(input.readLength() > 1) {
923                                 input.backup(1);
924                                 return token(JspTokenId.ERROR);
925                             }
926                         default:
927                             break;
928                     }
929                     break;
930                     
931                 case ISI_DIR_ERROR_P:
932                     switch (actChar) {
933                         case '\n':
934                             if (input.readLength() == 1) { // no char
935                                 lexerState = ISI_DIR_I_WS;
936                                 return token(JspTokenId.EOL);
937                             } else { // return error first
938                                 input.backup(1);
939                                 return token(JspTokenId.ERROR);
940                             }
941                         case '>':
942                             input.backup(2);
943                             lexerState = ISI_DIR_I_WS;
944                             return token(JspTokenId.ERROR);
945                         default:
946                             break;
947                     }
948                     break;
949                     
950                     //                case ISA_END_JSP:
951                     //                    if (input.readLength() == 1) {
952                     //                        offset++;
953                     //                        return JspTokenId.AFTER_UNEXPECTED_LT;
954                     //                    }
955                     //                    else {
956                     //                        return JspTokenId.TEXT;
957                     //                    }
958                     //                    //break;
959                     
960                     // added states
961                 case ISA_LT_PC_AT:
962                     if (Character.isLetter(actChar) ||
963                             (actChar == '_')
964                             ) {
965                         // the directive starts
966                         lexerState = ISI_DIRNAME;
967                         break;
968                     }
969                     
970                     switch (actChar) {
971                         case '\n':
972                             if (input.readLength() == 1) { // no char
973                                 return token(JspTokenId.EOL);
974                             } else {
975                                 input.backup(1);
976                                 return token(JspTokenId.SYMBOL);
977                             }
978                         case ' ':
979                         case '\t':
980                             break;
981                         default:
982                             //error
983                             lexerState = ISI_DIR_ERROR;
984                             if(input.readLength() > 1) {
985                                 input.backup(1); //backup the error char if there is something more in the buffer
986                                 return token(JspTokenId.SYMBOL);
987                             }
988                             break;
989                     }
990                     break;
991                     
992             }
993             
994         }
995         
996         // At this stage there's no more text in the scanned buffer.
997         // Scanner first checks whether this is completely the last
998         // available buffer.
999         
1000        switch(lexerState) {
1001            case INIT:
1002            case ISA_LT:
1003            case ISA_LT_SLASH:
1004                if (input.readLength() == 0) {
1005                    return null;
1006                } else {
1007                    return token(JspTokenId.TEXT);
1008                }
1009            case ISI_ERROR:
1010            case ISI_TAG_ERROR:
1011                lexerState = INIT;
1012                return token(JspTokenId.ERROR);
1013            case ISI_DIR_ERROR:
1014            case ISI_DIR_ERROR_P:
1015                lexerState = INIT;
1016                return token(JspTokenId.ERROR);
1017            case ISA_ENDSLASH:
1018            case ISP_TAG_EQ:
1019                lexerState = INIT;
1020                return token(JspTokenId.SYMBOL);
1021            case ISA_LT_PC:
1022            case ISA_LT_PC_DASH:
1023            case ISA_ENDPC:
1024            case ISP_DIR_EQ:
1025                lexerState = INIT;
1026                return token(JspTokenId.SYMBOL);
1027            case ISI_TAGNAME:
1028            case ISI_ENDTAG:
1029                lexerState = INIT;
1030                return token(JspTokenId.TAG);
1031            case ISI_DIRNAME:
1032                lexerState = INIT;
1033                return token(JspTokenId.TAG);
1034            case ISP_TAG:
1035            case ISI_TAG_I_WS:
1036                lexerState = INIT;
1037                return token(JspTokenId.TAG);
1038            case ISP_DIR:
1039            case ISI_DIR_I_WS:
1040            case ISA_LT_PC_AT:
1041                lexerState = INIT;
1042                return token(JspTokenId.TAG);
1043            case ISI_TAG_ATTR:
1044                lexerState = INIT;
1045                return token(JspTokenId.ATTRIBUTE);
1046            case ISI_DIR_ATTR:
1047                lexerState = INIT;
1048                return token(JspTokenId.ATTRIBUTE);
1049            case ISI_TAG_STRING:
1050            case ISI_TAG_STRING_B:
1051            case ISI_TAG_STRING2:
1052            case ISI_TAG_STRING2_B:
1053                lexerState = INIT;
1054                return token(JspTokenId.ATTR_VALUE);
1055            case ISI_DIR_STRING:
1056            case ISI_DIR_STRING_B:
1057            case ISI_DIR_STRING2:
1058            case ISI_DIR_STRING2_B:
1059                lexerState = INIT;
1060                return token(JspTokenId.ATTR_VALUE);
1061            case ISI_JSP_COMMENT:
1062            case ISI_JSP_COMMENT_M:
1063            case ISI_JSP_COMMENT_MM:
1064            case ISI_JSP_COMMENT_MMP:
1065                lexerState = INIT;
1066                return token(JspTokenId.COMMENT);
1067            case ISA_EL_DELIM:
1068                lexerState = INIT;
1069                return token(JspTokenId.TEXT);
1070            case ISI_EL:
1071                lexerState = INIT;
1072                return token(JspTokenId.EL);
1073            case ISP_SCRIPTLET_PC:
1074                lexerState = INIT;
1075                return token(JspTokenId.SYMBOL2);
1076            case ISI_SCRIPTLET:
1077                lexerState = INIT;
1078                return scriptletToken(JspTokenId.SCRIPTLET, lexerStateJspScriptlet);
1079            default:
1080                break;
1081        }
1082        
1083        return null;
1084        
1085    }
1086    
1087    private Token<JspTokenId> token(JspTokenId tokenId) {
1088        if(LOG) {
1089            checkToken(tokenId);
1090        }
1091        return tokenFactory.createToken(tokenId);
1092    }
1093    
1094    private Token<JspTokenId> scriptletToken(JspTokenId tokenId, int javaCodeType) {
1095        if(LOG) {
1096            checkToken(tokenId);
1097        }
1098        JspTokenId.JavaCodeType scriptletType;
1099        switch(javaCodeType) {
1100            case JAVA_SCRITPLET:
1101            case JAVA_SCRITPLET_DOCUMENT:
1102                scriptletType = JspTokenId.JavaCodeType.SCRIPTLET;
1103                break;
1104            case JAVA_DECLARATION:
1105            case JAVA_DECLARATION_DOCUMENT:
1106                scriptletType = JspTokenId.JavaCodeType.DECLARATION;
1107                break;
1108            case JAVA_EXPRESSION:
1109            case JAVA_EXPRESSION_DOCUMENT:
1110                scriptletType = JspTokenId.JavaCodeType.EXPRESSION;
1111                break;
1112            default:
1113                throw new IllegalStateException  ("Unsupported scriptlet type " + lexerStateJspScriptlet);
1114        }
1115        
1116        return tokenFactory.createPropertyToken(tokenId, input.readLength(),
1117                new JspTokenPropertyProvider(scriptletType), PartType.COMPLETE);
1118    }
1119    
1120    private void checkToken(JspTokenId tokenId) {
1121            if(input.readLength() == 0) {
1122                LOGGER.log(Level.INFO, "Found zero length token: ");
1123            }
1124            LOGGER.log(Level.INFO, "[" + this.getClass().getSimpleName() + "] token ('" + input.readText().toString() + "'; id=" + tokenId + "; state=" + state() + ")\n");
1125    }
1126    
1127    public void release() {
1128    }
1129    
1130    private static class JspTokenPropertyProvider implements TokenPropertyProvider {
1131        
1132        private final JspTokenId.JavaCodeType scriptletType;
1133        
1134        JspTokenPropertyProvider(JspTokenId.JavaCodeType scriptletType) {
1135            this.scriptletType = scriptletType;
1136        }
1137
1138        public Object   getValue(Token token, Object   key) {
1139            if (JspTokenId.SCRIPTLET_TOKEN_TYPE_PROPERTY.equals(key))
1140                return scriptletType;
1141            return null;
1142        }
1143
1144    }
1145    
1146}
1147
1148
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags