KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > editor > ext > html > HTMLSyntax


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.editor.ext.html;
21
22 import org.netbeans.editor.Syntax;
23 import org.netbeans.editor.TokenID;
24
/**
 * Lexical analyzer for HTML source files.
 *
 * @author Petr Nejedly
 * @author Miloslav Metelka
 * @version 1.00
 *
 * @deprecated Use Lexer API instead. See {@link HTMLLexer} and {@link HTMLTokenId}.
 */

34
35 public class HTMLSyntax extends Syntax {
36
37     /** Internal state of the lexical analyzer before entering subanalyzer of
38      * character references. It is initially set to INIT, but before first usage,
39      * this will be overwritten with state, which originated transition to
40      * charref subanalyzer.
41     */

42     protected int subState = INIT;
43
44
45     // Internal states
46
private static final int ISI_TEXT = 1; // Plain text between tags
47
private static final int ISI_ERROR = 2; // Syntax error in HTML syntax
48
private static final int ISA_LT = 3; // After start of tag delimiter - "<"
49
private static final int ISA_SLASH = 4; // After ETAGO - "</"
50
private static final int ISI_ENDTAG = 5; // Inside endtag - "</[a..Z]+"
51
private static final int ISP_ENDTAG_X = 6; // X-switch after ENDTAG's name
52
private static final int ISP_ENDTAG_WS = 7; // In WS in ENDTAG - "</A_ _>"
53
private static final int ISI_TAG = 8; // Inside tag - "<[a..Z]+"
54
private static final int ISP_TAG_X = 9; // X-switch after TAG's name
55
private static final int ISP_TAG_WS = 10; // In WS in TAG - "<A_ _...>"
56
private static final int ISI_ARG = 11; // Inside tag's argument - "<A h_r_...>"
57
private static final int ISP_ARG_X = 12; // X-switch after ARGUMENT's name
58
private static final int ISP_ARG_WS = 13; // Inside WS after argument awaiting '='
59
private static final int ISP_EQ = 14; // X-switch after '=' in TAG's ARGUMENT
60
private static final int ISP_EQ_WS = 15; // In WS after '='
61
private static final int ISI_VAL = 16; // Non-quoted value
62
private static final int ISI_VAL_QUOT = 17; // Single-quoted value - may contain " chars
63
private static final int ISI_VAL_DQUOT = 18; // Double-quoted value - may contain ' chars
64
private static final int ISA_SGML_ESCAPE = 19; // After "<!"
65
private static final int ISA_SGML_DASH = 20; // After "<!-"
66
private static final int ISI_HTML_COMMENT = 21; // Somewhere after "<!--"
67
private static final int ISA_HTML_COMMENT_DASH = 22; // Dash in comment - maybe end of comment
68
private static final int ISI_HTML_COMMENT_WS = 23; // After end of comment, awaiting end of comment declaration
69
private static final int ISI_SGML_DECL = 24;
70     private static final int ISA_SGML_DECL_DASH = 25;
71     private static final int ISI_SGML_COMMENT = 26;
72     private static final int ISA_SGML_COMMENT_DASH = 27;
73     private static final int ISA_REF = 28; // when comes to character reference, e.g. &amp;, after &
74
private static final int ISI_REF_NAME = 29; // if the reference is symbolic - by predefined name
75
private static final int ISA_REF_HASH = 30; // for numeric references - after &#
76
private static final int ISI_REF_DEC = 31; // decimal character reference, e.g. &#345;
77
private static final int ISA_REF_X = 32; //
78
private static final int ISI_REF_HEX = 33; // hexadecimal reference, in &#xa.. of &#X9..
79
private static final int ISI_TAG_SLASH = 34; //after slash in html tag
80

/**
 * Creates the analyzer and binds it to the HTML token context, so the
 * tokens it returns are interpreted as {@link HTMLTokenContext} tokens.
 */
public HTMLSyntax() {
    tokenContextPath = HTMLTokenContext.contextPath;
}
84
85     private final boolean isAZ( char ch ) {
86         return( (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') );
87     }
88
89     private final boolean isName( char ch ) {
90         return Character.isLetterOrDigit(ch) ||
91                 ch == '-' || ch == '_' || ch == '.' || ch == ':';
92 // return( (ch >= 'a' && ch <= 'z') ||
93
// (ch >= 'A' && ch <= 'Z') ||
94
// (ch >= '0' && ch <= '9') ||
95
// ch == '-' || ch == '_' || ch == '.' || ch == ':' );
96

97     }
98
99     /**
100      * Resolves if given char is whitespace in terms of HTML4.0 specs
101      * According to specs, following characters are treated as whitespace:
102      * Space - <CODE>' '</CODE>, Tab - <CODE>' '</CODE>,
103      * Formfeed - <CODE>' '</CODE>,Zero-width space - <CODE>'?'</CODE>,
104      * Carriage return - <CODE>'
105 '</CODE> and Line feed - <CODE>'
106 '</CODE>
107      * CR's are included for completenes only, they should never appear in document
108      */

109
110     private final boolean isWS( char ch ) {
111         return Character.isWhitespace(ch);
112 // return ( ch == '\u0020' || ch == '\u0009' || ch == '\u000c'
113
// || ch == '\u200b' || ch == '\n' || ch == '\r' );
114
}
115
116     protected TokenID parseToken() {
117         char actChar;
118
119         while(offset < stopOffset) {
120             actChar = buffer[offset];
121              //System.out.println("HTMLSyntax: parseToken tokenOffset=" + tokenOffset + ", actChar='" + actChar + "', offset=" + offset + ", state=" + getStateName(state) +
122
// ", stopOffset=" + stopOffset + ", lastBuffer=" + lastBuffer);
123
switch( state ) {
124             case INIT: // DONE
125
switch( actChar ) {
126                 case '<':
127                     state = ISA_LT;
128                     break;
129                 case '&':
130                     state = ISA_REF;
131                     subState = ISI_TEXT;
132                     break;
133                 default:
134                     state = ISI_TEXT;
135                     break;
136                 }
137                 break;
138
139             case ISI_TEXT: // DONE
140
switch( actChar ) {
141                 case '<':
142                 case '&':
143                     state = INIT;
144                     return HTMLTokenContext.TEXT;
145                 }
146                 break;
147
148             case ISI_ERROR: // DONE
149
offset++;
150                 state = INIT;
151                 return HTMLTokenContext.ERROR;
152
153             case ISA_LT: // PENDING other transitions - e.g '<?'
154
if( isAZ( actChar ) ) { // <'a..Z'
155
state = ISI_TAG;
156                     return HTMLTokenContext.TAG_OPEN_SYMBOL;
157                 }
158                 switch( actChar ) {
159                 case '/': // ETAGO - </
160
state = ISA_SLASH;
161                     offset++;
162                     return HTMLTokenContext.TAG_OPEN_SYMBOL;
163                 case '>': // Empty start tag <>, RELAXED
164
offset++;
165                     state = INIT;
166                     return HTMLTokenContext.TAG_CLOSE_SYMBOL;
167                 case '!':
168                     state = ISA_SGML_ESCAPE;
169                     break;
170                 default: // Part of text, RELAXED
171
state = ISI_TEXT;
172                     continue; // don't eat the char, maybe its '&'
173
}
174                 break;
175
176             case ISA_SLASH: // DONE
177
if( isAZ( actChar ) ) { // </'a..Z'
178
state = ISI_ENDTAG;
179                     break;
180                 }
181                 switch( actChar ) {
182                 case '>': // Empty end tag </>, RELAXED
183
offset++;
184                     state = INIT;
185                     return HTMLTokenContext.TAG_CLOSE_SYMBOL;
186                 default: // Part of text, e.g. </3, </'\n', RELAXED
187
state = ISI_TEXT;
188                     continue; // don'e eat the char
189
}
190                 //break;
191

192             case ISI_ENDTAG: // DONE
193
if( isName( actChar ) ) break; // Still in endtag identifier, eat next char
194
state = ISP_ENDTAG_X;
195                 return HTMLTokenContext.TAG_CLOSE;
196
197
198             case ISP_ENDTAG_X: // DONE
199
if( isWS( actChar ) ) {
200                     state = ISP_ENDTAG_WS;
201                     break;
202                 }
203                 switch( actChar ) {
204                 case '>': // Closing of endtag, e.g. </H6 _>_
205
offset++;
206                     state = INIT;
207                     return HTMLTokenContext.TAG_CLOSE_SYMBOL;
208                 case '<': // next tag, e.g. </H6 _<_, RELAXED
209
state = INIT;
210                     continue;
211                 default:
212                     state = ISI_ERROR;
213                     continue; //don't eat
214
}
215                 //break;
216

217             case ISP_ENDTAG_WS: // DONE
218
if( isWS( actChar ) ) break; // eat all WS
219
state = ISP_ENDTAG_X;
220                 return HTMLTokenContext.WS;
221
222
223             case ISI_TAG: // DONE
224
if( isName( actChar ) ) break; // Still in tag identifier, eat next char
225
state = ISP_TAG_X;
226                 return HTMLTokenContext.TAG_OPEN;
227
228             case ISP_TAG_X: // DONE
229
if( isWS( actChar ) ) {
230                     state = ISP_TAG_WS;
231                     break;
232                 }
233                 if( isAZ( actChar ) ) {
234                     state = ISI_ARG;
235                     break;
236                 }
237                 switch( actChar ) {
238                 case '/':
239                     offset++;
240                     state = ISI_TAG_SLASH;
241                     continue;
242                 case '>':
243                     offset++;
244                     state = INIT;
245                     return HTMLTokenContext.TAG_CLOSE_SYMBOL;
246                 case '<':
247                     state = INIT;
248                     continue; // don't eat it!!!
249
default:
250                     state = ISI_ERROR;
251                     continue;
252                 }
253                 //break;
254

255             case ISP_TAG_WS: // DONE
256
if( isWS( actChar ) ) break; // eat all WS
257
state = ISP_TAG_X;
258                 return HTMLTokenContext.WS;
259
260             case ISI_TAG_SLASH:
261                 switch( actChar ) {
262                     case '>':
263                         offset++;
264                         state = INIT;
265                         return HTMLTokenContext.TAG_CLOSE_SYMBOL;
266                     default:
267                         state = ISI_ERROR;
268                         continue;
269                 }
270                 
271             case ISI_ARG: // DONE
272
if( isName( actChar ) ) break; // eat next char
273
state = ISP_ARG_X;
274                 return HTMLTokenContext.ARGUMENT;
275
276             case ISP_ARG_X:
277                 if( isWS( actChar ) ) {
278                     state = ISP_ARG_WS;
279                     break;
280                 }
281                 if( isAZ( actChar ) ) {
282                     state = ISI_ARG;
283                     break;
284                 }
285                 switch( actChar ) {
286                 case '/':
287                 case '>':
288                     offset++;
289                     state = INIT;
290                     return HTMLTokenContext.TAG_OPEN;
291                 case '<':
292                     state = INIT;
293                     continue; // don't eat !!!
294
case '=':
295                     offset++;
296                     state = ISP_EQ;
297                     return HTMLTokenContext.OPERATOR;
298                 default:
299                     state = ISI_ERROR;
300                     continue;
301                 }
302                 //break;
303

304             case ISP_ARG_WS:
305                 if( isWS( actChar ) ) break; // Eat all WhiteSpace
306
state = ISP_ARG_X;
307                 return HTMLTokenContext.WS;
308
309             case ISP_EQ:
310                 if( isWS( actChar ) ) {
311                     state = ISP_EQ_WS;
312                     break;
313                 }
314                 switch( actChar ) {
315                 case '\'':
316                     state = ISI_VAL_QUOT;
317                     break;
318                 case '"':
319                     state = ISI_VAL_DQUOT;
320                     break;
321                 case '>':
322                     offset++;
323                     state = INIT;
324                     return HTMLTokenContext.TAG_OPEN;
325                 default:
326                     state = ISI_VAL; //everything else if attribute value
327
break;
328                 }
329                 break;
330
331             case ISP_EQ_WS:
332                 if( isWS( actChar ) ) break; // Consume all WS
333
state = ISP_EQ;
334                 return HTMLTokenContext.WS;
335
336
337             case ISI_VAL:
338                 if( !isWS( actChar )
339                     && !(actChar == '/' || actChar == '>' || actChar == '<')) break; // Consume whole value
340
state = ISP_TAG_X;
341                 return HTMLTokenContext.VALUE;
342
343             case ISI_VAL_QUOT:
344                 switch( actChar ) {
345                 case '\'':
346                     offset++;
347                     state = ISP_TAG_X;
348                     return HTMLTokenContext.VALUE;
349                 case '&':
350                     if( offset == tokenOffset ) {
351                         subState = state;
352                         state = ISA_REF;
353                         break;
354                     } else {
355                         return HTMLTokenContext.VALUE;
356                     }
357                 }
358                 break; // else simply consume next char of VALUE
359

360             case ISI_VAL_DQUOT:
361                 switch( actChar ) {
362                 case '"':
363                     offset++;
364                     state = ISP_TAG_X;
365                     return HTMLTokenContext.VALUE;
366                 case '&':
367                     if( offset == tokenOffset ) {
368                         subState = state;
369                         state = ISA_REF;
370                         break;
371                     } else {
372                         return HTMLTokenContext.VALUE;
373                     }
374                 }
375                 break; // else simply consume next char of VALUE
376

377
378
379             case ISA_SGML_ESCAPE: // DONE
380
if( isAZ(actChar) ) {
381                     state = ISI_SGML_DECL;
382                     break;
383                 }
384                 switch( actChar ) {
385                 case '-':
386                     state = ISA_SGML_DASH;
387                     break;
388                 default:
389                     state = ISI_TEXT;
390                     continue;
391                 }
392                 break;
393
394             case ISA_SGML_DASH: // DONE
395
switch( actChar ) {
396                 case '-':
397                     state = ISI_HTML_COMMENT;
398                     break;
399                 default:
400                     state = ISI_TEXT;
401                     continue;
402                 }
403                 break;
404
405             case ISI_HTML_COMMENT: // DONE
406
switch( actChar ) {
407                 case '-':
408                     state = ISA_HTML_COMMENT_DASH;
409                     break;
410                 //create an HTML comment token for each line of the comment - a performance fix for #43532
411
case '\n':
412                     offset++;
413                     //leave the some state - we are still in an HTML comment,
414
//we just need to create a token for each line.
415
return HTMLTokenContext.BLOCK_COMMENT;
416                 }
417                 break;
418
419             case ISA_HTML_COMMENT_DASH:
420                 switch( actChar ) {
421                 case '-':
422                     state = ISI_HTML_COMMENT_WS;
423                     break;
424                 default:
425                     state = ISI_HTML_COMMENT;
426                     continue;
427                 }
428                 break;
429
430             case ISI_HTML_COMMENT_WS: // DONE
431
if( isWS( actChar ) ) break; // Consume all WS
432
switch( actChar ) {
433                 case '>':
434                     offset++;
435                     state = INIT;
436                     return HTMLTokenContext.BLOCK_COMMENT;
437                 default:
438                     state = ISI_HTML_COMMENT;
439                     continue;
440                 }
441                 //break;
442

443             case ISI_SGML_DECL:
444                 switch( actChar ) {
445                 case '>':
446                     offset++;
447                     state = INIT;
448                     return HTMLTokenContext.DECLARATION;
449                 case '-':
450                     if( offset == tokenOffset ) {
451                         state = ISA_SGML_DECL_DASH;
452                         break;
453                     } else {
454                         return HTMLTokenContext.DECLARATION;
455                     }
456                 }
457                 break;
458
459             case ISA_SGML_DECL_DASH:
460                 if( actChar == '-' ) {
461                     state = ISI_SGML_COMMENT;
462                     break;
463                 } else {
464                     state = ISI_SGML_DECL;
465                     continue;
466                 }
467
468             case ISI_SGML_COMMENT:
469                 switch( actChar ) {
470                 case '-':
471                     state = ISA_SGML_COMMENT_DASH;
472                     break;
473                 }
474                 break;
475
476             case ISA_SGML_COMMENT_DASH:
477                 if( actChar == '-' ) {
478                     offset++;
479                     state = ISI_SGML_DECL;
480                     return HTMLTokenContext.SGML_COMMENT;
481                 } else {
482                     state = ISI_SGML_COMMENT;
483                     continue;
484                 }
485
486
487             case ISA_REF:
488                 if( isAZ( actChar ) ) {
489                     state = ISI_REF_NAME;
490                     break;
491                 }
492                 if( actChar == '#' ) {
493                     state = ISA_REF_HASH;
494                     break;
495                 }
496                 state = subState;
497                 continue;
498
499             case ISI_REF_NAME:
500                 if( isName( actChar ) ) break;
501                 if( actChar == ';' ) offset++;
502                 state = subState;
503                 return HTMLTokenContext.CHARACTER;
504
505             case ISA_REF_HASH:
506                 if( actChar >= '0' && actChar <= '9' ) {
507                     state = ISI_REF_DEC;
508                     break;
509                 }
510                 if( actChar == 'x' || actChar == 'X' ) {
511                     state = ISA_REF_X;
512                     break;
513                 }
514                 if( isAZ( actChar ) ) {
515                     offset++;
516                     state = subState;
517                     return HTMLTokenContext.ERROR;
518                 }
519                 state = subState;
520                 continue;
521
522             case ISI_REF_DEC:
523                 if( actChar >= '0' && actChar <= '9' ) break;
524                 if( actChar == ';' ) offset++;
525                 state = subState;
526                 return HTMLTokenContext.CHARACTER;
527
528             case ISA_REF_X:
529                 if( (actChar >= '0' && actChar <= '9') ||
530                         (actChar >= 'a' && actChar <= 'f') ||
531                         (actChar >= 'A' && actChar <= 'F')
532                   ) {
533                     state = ISI_REF_HEX;
534                     break;
535                 }
536                 state = subState;
537                 return HTMLTokenContext.ERROR; // error on previous "&#x" sequence
538

539             case ISI_REF_HEX:
540                 if( (actChar >= '0' && actChar <= '9') ||
541                         (actChar >= 'a' && actChar <= 'f') ||
542                         (actChar >= 'A' && actChar <= 'F')
543                   ) break;
544                 if( actChar == ';' ) offset++;
545                 state = subState;
546                 return HTMLTokenContext.CHARACTER;
547             }
548
549
550             offset = ++offset;
551         } // end of while(offset...)
552

553         /** At this stage there's no more text in the scanned buffer.
554         * Scanner first checks whether this is completely the last
555         * available buffer.
556         */

557         if( lastBuffer ) {
558             switch( state ) {
559             case INIT:
560             case ISI_TEXT:
561             case ISA_LT:
562             case ISA_SLASH:
563             case ISA_SGML_ESCAPE:
564             case ISA_SGML_DASH:
565             case ISI_TAG_SLASH:
566                 return HTMLTokenContext.TEXT;
567
568             case ISA_REF:
569             case ISA_REF_HASH:
570                 if( subState == ISI_TEXT ) return HTMLTokenContext.TEXT;
571                 else return HTMLTokenContext.VALUE;
572
573             case ISI_HTML_COMMENT:
574             case ISA_HTML_COMMENT_DASH:
575             case ISI_HTML_COMMENT_WS:
576                 return HTMLTokenContext.BLOCK_COMMENT;
577
578             case ISI_TAG:
579                 return HTMLTokenContext.TAG_OPEN;
580             case ISI_ENDTAG:
581                 return HTMLTokenContext.TAG_CLOSE;
582
583             case ISI_ARG:
584                 return HTMLTokenContext.ARGUMENT;
585
586             case ISI_ERROR:
587                 return HTMLTokenContext.ERROR;
588
589             case ISP_ARG_WS:
590             case ISP_TAG_WS:
591             case ISP_ENDTAG_WS:
592             case ISP_EQ_WS:
593                 return HTMLTokenContext.WS;
594
595             case ISP_ARG_X:
596             case ISP_TAG_X:
597             case ISP_ENDTAG_X:
598             case ISP_EQ:
599                 return HTMLTokenContext.WS;
600
601             case ISI_VAL:
602             case ISI_VAL_QUOT:
603             case ISI_VAL_DQUOT:
604                 return HTMLTokenContext.VALUE;
605
606             case ISI_SGML_DECL:
607             case ISA_SGML_DECL_DASH:
608                 return HTMLTokenContext.DECLARATION;
609
610             case ISI_SGML_COMMENT:
611             case ISA_SGML_COMMENT_DASH:
612                 return HTMLTokenContext.SGML_COMMENT;
613
614             case ISI_REF_NAME:
615             case ISI_REF_DEC:
616             case ISA_REF_X:
617             case ISI_REF_HEX:
618                 return HTMLTokenContext.CHARACTER;
619             }
620         }
621
622         return null;
623     }
624
625     public String JavaDoc getStateName(int stateNumber) {
626         switch(stateNumber) {
627         case INIT:
628             return "INIT"; // NOI18N
629
case ISI_TEXT:
630             return "ISI_TEXT"; // NOI18N
631
case ISA_LT:
632             return "ISA_LT"; // NOI18N
633
case ISA_SLASH:
634             return "ISA_SLASH"; // NOI18N
635
case ISA_SGML_ESCAPE:
636             return "ISA_SGML_ESCAPE"; // NOI18N
637
case ISA_SGML_DASH:
638             return "ISA_SGML_DASH"; // NOI18N
639
case ISI_HTML_COMMENT:
640             return "ISI_HTML_COMMENT";// NOI18N
641
case ISA_HTML_COMMENT_DASH:
642             return "ISA_HTML_COMMENT_DASH";// NOI18N
643
case ISI_HTML_COMMENT_WS:
644             return "ISI_HTML_COMMENT_WS";// NOI18N
645
case ISI_TAG:
646             return "ISI_TAG";// NOI18N
647
case ISI_ENDTAG:
648             return "ISI_ENDTAG";// NOI18N
649
case ISI_ARG:
650             return "ISI_ARG";// NOI18N
651
case ISI_ERROR:
652             return "ISI_ERROR";// NOI18N
653
case ISP_ARG_WS:
654             return "ISP_ARG_WS";// NOI18N
655
case ISP_TAG_WS:
656             return "ISP_TAG_WS";// NOI18N
657
case ISP_ENDTAG_WS:
658             return "ISP_ENDTAG_WS";// NOI18N
659
case ISP_ARG_X:
660             return "ISP_ARG_X";// NOI18N
661
case ISP_TAG_X:
662             return "ISP_TAG_X";// NOI18N
663
case ISP_ENDTAG_X:
664             return "ISP_ENDTAG_X";// NOI18N
665
case ISP_EQ:
666             return "ISP_EQ";// NOI18N
667
case ISI_VAL:
668             return "ISI_VAL";// NOI18N
669
case ISI_VAL_QUOT:
670             return "ISI_VAL_QUOT";// NOI18N
671
case ISI_VAL_DQUOT:
672             return "ISI_VAL_DQUOT";// NOI18N
673
case ISI_SGML_DECL:
674             return "ISI_SGML_DECL";// NOI18N
675
case ISA_SGML_DECL_DASH:
676             return "ISA_SGML_DECL_DASH";// NOI18N
677
case ISI_SGML_COMMENT:
678             return "ISI_SGML_COMMENT";// NOI18N
679
case ISA_SGML_COMMENT_DASH:
680             return "ISA_SGML_COMMENT_DASH";// NOI18N
681
case ISA_REF:
682             return "ISA_REF";// NOI18N
683
case ISI_REF_NAME:
684             return "ISI_REF_NAME";// NOI18N
685
case ISA_REF_HASH:
686             return "ISA_REF_HASH";// NOI18N
687
case ISI_REF_DEC:
688             return "ISI_REF_DEC";// NOI18N
689
case ISA_REF_X:
690             return "ISA_REF_X";// NOI18N
691
case ISI_REF_HEX:
692             return "ISI_REF_HEX";// NOI18N
693
default:
694             return super.getStateName(stateNumber);
695         }
696     }
697
698     /** Load valid mark state into the analyzer. Offsets
699     * are already initialized when this method is called. This method
700     * must get the state from the mark and set it to the analyzer. Then
701     * it must decrease tokenOffset by the preScan stored in the mark state.
702     * @param markState mark state to be loaded into syntax. It must be non-null value.
703     */

704     public void loadState(StateInfo stateInfo) {
705         super.loadState( stateInfo );
706         subState = ((HTMLStateInfo)stateInfo).getSubState();
707     }
708
709     /** Store state of this analyzer into given mark state. */
710     public void storeState(StateInfo stateInfo) {
711         super.storeState( stateInfo );
712         ((HTMLStateInfo)stateInfo).setSubState( subState );
713     }
714
715     /** Compare state of this analyzer to given state info */
716     public int compareState(StateInfo stateInfo) {
717         if( super.compareState( stateInfo ) == DIFFERENT_STATE ) return DIFFERENT_STATE;
718         return ( ((HTMLStateInfo)stateInfo).getSubState() == subState) ? EQUAL_STATE : DIFFERENT_STATE;
719     }
720
721     /** Create state info appropriate for particular analyzer */
722     public StateInfo createStateInfo() {
723         return new HTMLStateInfo();
724     }
725
726
727     /** Base implementation of the StateInfo interface */
728     public static class HTMLStateInfo extends Syntax.BaseStateInfo {
729
730         /** analyzer subState during parsing character references */
731         private int subState;
732
733         public int getSubState() {
734             return subState;
735         }
736
737         public void setSubState(int subState) {
738             this.subState = subState;
739         }
740
741         public String JavaDoc toString(Syntax syntax) {
742             return super.toString(syntax) + ", subState=" + (syntax == null ? "" : syntax.getStateName(getSubState())); // NOI18N
743
}
744
745     }
746
747
748 }
749
Popular Tags