KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > xml > text > syntax > XMLDefaultSyntax


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19 package org.netbeans.modules.xml.text.syntax;
20
21 import org.netbeans.editor.Syntax;
22 import org.netbeans.editor.TokenID;
23 import org.netbeans.modules.xml.text.api.XMLDefaultTokenContext;
24
25 /**
26  * Graceful lexical analyzer for XML source files. It returns an <code>ERROR</code>
27  * token rather than terminating the scan. In such a case it stays in the current state.
28  *
29  * @author Petr Nejedly
30  * @author Miloslav Metelka
31  * @author Sandeep Singh Randhawa
32  * @author Petr Kuzel
33  *
34  * @version 1.10 XML spec aware
35  */

36
37 public class XMLDefaultSyntax extends Syntax {
38     
39     /**
40      * Internal state of the lexical analyzer before entering subanalyzer of
41      * character references. It is initially set to INIT, but before first
42      * usage, this will be overwritten with the state that originated the
43      * transition to the charref subanalyzer.
44      */

45     protected int subState = INIT;
46     
47     /**
48      * Identifies internal DTD layer. Most of functionality is same
49      * as at document layer, however there are minor exceptions.
50      * @see isInternalDTD checks in code
51      */

52     protected boolean subInternalDTD = false;
53     
54     // Internal states I = in state
55
// P = expected (char probed but not consumed)
56
// A = after (char probed and consumed)
57
private static final int ISI_TEXT = 1; // Plain text between tags
58
private static final int ISI_ERROR = 2; // Syntax error in XML syntax
59
private static final int ISA_LT = 3; // After start of tag delimiter - "<"
60
private static final int ISA_SLASH = 4; // After ETAGO - "</"
61
private static final int ISI_ENDTAG = 5; // Inside endtag - "</[a..Z]+"
62
private static final int ISP_ENDTAG_X = 6; // X-switch after ENDTAG's name
63
private static final int ISP_ENDTAG_WS = 7; // In WS in ENDTAG - "</A_ _>"
64
private static final int ISI_TAG = 8; // Inside tag - "<[a..Z]+"
65
private static final int ISP_TAG_X = 9; // X-switch after TAG's name
66
private static final int ISP_TAG_WS = 10; // In WS in TAG - "<A_ _...>"
67
private static final int ISI_ARG = 11; // Inside tag's argument - "<A h_r_...>"
68
private static final int ISP_ARG_X = 12; // X-switch after ARGUMENT's name
69
private static final int ISP_ARG_WS = 13; // Inside WS after argument awaiting '='
70
private static final int ISP_EQ = 14; // X-switch after '=' in TAG's ARGUMENT
71
private static final int ISP_EQ_WS = 15; // In WS after '='
72
private static final int ISI_VAL_APOS = 17; // Single-quoted value - may contain " chars
73
private static final int ISI_VAL_QUOT = 18; // Double-quoted value - may contain ' chars
74
private static final int ISA_SGML_ESCAPE = 19; // After "<!"
75
private static final int ISA_SGML_DASH = 20; // After "<!-"
76
private static final int ISI_XML_COMMENT = 21; // Somewhere after "<!--"
77
private static final int ISA_XML_COMMENT_DASH = 22; // Dash in comment - maybe end of comment
78
private static final int ISI_XML_COMMENT_WS = 23; // After end of comment, awaiting end of comment declaration
79
private static final int ISI_SGML_DECL = 24; // Inside a declaration - entered after "<!" followed by a char accepted by isAZ()
80     private static final int ISA_SGML_DECL_DASH = 25; // NOTE(review): handled in parseToken, but no visible transition enters this state - possibly a leftover; confirm
81     // private static final int ISI_SGML_COMMENT = 26;
82
// private static final int ISA_SGML_COMMENT_DASH = 27;
83
private static final int ISA_REF = 28; // when comes to character reference, e.g. &amp;, after &
84
private static final int ISI_REF_NAME = 29; // if the reference is symbolic - by predefined name
85
private static final int ISA_REF_HASH = 30; // for numeric references - after &#
86
private static final int ISI_REF_DEC = 31; // decimal character reference, e.g. &#345;
87
private static final int ISA_REF_X = 32; // after "&#x" or "&#X", before the first hexadecimal digit
88
private static final int ISI_REF_HEX = 33; // hexadecimal reference, in &#xa.. of &#X9..
89

90     
91     private static final int ISI_PI = 35; //after <?...
92
private static final int ISI_PI_TARGET = 36; //in <?..|..
93
private static final int ISP_PI_TARGET_WS = 37; //after <?...|
94
private static final int ISI_PI_CONTENT = 38; //in PI content
95
private static final int ISA_PI_CONTENT_QMARK = 39; //after ? in content
96
private static final int ISP_PI_CONTENT_QMARK = 40; //spotted ? in content (probed but not yet consumed)
97

98     // CDATA section handler
99
private static final int ISA_LTEXBR = 41; // After "<!["
100     private static final int ISA_LTEXBRC = 42; // After "<![C"
101     private static final int ISA_LTEXBRCD = 43; // After "<![CD"
102     private static final int ISA_LTEXBRCDA = 44; // After "<![CDA"
103     private static final int ISA_LTEXBRCDAT = 45; // After "<![CDAT"
104     private static final int ISA_LTEXBRCDATA = 46; // After "<![CDATA", awaiting the second '['
105     private static final int ISI_CDATA = 47; // Inside CDATA section - after "<![CDATA["
106     private static final int ISA_CDATA_BR = 48; // After "]" inside CDATA section
107     private static final int ISA_CDATA_BRBR = 49; // After "]]" inside CDATA section - '>' ends the section
108
109     // strings in declaration
110
private static final int ISI_DECL_CHARS = 50; // Inside single-quoted literal in a declaration
111     private static final int ISI_DECL_STRING = 51; // Inside double-quoted literal in a declaration
112     private static final int ISP_DECL_CHARS = 52; // Opening ' spotted in a declaration (probed but not consumed)
113     private static final int ISP_DECL_STRING = 53; // Opening " spotted in a declaration (probed but not consumed)
114
115     // internal DTD handling
116
private static final int ISA_INIT_BR = 54; // After ']' while in internal DTD - skips WS and awaits the closing '>'
117     
118     public XMLDefaultSyntax() {
            // Creates the analyzer and selects the XML default token context path.
            // NOTE(review): tokenContextPath is not declared in this class; presumably inherited from Syntax - confirm.
119         tokenContextPath = XMLDefaultTokenContext.contextPath;
120     }
121     
122     protected TokenID parseToken() {
123         
124         char actChar;
125         while(offset < stopOffset) {
126             actChar = buffer[offset];
127             switch( state ) {
128                 case INIT: // DONE
129
switch( actChar ) {
130                         case '<':
131                             state = ISA_LT;
132                             break;
133                         case '&':
134                             if (isInternalDTD() == false) {
135                                 state = ISA_REF;
136                                 subState = ISI_TEXT;
137                             } else {
138                                 state = ISI_TEXT;
139                             }
140                             break;
141                         case '%':
142                             if (isInternalDTD()) {
143                                 state = ISA_REF;
144                                 subState = INIT;
145                             } else {
146                                 state = ISI_TEXT;
147                             }
148                             break;
149                         case ']':
150                             if (isInternalDTD()) {
151                                 state = ISA_INIT_BR;
152                             } else {
153                                 state = ISI_TEXT;
154                             }
155                             break;
156                         default:
157                             state = ISI_TEXT;
158                             break;
159                     }
160                     break;
161                     
162                 case ISI_TEXT: // DONE
163
switch( actChar ) {
164                         case '<':
165                             state = INIT;
166                             return XMLDefaultTokenContext.TEXT;
167                         case '&':
168                             if (isInternalDTD() == false) {
169                                 state = INIT;
170                                 return XMLDefaultTokenContext.TEXT;
171                             }
172                             break;
173                         case '%':
174                             if (isInternalDTD()) {
175                                 state = INIT;
176                                 return XMLDefaultTokenContext.TEXT;
177                             }
178                             break;
179                         case ']':
180                             if (isInternalDTD()) {
181                                 state = ISA_INIT_BR;
182                             }
183                             break;
184                     }
185                     break;
186                     
187                 case ISI_ERROR: // DONE
188
offset++;
189                     state = INIT;
190                     return XMLDefaultTokenContext.ERROR;
191                     
192                 case ISA_LT: // DONE
193

194                     if( UnicodeClasses.isXMLNameStartChar( actChar ) && isInternalDTD() == false) {
195                         state = ISI_TAG;
196                         break;
197                     }
198                     switch( actChar ) {
199                         case '/': // ETAGO - </
200
state = ISA_SLASH;
201                             break;
202                         case '!':
203                             state = ISA_SGML_ESCAPE;
204                             break;
205                         case '?':
206                             state = ISI_PI;
207                             offset++;
208                             return XMLDefaultTokenContext.PI_START;
209                         default:
210                             state = ISI_TEXT; //RELAXED to allow editing in the middle of document
211
continue; // don't eat the char, maybe its '&'
212
}
213                     break;
214
215                 case ISI_PI:
216                     if ( UnicodeClasses.isXMLNameStartChar( actChar )) {
217                         state = ISI_PI_TARGET;
218                         break;
219                     }
220                     state = ISI_ERROR;
221                     break;
222                     
223                 case ISI_PI_TARGET:
224                     if ( UnicodeClasses.isXMLNameChar( actChar )) break;
225                     if (isWS( actChar )) {
226                         state = ISP_PI_TARGET_WS;
227                         return XMLDefaultTokenContext.PI_TARGET;
228                     }
229                     state = ISI_ERROR;
230                     break;
231                     
232                 case ISP_PI_TARGET_WS:
233                     if (isWS( actChar)) break;
234                     state = ISI_PI_CONTENT;
235                     return XMLDefaultTokenContext.WS;
236
237                 case ISI_PI_CONTENT:
238                     if (actChar != '?') break; // eat content
239
state = ISP_PI_CONTENT_QMARK;
240                     return XMLDefaultTokenContext.PI_CONTENT; // may do extra break
241

242                 case ISP_PI_CONTENT_QMARK:
243                     if (actChar != '?') throw new IllegalStateException JavaDoc ("'?' expected in ISP_PI_CONTENT_QMARK");
244                     state = ISA_PI_CONTENT_QMARK;
245                     break;
246
247                 case ISA_PI_CONTENT_QMARK:
248                     if (actChar != '>') {
249                         state = ISI_PI_CONTENT;
250                         break;
251                     }
252                     state = INIT;
253                     offset++;
254                     return XMLDefaultTokenContext.PI_END;
255                     
256                 case ISA_SLASH: // DONE
257

258                     if( UnicodeClasses.isXMLNameStartChar( actChar )){
259                         state = ISI_ENDTAG;
260                         break;
261                     }
262                     switch( actChar ) {
263                         case ' ':
264                             state = ISI_TEXT;
265                             continue;
266                         case '\n':
267                             state = ISI_TEXT;
268                             continue;
269                         case '\r':
270                             state = ISI_TEXT;
271                             continue;
272                         default: // Part of text, e.g. </3, </'\n', RELAXED
273
state = ISI_TEXT;
274                             continue; // don'e eat the char
275
}
276                     //break;
277

278                 case ISI_ENDTAG: // DONE
279
if( UnicodeClasses.isXMLNameChar( actChar )){
280                         break; // Still in endtag identifier, eat next char
281
}
282                     
283                     state = ISP_ENDTAG_X;
284                     return XMLDefaultTokenContext.TAG;
285                     
286                     
287                 case ISP_ENDTAG_X: // DONE
288
if( isWS( actChar ) ) {
289                         state = ISP_ENDTAG_WS;
290                         break;
291                     }
292                     switch( actChar ) {
293                         case '>': // Closing of endtag, e.g. </H6 _>_
294
offset++;
295                             state = INIT;
296                             return XMLDefaultTokenContext.TAG;
297                         default:
298                             state = ISI_ERROR;
299                             continue; //don't eat
300
}
301                     //break;
302

303                 case ISP_ENDTAG_WS: // DONE
304
if( isWS( actChar ) ) break; // eat all WS
305
state = ISP_ENDTAG_X;
306                     return XMLDefaultTokenContext.WS;
307                     
308                     
309                 case ISI_TAG: // DONE
310
if( UnicodeClasses.isXMLNameChar( actChar ) ) break; // Still in tag identifier, eat next char
311
state = ISP_TAG_X;
312                     return XMLDefaultTokenContext.TAG;
313                     
314                 case ISP_TAG_X: // DONE
315
if( isWS( actChar ) ) {
316                         state = ISP_TAG_WS;
317                         break;
318                     }
319                     if( UnicodeClasses.isXMLNameStartChar( actChar ) ) {
320                         state = ISI_ARG;
321                         break;
322                     }
323                     switch( actChar ) {
324                         case '/':
325                             offset++;
326                             continue;
327                         case '?': //Prolog and PI's now similar to Tag
328
offset++;
329                             continue;
330                         case '>':
331                             offset++;
332                             state = INIT;
333                             return XMLDefaultTokenContext.TAG;
334                         default:
335                             state = ISI_ERROR;
336                             continue;
337                     }
338                     //break;
339

340                     
341                 case ISP_TAG_WS: // DONE
342
if( isWS( actChar ) ) break; // eat all WS
343
state = ISP_TAG_X;
344                     return XMLDefaultTokenContext.WS;
345                     
346                 case ISI_ARG: // DONE
347
if( UnicodeClasses.isXMLNameChar( actChar ) ) break; // eat next char
348
state = ISP_ARG_X;
349                     return XMLDefaultTokenContext.ARGUMENT;
350                     
351                 case ISP_ARG_X:
352                     if( isWS( actChar ) ) {
353                         state = ISP_ARG_WS;
354                         break;
355                     }
356                     switch( actChar ) {
357                         case '=':
358                             offset++;
359                             state = ISP_EQ;
360                             return XMLDefaultTokenContext.OPERATOR;
361                         default:
362                             state = ISI_ERROR;
363                             continue;
364                     }
365                     //break;
366

367                 case ISP_ARG_WS:
368                     if( isWS( actChar ) ) break; // Eat all WhiteSpace
369
state = ISP_ARG_X;
370                     return XMLDefaultTokenContext.WS;
371                     
372                 case ISP_EQ:
373                     if( isWS( actChar ) ) {
374                         state = ISP_EQ_WS;
375                         break;
376                     }
377                     switch( actChar ) {
378                         case '\'':
379                             state = ISI_VAL_APOS;
380                             break;
381                         case '"':
382                             state = ISI_VAL_QUOT;
383                             break;
384                         default:
385                             state = ISI_ERROR;
386                             continue;
387                     }
388                     break;
389                     
390                 case ISP_EQ_WS:
391                     if( isWS( actChar ) ) break; // Consume all WS
392
state = ISP_EQ;
393                     return XMLDefaultTokenContext.WS;
394                                         
395                 case ISI_VAL_APOS:
396                     switch( actChar ) {
397                         case '\'':
398                             offset++;
399                             state = ISP_TAG_X;
400                             return XMLDefaultTokenContext.VALUE;
401                         case '&':
402                             if( offset == tokenOffset ) {
403                                 subState = state;
404                                 state = ISA_REF;
405                                 break;
406                             } else {
407                                 return XMLDefaultTokenContext.VALUE;
408                             }
409                     }
410                     break; // else simply consume next char of VALUE
411

412                 case ISI_VAL_QUOT:
413                     switch( actChar ) {
414                         case '"':
415                             offset++;
416                             state = ISP_TAG_X;
417                             return XMLDefaultTokenContext.VALUE;
418                         case '&':
419                             if( offset == tokenOffset ) {
420                                 subState = state;
421                                 state = ISA_REF;
422                                 break;
423                             } else {
424                                 return XMLDefaultTokenContext.VALUE;
425                             }
426                     }
427                     break; // else simply consume next char of VALUE
428

429                     
430                 case ISA_SGML_ESCAPE: // DONE
431
if (actChar == '[') {
432                         state = ISA_LTEXBR;
433                         break;
434                     } else if( isAZ(actChar) ) {
435                         state = ISI_SGML_DECL;
436                         break;
437                     }
438                     switch( actChar ) {
439                         case '-':
440                             state = ISA_SGML_DASH;
441                             break;
442                         default:
443                             state = ISI_TEXT;
444                             continue;
445                     }
446                     break;
447                     
448                 case ISA_LTEXBR:
449                     if (actChar == 'C') {
450                         state = ISA_LTEXBRC;
451                         break;
452                     } else {
453                         state = ISI_TEXT;
454                         continue;
455                     }
456
457                 case ISA_LTEXBRC:
458                     if (actChar == 'D') {
459                         state = ISA_LTEXBRCD;
460                         break;
461                     } else {
462                         state = ISI_TEXT;
463                         continue;
464                     }
465
466                 case ISA_LTEXBRCD:
467                     if (actChar == 'A') {
468                         state = ISA_LTEXBRCDA;
469                         break;
470                     } else {
471                         state = ISI_TEXT;
472                         continue;
473                     }
474
475                 case ISA_LTEXBRCDA:
476                     if (actChar == 'T') {
477                         state = ISA_LTEXBRCDAT;
478                         break;
479                     } else {
480                         state = ISI_TEXT;
481                         continue;
482                     }
483
484                 case ISA_LTEXBRCDAT:
485                     if (actChar == 'A') {
486                         state = ISA_LTEXBRCDATA;
487                         break;
488                     } else {
489                         state = ISI_TEXT;
490                         continue;
491                     }
492
493                 case ISA_LTEXBRCDATA:
494                     if (actChar == '[') {
495                         state = ISI_CDATA;
496                         break;
497                     } else {
498                         state = ISI_TEXT;
499                         continue;
500                     }
501
502                 case ISI_CDATA:
503                     if (actChar == ']') {
504                         state = ISA_CDATA_BR;
505                         break;
506                     }
507
508                 case ISA_CDATA_BR:
509                     if (actChar == ']') {
510                         state = ISA_CDATA_BRBR;
511                         break;
512                     } else {
513                         state = ISI_CDATA;
514                         break;
515                     }
516
517                 case ISA_CDATA_BRBR:
518                     if (actChar == '>') {
519                         state = ISI_TEXT; //It s allowed only in content
520
offset++;
521                         return XMLTokenIDs.CDATA_SECTION;
522                     } else if (actChar == ']') {
523                         // stay in the same state
524
break;
525                     } else {
526                         state = ISI_CDATA;
527                         break;
528                     }
529                     
530                     
531                 case ISA_SGML_DASH: // DONE
532
switch( actChar ) {
533                         case '-':
534                             state = ISI_XML_COMMENT;
535                             break;
536                         default:
537                             state=ISI_ERROR;
538                             continue;
539                     }
540                     break;
541                     
542                 case ISI_XML_COMMENT: // DONE
543
switch( actChar ) {
544                         case '-':
545                             state = ISA_XML_COMMENT_DASH;
546                             break;
547                         //create an XML comment token for each line of the comment - a workaround fix for performance bug #39446
548
//this also causes a SyntaxtElement to be created for each line of the comment - see XMLSyntaxSupport.createElement:277
549
//PENDING - this code can be removed after editor solve it somehow in their code
550
case '\n':
551                             offset++;
552                             //leave the some state - we are still in an XML comment,
553
//we just need to create a token for each line.
554
return XMLDefaultTokenContext.BLOCK_COMMENT;
555                     }
556                     break;
557                     
558                 case ISA_XML_COMMENT_DASH:
559                     switch( actChar ) {
560                         case '-':
561                             state = ISI_XML_COMMENT_WS;
562                             break;
563                         default:
564                             state = ISI_XML_COMMENT;
565                             continue;
566                     }
567                     break;
568                     
569                 case ISI_XML_COMMENT_WS: // DONE
570
if( isWS( actChar ) ) break; // Consume all WS
571
switch( actChar ) {
572                         case '>':
573                             offset++;
574                             state = INIT;
575                             return XMLDefaultTokenContext.BLOCK_COMMENT;
576                         default:
577                             state = ISI_ERROR;
578                             return XMLDefaultTokenContext.BLOCK_COMMENT;
579                     }
580                     
581                 case ISP_DECL_STRING:
582                     if (actChar != '"') throw new IllegalStateException JavaDoc("Unexpected " + actChar);
583                     state = ISI_DECL_STRING;
584                     break;
585                     
586                 case ISI_DECL_STRING:
587                     if ( actChar == '"') {
588                             state = ISI_SGML_DECL;
589                             offset++;
590                             return XMLDefaultTokenContext.VALUE;
591                     }
592                     break;
593
594                 case ISP_DECL_CHARS:
595                     if (actChar != '\'') throw new IllegalStateException JavaDoc("Unexpected " + actChar);
596                     state = ISI_DECL_CHARS;
597                     break;
598                     
599                 case ISI_DECL_CHARS:
600                     if ( actChar == '\'') {
601                             state = ISI_SGML_DECL;
602                             offset++;
603                             return XMLDefaultTokenContext.VALUE;
604                     }
605                     break;
606                     
607                 case ISI_SGML_DECL:
608                     switch( actChar ) {
609                         case '"':
610                             state = ISP_DECL_STRING;
611                             return XMLDefaultTokenContext.DECLARATION;
612                         case '\'':
613                             state = ISP_DECL_CHARS;
614                             return XMLDefaultTokenContext.DECLARATION;
615                         case '[':
616                             offset++;
617                             state = INIT;
618                             enterInternalDTD();
619                             return XMLDefaultTokenContext.DECLARATION;
620                         case '>':
621                             offset++;
622                             state = INIT;
623                             return XMLDefaultTokenContext.DECLARATION;
624                     }
625                     break;
626
627                 case ISA_INIT_BR:
628                     if (isWS(actChar)) break;
629                     if (actChar == '>') {
630                         offset++;
631                         state = INIT;
632                         leaveInternalDTD();
633                         return XMLDefaultTokenContext.DECLARATION;
634                     } else {
635                         state = INIT;
636                         return XMLDefaultTokenContext.ERROR;
637                     }
638                     
639                 case ISA_SGML_DECL_DASH:
640                     if( actChar == '-' ) {
641                         state = ISI_ERROR;
642                         break;
643                     } else {
644                         if(isWS(actChar)){
645                             state = ISI_ERROR;
646                             continue;
647                         } else {
648                             state = ISI_SGML_DECL;
649                             continue;
650                         }
651                     }
652                     
653                 case ISA_REF:
654                     if( UnicodeClasses.isXMLNameStartChar( actChar ) ) {
655                         state = ISI_REF_NAME;
656                         break;
657                     }
658                     if( actChar == '#') {
659                         state = ISA_REF_HASH;
660                         break;
661                     }
662                     state = subState;
663                     continue;
664                     
665                 case ISI_REF_NAME:
666                     if( UnicodeClasses.isXMLNameChar( actChar ) ) break;
667                     if( actChar == ';' ) offset++;
668                     state = subState;
669                     return XMLDefaultTokenContext.CHARACTER;
670                     
671                 case ISA_REF_HASH:
672                     if( actChar >= '0' && actChar <= '9' ) {
673                         state = ISI_REF_DEC;
674                         break;
675                     }
676                     if( actChar == 'x' || actChar == 'X' ) {
677                         state = ISA_REF_X;
678                         break;
679                     }
680                     if( isAZ( actChar ) ) {
681                         offset++;
682                         state = subState;
683                         return XMLDefaultTokenContext.ERROR;
684                     }
685                     state = subState;
686                     continue;
687                     
688                 case ISI_REF_DEC:
689                     if( actChar >= '0' && actChar <= '9' ) break;
690                     if( actChar == ';' ) offset++;
691                     state = subState;
692                     return XMLDefaultTokenContext.CHARACTER;
693                     
694                 case ISA_REF_X:
695                     if (isHex(actChar)) {
696                         state = ISI_REF_HEX;
697                         break;
698                     }
699                     state = subState;
700                     return XMLDefaultTokenContext.ERROR; // error on previous "&#x" sequence
701

702                 case ISI_REF_HEX:
703                     if (isHex(actChar)) break;
704                     if (actChar == ';' ) offset++;
705                     state = subState;
706                     return XMLDefaultTokenContext.CHARACTER;
707             }
708             
709             
710             offset++;
711         } // end of while(offset...)
712

713         /** At this stage there's no more text in the scanned buffer.
714          * Scanner first checks whether this is completely the last
715          * available buffer.
716          */

717         if( lastBuffer ) {
718             switch( state ) {
719                 case INIT:
720                 case ISI_TEXT:
721                 case ISA_LT:
722                 case ISA_SLASH:
723                 case ISA_SGML_ESCAPE:
724                 case ISA_SGML_DASH:
725                     return XMLDefaultTokenContext.TEXT;
726                     
727                 case ISA_REF:
728                 case ISA_REF_HASH:
729                     if( subState == ISI_TEXT ) return XMLDefaultTokenContext.TEXT;
730                     else return XMLDefaultTokenContext.VALUE;
731                     
732                 case ISI_XML_COMMENT:
733                 case ISA_XML_COMMENT_DASH:
734                 case ISI_XML_COMMENT_WS:
735                     return XMLDefaultTokenContext.BLOCK_COMMENT;
736                     
737                 case ISI_TAG:
738                 case ISI_ENDTAG:
739                     return XMLDefaultTokenContext.TAG;
740                     
741                 case ISI_ARG:
742                     return XMLDefaultTokenContext.ARGUMENT;
743                     
744                 case ISI_ERROR:
745                     return XMLDefaultTokenContext.ERROR;
746                     
747                 case ISP_ARG_WS:
748                 case ISP_TAG_WS:
749                 case ISP_ENDTAG_WS:
750                 case ISP_EQ_WS:
751                     return XMLDefaultTokenContext.WS;
752                     
753                 case ISP_ARG_X:
754                 case ISP_TAG_X:
755                 case ISP_ENDTAG_X:
756                 case ISP_EQ:
757                     return XMLDefaultTokenContext.WS;
758                     
759                 case ISI_VAL_APOS:
760                 case ISI_VAL_QUOT:
761                 case ISI_DECL_CHARS:
762                 case ISI_DECL_STRING:
763                     return XMLDefaultTokenContext.VALUE;
764                     
765                 case ISI_SGML_DECL:
766                 case ISA_SGML_DECL_DASH:
767                 case ISP_DECL_STRING:
768                 case ISP_DECL_CHARS:
769                     return XMLDefaultTokenContext.DECLARATION;
770                     
771                 case ISI_REF_NAME:
772                 case ISI_REF_DEC:
773                 case ISA_REF_X:
774                 case ISI_REF_HEX:
775                     return XMLDefaultTokenContext.CHARACTER;
776                     
777                 case ISI_PI:
778                     return XMLDefaultTokenContext.PI_START;
779                 case ISI_PI_TARGET:
780                     return XMLDefaultTokenContext.PI_TARGET;
781                 case ISP_PI_TARGET_WS:
782                     return XMLDefaultTokenContext.WS;
783                 case ISI_PI_CONTENT:
784                     return XMLDefaultTokenContext.PI_CONTENT;
785                 case ISA_PI_CONTENT_QMARK:
786                 case ISP_PI_CONTENT_QMARK:
787                     // we are at end of the last buffer and expect that next char will be '>'
788
return XMLDefaultTokenContext.PI_END;
789
790                 case ISA_LTEXBR:
791                 case ISA_LTEXBRC:
792                 case ISA_LTEXBRCD:
793                 case ISA_LTEXBRCDA:
794                 case ISA_LTEXBRCDAT:
795                 case ISA_LTEXBRCDATA:
796                     return XMLDefaultTokenContext.TEXT;
797
798                 case ISI_CDATA:
799                 case ISA_CDATA_BR:
800                 case ISA_CDATA_BRBR:
801                     return XMLTokenIDs.CDATA_SECTION;
802
803                 case ISA_INIT_BR:
804                     return XMLDefaultTokenContext.TEXT;
805                     
806                 default:
807                     throw new IllegalStateException JavaDoc("Last buffer does not handle state " + state + "!"); //NOI18N
808
}
809         }
810         
811         return null; // ask for next buffer
812

813     }
814     
815     public String JavaDoc getStateName(int stateNumber) {
816         switch(stateNumber) {
817             case INIT:
818                 return "INIT"; // NOI18N
819
case ISI_TEXT:
820                 return "ISI_TEXT"; // NOI18N
821
case ISA_LT:
822                 return "ISA_LT"; // NOI18N
823
case ISA_SLASH:
824                 return "ISA_SLASH"; // NOI18N
825
case ISA_SGML_ESCAPE:
826                 return "ISA_SGML_ESCAPE"; // NOI18N
827
case ISA_SGML_DASH:
828                 return "ISA_SGML_DASH"; // NOI18N
829
case ISI_XML_COMMENT:
830                 return "ISI_XML_COMMENT";// NOI18N
831
case ISA_XML_COMMENT_DASH:
832                 return "ISA_XML_COMMENT_DASH";// NOI18N
833
case ISI_XML_COMMENT_WS:
834                 return "ISI_XML_COMMENT_WS";// NOI18N
835
case ISI_TAG:
836                 return "ISI_TAG";// NOI18N
837
case ISI_ENDTAG:
838                 return "ISI_ENDTAG";// NOI18N
839
case ISI_ARG:
840                 return "ISI_ARG";// NOI18N
841
case ISI_ERROR:
842                 return "ISI_ERROR";// NOI18N
843
case ISP_ARG_WS:
844                 return "ISP_ARG_WS";// NOI18N
845
case ISP_TAG_WS:
846                 return "ISP_TAG_WS";// NOI18N
847
case ISP_ENDTAG_WS:
848                 return "ISP_ENDTAG_WS";// NOI18N
849
case ISP_ARG_X:
850                 return "ISP_ARG_X";// NOI18N
851
case ISP_TAG_X:
852                 return "ISP_TAG_X";// NOI18N
853
case ISP_ENDTAG_X:
854                 return "ISP_ENDTAG_X";// NOI18N
855
case ISP_EQ:
856                 return "ISP_EQ";// NOI18N
857
case ISI_VAL_APOS:
858                 return "ISI_VAL_APOS";// NOI18N
859
case ISI_VAL_QUOT:
860                 return "ISI_VAL_QUOT";// NOI18N
861
case ISI_SGML_DECL:
862                 return "ISI_SGML_DECL";// NOI18N
863
case ISA_SGML_DECL_DASH:
864                 return "ISA_SGML_DECL_DASH";// NOI18N
865
// case ISI_SGML_COMMENT:
866
// return "ISI_SGML_COMMENT";// NOI18N
867
// case ISA_SGML_COMMENT_DASH:
868
// return "ISA_SGML_COMMENT_DASH";// NOI18N
869
case ISA_REF:
870                 return "ISA_REF";// NOI18N
871
case ISI_REF_NAME:
872                 return "ISI_REF_NAME";// NOI18N
873
case ISA_REF_HASH:
874                 return "ISA_REF_HASH";// NOI18N
875
case ISI_REF_DEC:
876                 return "ISI_REF_DEC";// NOI18N
877
case ISA_REF_X:
878                 return "ISA_REF_X";// NOI18N
879
case ISI_REF_HEX:
880                 return "ISI_REF_HEX";// NOI18N
881
case ISI_PI:
882                 return "ISI_PI"; // NOI18N
883
case ISI_PI_TARGET:
884                 return "ISI_PI_TARGET";// NOI18N
885
case ISP_PI_TARGET_WS:
886                 return "ISP_PI_TARGET_WS";// NOI18N
887
case ISI_PI_CONTENT:
888                 return "ISI_PI_CONTENT";// NOI18N
889
case ISA_PI_CONTENT_QMARK:
890                 return "ISA_PI_CONTENT_QMARK";// NOI18N
891
case ISP_PI_CONTENT_QMARK:
892                 return "ISP_PI_CONTENT_QMARK";// NOI18N
893

894             case ISI_DECL_CHARS:
895                 return "ISI_DECL_CHARS";
896             case ISI_DECL_STRING:
897                 return "ISI_DECL_STRING";
898             case ISP_DECL_CHARS:
899                 return "ISP_DECL_CHARS";
900             case ISP_DECL_STRING:
901                 return "ISP_DECL_STRING";
902             case ISA_INIT_BR:
903                 return "ISA_INIT_BR";
904             default:
905                 return super.getStateName(stateNumber);
906         }
907     }
908     
909     /** Load valid mark state into the analyzer. Offsets
910      * are already initialized when this method is called. This method
911      * must get the state from the mark and set it to the analyzer. Then
912      * it must decrease tokenOffset by the preScan stored in the mark state.
913      * @param markState mark state to be loaded into syntax. It must be non-null value.
914      */

915     public void loadState(StateInfo stateInfo) {
916         super.loadState( stateInfo );
917         subState = ((XMLStateInfo)stateInfo).getSubState();
918         subInternalDTD = ((XMLStateInfo)stateInfo).isInternalDTD();
919     }
920     
921     /** Store state of this analyzer into given mark state. */
922     public void storeState(StateInfo stateInfo) {
923         super.storeState( stateInfo );
924         ((XMLStateInfo)stateInfo).setSubState( subState );
925         ((XMLStateInfo)stateInfo).setInternalDTD( subInternalDTD );
926     }
927     
928     /** Compare state of this analyzer to given state info */
929     public int compareState(StateInfo stateInfo) {
930         if( super.compareState( stateInfo ) == DIFFERENT_STATE ) return DIFFERENT_STATE;
931         return ( ((XMLStateInfo)stateInfo).getSubState() == subState
932             && ((XMLStateInfo)stateInfo).isInternalDTD() == subInternalDTD)
933             ? EQUAL_STATE : DIFFERENT_STATE;
934     }
935     
936     /** Create state info appropriate for particular analyzer */
937     public StateInfo createStateInfo() {
938         return new XMLStateInfo();
939     }
940
941     // ~~~~~~~~~~~~~~~~~~~~~ utility methods ~~~~~~~~~~~~~~~~~~~~~~~~
942

943     private boolean isAZ( char ch ) {
944         return( (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') );
945     }
946
947     private boolean isHex( char ch) {
948         return (ch >= '0' && ch <= '9') || isAF(ch);
949     }
950
951     private boolean isAF( char ch ) {
952         return( (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') );
953     }
954     
955     private boolean isName( char ch ) {
956         return( (ch >= 'a' && ch <= 'z') ||
957         (ch >= 'A' && ch <= 'Z') ||
958         (ch >= '0' && ch <= '9') ||
959         ch == '-' || ch == '_' || ch == '.' || ch == ':' );
960         
961     }
962
963     private void enterInternalDTD() {
964         subInternalDTD = true;
965     }
966
967     private void leaveInternalDTD() {
968         subInternalDTD = false;
969     }
970     
971     private boolean isInternalDTD() {
972         return subInternalDTD;
973     }
974     
975     /**
976      * Resolves if given char is whitespace in terms of XML 1.0 specs
977      * According to specs, following characters are treated as whitespace:
978      * Space - <CODE>' '</CODE>, Tab - <CODE>' '</CODE>,
979      * Formfeed - <CODE>' '</CODE>,Zero-width space - <CODE>'?'</CODE>,
980      * Carriage return - <CODE>'
981 '</CODE> and Line feed - <CODE>'
982 '</CODE>
983      * CR's are included for completenes only, they should never appear in document
984      */

985     
986     private boolean isWS( char ch ) {
987         return ( ch == '\u0020' || ch == '\u0009' || ch == '\u000c'
988         /*|| ch == '\u200b'*/ || ch == '\n' || ch == '\r' );
989     }
990     
991     
992     /** Base implementation of the StateInfo interface */
993     public static class XMLStateInfo extends Syntax.BaseStateInfo {
994         
995         /** analyzer subState during parsing character references */
996         private int subState;
997         private boolean subInternalDTD;
998         
999         public int getSubState() {
1000            return subState;
1001        }
1002        
1003        public void setSubState(int subState) {
1004            this.subState = subState;
1005        }
1006        
1007        public boolean isInternalDTD() {
1008            return subInternalDTD;
1009        }
1010        
1011        public void setInternalDTD(boolean val) {
1012            subInternalDTD = val;
1013        }
1014        
1015        public String JavaDoc toString(Syntax syntax) {
1016            return super.toString(syntax)
1017                + ", subState=" + syntax.getStateName(getSubState()) // NOI18N
1018
+ ", inDTD=" + subState; // NOI18N
1019
}
1020        
1021    }
1022}
1023
1024
Popular Tags