KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > xml > fastinfoset > DecoderStateTables


/*
 * Fast Infoset ver. 0.1 software ("Software")
 *
 * Copyright, 2004-2005 Sun Microsystems, Inc. All Rights Reserved.
 *
 * Software is licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may
 * obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations.
 *
 * Sun supports and benefits from the global community of open source
 * developers, and thanks the community for its important contributions and
 * open standards-based technology, which Sun has adopted into many of its
 * products.
 *
 * Please note that portions of Software may be provided with notices and
 * open source licenses from such communities and third parties that govern the
 * use of those portions, and any licenses granted hereunder do not alter any
 * rights and obligations you may have under such open source licenses,
 * however, the disclaimer of warranty and limitation of liability provisions
 * in this License will apply to all Software in this distribution.
 *
 * You acknowledge that the Software is not designed, licensed or intended
 * for use in the design, construction, operation or maintenance of any nuclear
 * facility.
 *
 * Apache License
 * Version 2.0, January 2004
 * http://www.apache.org/licenses/
 *
 */

38
39
40 package com.sun.xml.fastinfoset;
41
/**
 * Static lookup tables that classify a value in the range 0..255 into a decoder state.
 *
 * <p>Every public table has exactly 256 entries. Each table is described by a compact
 * list of {@code {end, state}} pairs: a pair assigns {@code state} to every index from
 * one past the previous pair's {@code end} (or from 0 for the first pair) up to and
 * including its own {@code end}. The tables are expanded once, in the static
 * initializer at the bottom of the class.
 *
 * <p>Range comments use the original {@code %bbbbbbbb} binary notation and always give
 * the full inclusive range covered by the pair that follows them.
 *
 * <p>NOTE(review): the public tables are plain {@code int[]} arrays and therefore
 * writable by any caller. They are kept as arrays so existing callers keep working;
 * treat them as read-only.
 */
public final class DecoderStateTables {
    /** Position of the inclusive upper bound inside a {@code {end, state}} pair. */
    private static final int RANGE_INDEX_END = 0;

    /** Position of the state value inside a {@code {end, state}} pair. */
    private static final int RANGE_INDEX_VALUE = 1;

    /** State stored for every value the range lists mark as ILLEGAL. */
    public static final int STATE_ILLEGAL = 255;

    /** Declared for callers; no table in this class stores this value. */
    public static final int STATE_UNSUPPORTED = 254;

    // ------------------------------------------------------------------
    // EII child states (stored in the DII and EII tables)
    // ------------------------------------------------------------------

    public static final int EII_NO_AIIS_INDEX_SMALL = 0;
    public static final int EII_AIIS_INDEX_SMALL = 1;
    public static final int EII_INDEX_MEDIUM = 2;
    public static final int EII_INDEX_LARGE = 3;
    public static final int EII_NAMESPACES = 4;
    public static final int EII_LITERAL = 5;
    public static final int CII_UTF8_SMALL_LENGTH = 6;
    public static final int CII_UTF8_MEDIUM_LENGTH = 7;
    public static final int CII_UTF8_LARGE_LENGTH = 8;
    public static final int CII_UTF16_SMALL_LENGTH = 9;
    public static final int CII_UTF16_MEDIUM_LENGTH = 10;
    public static final int CII_UTF16_LARGE_LENGTH = 11;
    public static final int CII_RA = 12;
    public static final int CII_EA = 13;
    public static final int CII_INDEX_SMALL = 14;
    public static final int CII_INDEX_MEDIUM = 15;
    public static final int CII_INDEX_LARGE = 16;
    public static final int CII_INDEX_LARGE_LARGE = 17;
    public static final int COMMENT_II = 18;
    public static final int PROCESSING_INSTRUCTION_II = 19;
    public static final int DOCUMENT_TYPE_DECLARATION_II = 20;
    public static final int UNEXPANDED_ENTITY_REFERENCE_II = 21;
    public static final int TERMINATOR_SINGLE = 22;
    public static final int TERMINATOR_DOUBLE = 23;

    /**
     * DII table. Identical to {@link #EII} for %00000000 to %01111111; above that it
     * only recognises the document type declaration, processing instruction, comment
     * and terminator values.
     */
    public static final int[] DII = new int[256];

    private static final int[][] DII_RANGES = {
        // EII

        // %00000000 to %00011111 EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
        // %00100000 to %00100111 EII medium index
        { 0x27, EII_INDEX_MEDIUM },
        // %00101000 to %00110000 EII large index
        //   (%00101000 to %00101111 large index, %00110000 very large index)
        { 0x30, EII_INDEX_LARGE },
        // %00110001 to %00110111 ILLEGAL
        { 0x37, STATE_ILLEGAL },
        // %00111000 EII namespaces
        { 0x38, EII_NAMESPACES },
        // %00111001 to %00111011 ILLEGAL
        { 0x3B, STATE_ILLEGAL },
        // %00111100 EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },
        // %00111101 EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },
        // %00111110 ILLEGAL
        { 0x3E, STATE_ILLEGAL },
        // %00111111 EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },
        // %01000000 to %01011111 EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },
        // %01100000 to %01100111 EII attributes medium index
        { 0x67, EII_INDEX_MEDIUM },
        // %01101000 to %01110000 EII attributes large index
        //   (%01101000 to %01101111 large index, %01110000 very large index)
        { 0x70, EII_INDEX_LARGE },
        // %01110001 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 EII attributes namespaces
        { 0x78, EII_NAMESPACES },
        // %01111001 to %01111011 ILLEGAL
        { 0x7B, STATE_ILLEGAL },
        // %01111100 EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },
        // %01111101 EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },
        // %01111110 ILLEGAL
        { 0x7E, STATE_ILLEGAL },
        // %01111111 EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // %10000000 to %11000011 ILLEGAL
        { 0xC3, STATE_ILLEGAL },
        // %11000100 to %11000111 document type declaration
        { 0xC7, DOCUMENT_TYPE_DECLARATION_II },
        // %11001000 to %11100000 ILLEGAL
        { 0xE0, STATE_ILLEGAL },
        // %11100001 processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },
        // %11100010 comment
        { 0xE2, COMMENT_II },
        // %11100011 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };

    /**
     * EII table: element states for %00000000 to %01111111, CII (character content)
     * states for %10000000 to %10111000, followed by unexpanded entity reference,
     * processing instruction, comment and terminator values.
     */
    public static final int[] EII = new int[256];

    private static final int[][] EII_RANGES = {
        // EII

        // %00000000 to %00011111 EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
        // %00100000 to %00100111 EII medium index
        { 0x27, EII_INDEX_MEDIUM },
        // %00101000 to %00110000 EII large index
        //   (%00101000 to %00101111 large index, %00110000 very large index)
        { 0x30, EII_INDEX_LARGE },
        // %00110001 to %00110111 ILLEGAL
        { 0x37, STATE_ILLEGAL },
        // %00111000 EII namespaces
        { 0x38, EII_NAMESPACES },
        // %00111001 to %00111011 ILLEGAL
        { 0x3B, STATE_ILLEGAL },
        // %00111100 EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },
        // %00111101 EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },
        // %00111110 ILLEGAL
        { 0x3E, STATE_ILLEGAL },
        // %00111111 EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },
        // %01000000 to %01011111 EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },
        // %01100000 to %01100111 EII attributes medium index
        { 0x67, EII_INDEX_MEDIUM },
        // %01101000 to %01110000 EII attributes large index
        //   (%01101000 to %01101111 large index, %01110000 very large index)
        { 0x70, EII_INDEX_LARGE },
        // %01110001 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 EII attributes namespaces
        { 0x78, EII_NAMESPACES },
        // %01111001 to %01111011 ILLEGAL
        { 0x7B, STATE_ILLEGAL },
        // %01111100 EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },
        // %01111101 EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },
        // %01111110 ILLEGAL
        { 0x7E, STATE_ILLEGAL },
        // %01111111 EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // CII

        // UTF-8 string

        // %10000000 to %10000001 CII UTF-8 no add to table small length
        { 0x81, CII_UTF8_SMALL_LENGTH },
        // %10000010 CII UTF-8 no add to table medium length
        { 0x82, CII_UTF8_MEDIUM_LENGTH },
        // %10000011 CII UTF-8 no add to table large length
        { 0x83, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string

        // %10000100 to %10000101 CII UTF-16 no add to table small length
        { 0x85, CII_UTF16_SMALL_LENGTH },
        // %10000110 CII UTF-16 no add to table medium length
        { 0x86, CII_UTF16_MEDIUM_LENGTH },
        // %10000111 CII UTF-16 no add to table large length
        { 0x87, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet

        // %10001000 to %10001011 CII RA no add to table
        { 0x8B, CII_RA },

        // Encoding algorithm

        // %10001100 to %10001111 CII EA no add to table
        { 0x8F, CII_EA },

        // UTF-8 string, add to table

        // %10010000 to %10010001 CII UTF-8 add to table small length
        { 0x91, CII_UTF8_SMALL_LENGTH },
        // %10010010 CII UTF-8 add to table medium length
        { 0x92, CII_UTF8_MEDIUM_LENGTH },
        // %10010011 CII UTF-8 add to table large length
        { 0x93, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string, add to table

        // %10010100 to %10010101 CII UTF-16 add to table small length
        { 0x95, CII_UTF16_SMALL_LENGTH },
        // %10010110 CII UTF-16 add to table medium length
        { 0x96, CII_UTF16_MEDIUM_LENGTH },
        // %10010111 CII UTF-16 add to table large length
        { 0x97, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet, add to table

        // %10011000 to %10011011 CII RA add to table
        { 0x9B, CII_RA },

        // Encoding algorithm, add to table

        // %10011100 to %10011111 CII EA add to table
        { 0x9F, CII_EA },

        // Index

        // %10100000 to %10101111 CII small index
        { 0xAF, CII_INDEX_SMALL },
        // %10110000 to %10110011 CII medium index
        { 0xB3, CII_INDEX_MEDIUM },
        // %10110100 to %10110111 CII large index
        { 0xB7, CII_INDEX_LARGE },
        // %10111000 CII very large index
        { 0xB8, CII_INDEX_LARGE_LARGE },

        // %10111001 to %11000111 ILLEGAL
        { 0xC7, STATE_ILLEGAL },
        // %11001000 to %11001011 unexpanded entity reference
        { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II },
        // %11001100 to %11100000 ILLEGAL
        { 0xE0, STATE_ILLEGAL },
        // %11100001 processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },
        // %11100010 comment
        { 0xE2, COMMENT_II },
        // %11100011 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };

    // ------------------------------------------------------------------
    // AII states
    // ------------------------------------------------------------------

    public static final int AII_INDEX_SMALL = 0;
    public static final int AII_INDEX_MEDIUM = 1;
    public static final int AII_INDEX_LARGE = 2;
    public static final int AII_LITERAL = 3;
    public static final int AII_TERMINATOR_SINGLE = 4;
    public static final int AII_TERMINATOR_DOUBLE = 5;

    /** AII table; entries are {@code AII_*} states or {@link #STATE_ILLEGAL}. */
    public static final int[] AII = new int[256];

    private static final int[][] AII_RANGES = {
        // %00000000 to %00111111 AII small index
        { 0x3F, AII_INDEX_SMALL },
        // %01000000 to %01011111 AII medium index
        { 0x5F, AII_INDEX_MEDIUM },
        // %01100000 to %01101111 AII large index
        { 0x6F, AII_INDEX_LARGE },
        // %01110000 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 AII literal (no prefix, no namespace)
        // %01111001 AII literal (no prefix, namespace)
        { 0x79, AII_LITERAL },
        // %01111010 ILLEGAL
        { 0x7A, STATE_ILLEGAL },
        // %01111011 AII literal (prefix, namespace)
        { 0x7B, AII_LITERAL },
        // %01111100 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, AII_TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, AII_TERMINATOR_DOUBLE }
    };

    // ------------------------------------------------------------------
    // AII value states
    // ------------------------------------------------------------------

    public static final int NISTRING_UTF8_SMALL_LENGTH = 0;
    public static final int NISTRING_UTF8_MEDIUM_LENGTH = 1;
    public static final int NISTRING_UTF8_LARGE_LENGTH = 2;
    public static final int NISTRING_UTF16_SMALL_LENGTH = 3;
    public static final int NISTRING_UTF16_MEDIUM_LENGTH = 4;
    public static final int NISTRING_UTF16_LARGE_LENGTH = 5;
    public static final int NISTRING_RA = 6;
    public static final int NISTRING_EA = 7;
    public static final int NISTRING_INDEX_SMALL = 8;
    public static final int NISTRING_INDEX_MEDIUM = 9;
    public static final int NISTRING_INDEX_LARGE = 10;
    public static final int NISTRING_EMPTY = 11;

    /**
     * AII value table; entries are {@code NISTRING_*} states or {@link #STATE_ILLEGAL}.
     * The "add to table" ranges (%01000000 to %01111111) map to the same states as
     * their "no add to table" counterparts (%00000000 to %00111111).
     */
    public static final int[] NISTRING = new int[256];

    private static final int[][] NISTRING_RANGES = {
        // UTF-8 string

        // %00000000 to %00000111 UTF-8 no add to table small length
        { 0x07, NISTRING_UTF8_SMALL_LENGTH },
        // %00001000 UTF-8 no add to table medium length
        { 0x08, NISTRING_UTF8_MEDIUM_LENGTH },
        // %00001001 to %00001011 ILLEGAL
        { 0x0B, STATE_ILLEGAL },
        // %00001100 UTF-8 no add to table large length
        { 0x0C, NISTRING_UTF8_LARGE_LENGTH },
        // %00001101 to %00001111 ILLEGAL
        { 0x0F, STATE_ILLEGAL },

        // UTF-16 string

        // %00010000 to %00010111 UTF-16 no add to table small length
        { 0x17, NISTRING_UTF16_SMALL_LENGTH },
        // %00011000 UTF-16 no add to table medium length
        { 0x18, NISTRING_UTF16_MEDIUM_LENGTH },
        // %00011001 to %00011011 ILLEGAL
        { 0x1B, STATE_ILLEGAL },
        // %00011100 UTF-16 no add to table large length
        { 0x1C, NISTRING_UTF16_LARGE_LENGTH },
        // %00011101 to %00011111 ILLEGAL
        { 0x1F, STATE_ILLEGAL },

        // Restricted alphabet

        // %00100000 to %00101111 RA no add to table
        { 0x2F, NISTRING_RA },

        // Encoding algorithm

        // %00110000 to %00111111 EA no add to table
        { 0x3F, NISTRING_EA },

        // UTF-8 string, add to table

        // %01000000 to %01000111 UTF-8 add to table small length
        { 0x47, NISTRING_UTF8_SMALL_LENGTH },
        // %01001000 UTF-8 add to table medium length
        { 0x48, NISTRING_UTF8_MEDIUM_LENGTH },
        // %01001001 to %01001011 ILLEGAL
        { 0x4B, STATE_ILLEGAL },
        // %01001100 UTF-8 add to table large length
        { 0x4C, NISTRING_UTF8_LARGE_LENGTH },
        // %01001101 to %01001111 ILLEGAL
        { 0x4F, STATE_ILLEGAL },

        // UTF-16 string, add to table

        // %01010000 to %01010111 UTF-16 add to table small length
        { 0x57, NISTRING_UTF16_SMALL_LENGTH },
        // %01011000 UTF-16 add to table medium length
        { 0x58, NISTRING_UTF16_MEDIUM_LENGTH },
        // %01011001 to %01011011 ILLEGAL
        { 0x5B, STATE_ILLEGAL },
        // %01011100 UTF-16 add to table large length
        { 0x5C, NISTRING_UTF16_LARGE_LENGTH },
        // %01011101 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // Restricted alphabet, add to table

        // %01100000 to %01101111 RA add to table
        { 0x6F, NISTRING_RA },

        // Encoding algorithm, add to table

        // %01110000 to %01111111 EA add to table
        { 0x7F, NISTRING_EA },

        // Index

        // %10000000 to %10111111 index small
        { 0xBF, NISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, NISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, NISTRING_INDEX_LARGE },
        // %11110000 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 Empty value
        { 0xFF, NISTRING_EMPTY },
    };

    // ------------------------------------------------------------------
    // Identifying string states
    // ------------------------------------------------------------------

    public static final int ISTRING_SMALL_LENGTH = 0;
    public static final int ISTRING_MEDIUM_LENGTH = 1;
    public static final int ISTRING_LARGE_LENGTH = 2;
    public static final int ISTRING_INDEX_SMALL = 3;
    public static final int ISTRING_INDEX_MEDIUM = 4;
    public static final int ISTRING_INDEX_LARGE = 5;

    /** Identifying string table; entries are {@code ISTRING_*} or {@link #STATE_ILLEGAL}. */
    public static final int[] ISTRING = new int[256];

    private static final int[][] ISTRING_RANGES = {
        // %00000000 to %00111111 small length
        { 0x3F, ISTRING_SMALL_LENGTH },
        // %01000000 medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },
        // %01000001 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },
        // %01100000 large length
        { 0x60, ISTRING_LARGE_LENGTH },
        // %01100001 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },
        // %10000000 to %10111111 index small
        { 0xBF, ISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, ISTRING_INDEX_LARGE },
        // %11110000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };

    // Extra states used only by ISTRING_PREFIX_NAMESPACE. They continue the ISTRING_*
    // numbering so both families can share one table.
    // NOTE(review): the numeric suffixes presumably name the decoded string length
    // singled out by that entry (3, 5, 29, 36) - confirm against the decoder.

    public static final int ISTRING_PREFIX_NAMESPACE_LENGTH_3 = 6;
    public static final int ISTRING_PREFIX_NAMESPACE_LENGTH_5 = 7;
    public static final int ISTRING_PREFIX_NAMESPACE_LENGTH_29 = 8;
    public static final int ISTRING_PREFIX_NAMESPACE_LENGTH_36 = 9;
    public static final int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10;

    /**
     * Same layout as {@link #ISTRING}, except that four individual small-length values
     * and the small-index value %10000000 are given their own dedicated states.
     */
    public static final int[] ISTRING_PREFIX_NAMESPACE = new int[256];

    private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = {
        // %00000000 to %00000001 small length
        { 0x01, ISTRING_SMALL_LENGTH },
        // %00000010 small length, dedicated state
        { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 },
        // %00000011 small length
        { 0x03, ISTRING_SMALL_LENGTH },
        // %00000100 small length, dedicated state
        { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 },
        // %00000101 to %00011011 small length
        { 0x1B, ISTRING_SMALL_LENGTH },
        // %00011100 small length, dedicated state
        { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 },
        // %00011101 to %00100010 small length
        { 0x22, ISTRING_SMALL_LENGTH },
        // %00100011 small length, dedicated state
        { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 },
        // %00100100 to %00111111 small length
        { 0x3F, ISTRING_SMALL_LENGTH },

        // %01000000 medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },
        // %01000001 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },
        // %01100000 large length
        { 0x60, ISTRING_LARGE_LENGTH },
        // %01100001 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },
        // %10000000 index small, 0
        { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO },
        // %10000001 to %10111111 index small
        { 0xBF, ISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, ISTRING_INDEX_LARGE },
        // %11110000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };

    // ------------------------------------------------------------------
    // UTF-8 states
    // ------------------------------------------------------------------

    public static final int UTF8_NCNAME_NCNAME = 0;
    public static final int UTF8_NCNAME_NCNAME_CHAR = 1;
    public static final int UTF8_TWO_BYTES = 2;
    public static final int UTF8_THREE_BYTES = 3;
    public static final int UTF8_FOUR_BYTES = 4;

    /**
     * UTF-8 NCName table. ASCII letters and '_' map to {@link #UTF8_NCNAME_NCNAME};
     * digits, '-' and '.' map to {@link #UTF8_NCNAME_NCNAME_CHAR}; every other value
     * below %10000000 is illegal. Values from %11000010 to %11110111 map to the
     * two-, three- and four-byte states.
     */
    public static final int[] UTF8_NCNAME = new int[256];

    private static final int[][] UTF8_NCNAME_RANGES = {
        // Basic Latin

        // %00000000 to %00101100 ILLEGAL
        { 0x2C, STATE_ILLEGAL },
        // '-' '.'
        // %00101101 to %00101110 [#x002D-#x002E]
        { 0x2E, UTF8_NCNAME_NCNAME_CHAR },
        // %00101111 ILLEGAL
        { 0x2F, STATE_ILLEGAL },
        // [0-9]
        // %00110000 to %00111001 [#x0030-#x0039]
        { 0x39, UTF8_NCNAME_NCNAME_CHAR },
        // %00111010 to %01000000 ILLEGAL
        { 0x40, STATE_ILLEGAL },
        // [A-Z]
        // %01000001 to %01011010 [#x0041-#x005A]
        { 0x5A, UTF8_NCNAME_NCNAME },
        // %01011011 to %01011110 ILLEGAL
        { 0x5E, STATE_ILLEGAL },
        // '_'
        // %01011111 [#x005F]
        { 0x5F, UTF8_NCNAME_NCNAME },
        // %01100000 ILLEGAL
        { 0x60, STATE_ILLEGAL },
        // [a-z]
        // %01100001 to %01111010 [#x0061-#x007A]
        { 0x7A, UTF8_NCNAME_NCNAME },
        // %01111011 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // Two bytes

        // %10000000 to %11000001 ILLEGAL
        { 0xC1, STATE_ILLEGAL },
        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },

        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },

        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },

        // %11111000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    /**
     * Single-byte state of the {@link #UTF8} table. It has the same numeric value as
     * {@link #UTF8_NCNAME_NCNAME_CHAR}; the two constants belong to different tables.
     */
    public static final int UTF8_ONE_BYTE = 1;

    /**
     * General UTF-8 table. Tab, line feed, carriage return and %00100000 to %01111111
     * map to {@link #UTF8_ONE_BYTE}; all other values below %00100000 are illegal. The
     * upper half is identical to {@link #UTF8_NCNAME}.
     */
    public static final int[] UTF8 = new int[256];

    private static final int[][] UTF8_RANGES = {
        // Basic Latin

        // %00000000 to %00001000 ILLEGAL
        { 0x08, STATE_ILLEGAL },
        // CHARACTER TABULATION, LINE FEED
        // %00001001 to %00001010 [#x0009-#x000A]
        { 0x0A, UTF8_ONE_BYTE },
        // %00001011 to %00001100 ILLEGAL
        { 0x0C, STATE_ILLEGAL },
        // CARRIAGE RETURN
        // %00001101 [#x000D]
        { 0x0D, UTF8_ONE_BYTE },
        // %00001110 to %00011111 ILLEGAL
        { 0x1F, STATE_ILLEGAL },
        // %00100000 to %01111111
        { 0x7F, UTF8_ONE_BYTE },

        // Two bytes

        // %10000000 to %11000001 ILLEGAL
        { 0xC1, STATE_ILLEGAL },
        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },

        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },

        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },

        // %11111000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    /**
     * Expands a list of {@code {end, state}} pairs into a flat lookup table.
     *
     * <p>Pair <i>k</i> assigns its state to every index from one past the end of pair
     * <i>k-1</i> (0 for the first pair) up to and including its own end.
     *
     * @param table  destination array; every entry is overwritten
     * @param ranges pairs in strictly ascending order of end value, the last of which
     *               must end at {@code table.length - 1}
     * @throws IllegalArgumentException if a pair's end lies before the current start
     *         or beyond the table, or if the pairs do not cover the whole table. An
     *         uncovered entry would silently stay 0, which is a valid state in every
     *         table of this class, so the mistake is rejected instead.
     */
    private static void constructTable(int[] table, int[][] ranges) {
        int start = 0x00;
        for (int range = 0; range < ranges.length; range++) {
            final int end = ranges[range][RANGE_INDEX_END];
            final int value = ranges[range][RANGE_INDEX_VALUE];
            if (end < start || end >= table.length) {
                throw new IllegalArgumentException(
                        "Range " + range + " ends at " + end
                        + " but must lie in [" + start + ", " + (table.length - 1) + "]");
            }
            for (int i = start; i <= end; i++) {
                table[i] = value;
            }
            start = end + 1;
        }
        if (start != table.length) {
            throw new IllegalArgumentException(
                    "Ranges cover only [0, " + (start - 1) + "] of a table with "
                    + table.length + " entries");
        }
    }

    // Runs after every field initializer above (static initialization is textual), so
    // all range lists already exist when the tables are filled.
    static {
        // DII
        constructTable(DII, DII_RANGES);

        // EII
        constructTable(EII, EII_RANGES);

        // AII
        constructTable(AII, AII_RANGES);

        // AII value
        constructTable(NISTRING, NISTRING_RANGES);

        // Identifying string
        constructTable(ISTRING, ISTRING_RANGES);

        // Identifying string, prefix/namespace variant
        constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES);

        // UTF-8 NCNAME states
        constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES);

        // UTF-8 states
        constructTable(UTF8, UTF8_RANGES);
    }

    /** Not instantiable: the class only exposes static constants and tables. */
    private DecoderStateTables() {
    }
}
805
Popular Tags