KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > fri > patterns > interpreter > parsergenerator > lexer > StandardLexerRules


1 package fri.patterns.interpreter.parsergenerator.lexer;
2
3 import fri.patterns.interpreter.parsergenerator.Token;
4
5 /**
6     Standard lexer rules are building blocks for lexers dealing with text input.
7     This class resolves nonterminals enclosed in `backquotes` within an EBNF,
8     e.g. `cstylecomment`.
9     <p>
10     Furthermore it provides methods to retrieve sets of rules describing certain standard
11     scan items like `number` or `identifier`. The resulting arrays can be built together
12     by <i>SyntaxUtil.catenizeRules(...)</i>.
13     <p>
14     This class provides rules for comments with an arbitrary start character or start/end sequence:
15     <ul>
16         <li>getCustomOneLineCommentRules(String nonterminalName, String startChar)</li> and
17         <li>getCustomMultiLineCommentRules(String nonterminalName, String startSeq, String endSeq)</li>.
18     </ul>
19     <p>
20     Example (CStyleCommentStrip):
21     <pre>
22     String [][] rules = {
23         { Token.TOKEN, "others" }, // define what we want to receive
24         { Token.TOKEN, "`stringdef`" }, // need this rule as string definitions could contain comments
25         { Token.IGNORED, "`cstylecomment`" },
26         { "others", "others", "other" },
27         { "others", "other" },
28         { "other", "`char`", Token.BUTNOT, "`cstylecomment`", Token.BUTNOT, "`stringdef`" },
29     };
30     Syntax syntax = new Syntax(rules);
31     SyntaxSeparation separation = new SyntaxSeparation(syntax);
32     LexerBuilder builder = new LexerBuilder(separation.getLexerSyntax(), separation.getIgnoredSymbols());
33     Lexer lexer = builder.getLexer();
34     </pre>
35
36     TODO: Refactor this class and make smaller units with better names.
37
38     @see fri.patterns.interpreter.parsergenerator.lexer.LexerBuilder
39     @author (c) 2002, Fritz Ritzberger
40 */

41
42 public abstract class StandardLexerRules
43 {
44     /**
45         Returns e.g. the Letter-Rules <i>getUnicodeLetterRules()</i> for id "letter".
46         Using this, one can write things like `identifier` in a Lexer specification text,
47         as LexerBuilder tries to resolve these words calling this method.
48         Possible values for id are:
49         <ul>
50             <li>char (all UNICODE characters)</li>
51             <li>newline</li>
52             <li>newlines</li>
53             <li>space</li>
54             <li>spaces</li>
55             <li>whitespace</li>
56             <li>whitespaces</li>
57             <li>letter</li>
58             <li>digit</li>
59             <li>digits</li>
60             <li>hexdigit</li>
61             <li>hexdigits (does NOT include preceeding "0x")</li>
62             <li>identifier</li>
63             <li>stringdef</li>
64             <li>chardef</li>
65             <li>bnf_chardef (differs as in BNF characters can be written as "020" instead of '\020')</li>
66             <li>ruleref (`lexerrule`)</li>
67             <li>quantifier (*+?)</li>
68             <li>cstylecomment</li>
69             <li>comment</li>
70             <li>shellstylecomment</li>
71             <li>octdigits</li>
72             <li>bindigits</li>
73             <li>number</li>
74             <li>float</li>
75             <li>integer</li>
76             <li>xmlchar</li>
77             <li>combiningchar</li>
78             <li>extenderchar</li>
79         </ul>
80     */

81     public static String JavaDoc [][] rulesForIdentifier(String JavaDoc id) {
82         //System.err.println("searching for syntax rules for nonterminal "+id);
83
if (id.equals("char"))
84             return getUnicodeCharRules();
85         if (id.equals("newline"))
86             return getNewlineRules();
87         if (id.equals("newlines"))
88             return getNewlinesRules();
89         if (id.equals("space"))
90             return getSpaceRules();
91         if (id.equals("spaces"))
92             return getSpacesRules();
93         if (id.equals("whitespace"))
94             return getWhitespaceRules();
95         if (id.equals("whitespaces"))
96             return getWhitespacesRules();
97         if (id.equals("letter"))
98             return getUnicodeLetterRules();
99         if (id.equals("digit"))
100             return getUnicodeDigitRules();
101         if (id.equals("digits"))
102             return getUnicodeDigitsRules();
103         if (id.equals("hexdigit"))
104             return getHexDigitRules();
105         if (id.equals("hexdigits"))
106             return getHexDigitsRules();
107         if (id.equals("octdigits"))
108             return getOctDigitsRules();
109         if (id.equals("bindigits"))
110             return getBinDigitsRules();
111         if (id.equals("number"))
112             return getNumberRules();
113         if (id.equals("integer"))
114             return getIntegerRules();
115         if (id.equals("float"))
116             return getFloatRules();
117         if (id.equals("identifier"))
118             return getUnicodeIdentifierRules();
119         if (id.equals("stringdef"))
120             return getUnicodeStringdefRules();
121         if (id.equals("chardef"))
122             return getUnicodeChardefRules();
123         if (id.equals("bnf_chardef"))
124             return getUnicodeBNFChardefRules();
125         if (id.equals("ruleref"))
126             return getRulerefRules();
127         if (id.equals("quantifier"))
128             return getQuantifierRules();
129         if (id.equals("comment"))
130             return getCommentRules();
131         if (id.equals("cstylecomment"))
132             return getCStyleCommentRules();
133         if (id.equals("shellstylecomment"))
134             return getShellStyleCommentRules();
135         if (id.equals("xmlchar"))
136             return getUnicodeXmlCharRules();
137         if (id.equals("combiningchar"))
138             return getUnicodeCombiningCharRules();
139         if (id.equals("extenderchar"))
140             return getUnicodeExtenderCharRules();
141         return null;
142     }
143
144
145     /**
146         Returns rules for a custom comment (like C-style "//", but with passed start sequence).
147         @param nonterminalName name of comment to be used within syntax, e.g. "basicComment".
148         @param startChar string (1-n characters) defining the start sequence of the comment, e.g. ";"
149     */

150     public static final String JavaDoc [][] getCustomOneLineCommentRules(String JavaDoc nonterminalName, String JavaDoc startChar) {
151         String JavaDoc [][] sarr0 = getUnicodeCharRules();
152         String JavaDoc [][] sarr1 = getNewlineRules();
153         String JavaDoc [][] sarr2 = getSomeRules(290, 296);
154         String JavaDoc [] customRule = new String JavaDoc[sarr2[0].length];
155         System.arraycopy(sarr2[0], 0, customRule, 0, customRule.length);
156         customRule[0] = nonterminalName;
157         customRule[1] = "\""+startChar+"\""; // put custom sequence where where "//" sits
158
sarr2[0] = customRule;
159         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
160     }
161     
162     /**
163         Returns rules for a custom comment (like C-style "/*", but with passed start and end sequence).
164         @param nonterminalName name of comment to be used within syntax, e.g. "pascalComment".
165         @param startSeq string defining the start sequence of the comment, e.g. "(*"
166         @param endSeq string defining the end sequence of the comment, e.g. "*)"
167     */

168     public static final String JavaDoc [][] getCustomMultiLineCommentRules(String JavaDoc nonterminalName, String JavaDoc startSeq, String JavaDoc endSeq) {
169         String JavaDoc [][] sarr0 = getUnicodeCharRules();
170         String JavaDoc [][] sarr1 = getNewlineRules();
171         String JavaDoc [][] customRules = new String JavaDoc [6][];
172         customRules[0] = new String JavaDoc [] { nonterminalName, "\""+startSeq+"\"", "char_minus_star_slash_list_opt", "\""+endSeq+"\"" };
173         customRules[1] = new String JavaDoc [] { "char_minus_"+nonterminalName, "char", Token.BUTNOT, "\""+endSeq+"\"" };
174         customRules[2] = new String JavaDoc [] { "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName };
175         customRules[3] = new String JavaDoc [] { "char_minus_"+nonterminalName+"_list", "char_minus_"+nonterminalName };
176         customRules[4] = new String JavaDoc [] { "char_minus_"+nonterminalName+"_list_opt", "char_minus_"+nonterminalName+"_list" };
177         customRules[5] = new String JavaDoc [] { "char_minus_"+nonterminalName+"_list_opt" /*nothing*/ };
178         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, customRules });
179     }
180     
181     
182     /** Rules to scan one UNICODE character: 0x0 .. 0xFFFF. */
183     public static final String JavaDoc [][] getUnicodeCharRules() {
184         return getSomeRules(21, 22);
185     }
186
187     /** Rules to scan one platform independent newline. */
188     public static final String JavaDoc [][] getNewlineRules() {
189         return getSomeRules(16, 21);
190     }
191
192     /** Rules to scan one platform independent newline. */
193     public static final String JavaDoc [][] getNewlinesRules() {
194         String JavaDoc [][] sarr0 = getNewlineRules();
195         String JavaDoc [][] sarr1 = getSomeRules(0, 2, newlinesRules);
196         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1 });
197     }
198
199     /** Rules to scan one space. */
200     public static final String JavaDoc [][] getSpaceRules() {
201         return getSomeRules(13, 16);
202     }
203
204     /** Rules to scan spaces. */
205     public static final String JavaDoc [][] getSpacesRules() {
206         String JavaDoc [][] sarr0 = getSpaceRules();
207         String JavaDoc [][] sarr1 = getSomeRules(242, 244);
208         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1 });
209     }
210
211     /** Rules to scan one space or newline. */
212     public static final String JavaDoc [][] getWhitespaceRules() {
213         String JavaDoc [][] sarr0 = getSpaceRules();
214         String JavaDoc [][] sarr1 = getNewlineRules();
215         String JavaDoc [][] sarr2 = getSomeRules(0, 2, whitespaceRules);
216         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
217     }
218
219     /** Rules to scan spaces or newlines. */
220     public static final String JavaDoc [][] getWhitespacesRules() {
221         String JavaDoc [][] sarr0 = getSpacesRules();
222         String JavaDoc [][] sarr1 = getNewlinesRules();
223         String JavaDoc [][] sarr2 = getSomeRules(0, 4, whitespaceRules);
224         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
225     }
226
227     /** Rules to scan one hexdigit. */
228     public static final String JavaDoc [][] getHexDigitRules() {
229         return getSomeRules(10, 13);
230     }
231
232     /** Rules to scan hexdigits that form a number, starting "0x" not included. */
233     public static final String JavaDoc [][] getHexDigitsRules() {
234         String JavaDoc [][] sarr0 = getHexDigitRules();
235         String JavaDoc [][] sarr1 = getSomeRules(246, 248); // more hexdigits
236
return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1 });
237     }
238
239     /** Rules to scan one letter. */
240     public static final String JavaDoc [][] getUnicodeLetterRules() {
241         return getSomeRules(37, 242);
242     }
243
244     /** Rules to scan one digit. */
245     public static final String JavaDoc [][] getUnicodeDigitRules() {
246         return getSomeRules(22, 37);
247     }
248
249     /** Rules to scan digits. */
250     public static final String JavaDoc [][] getUnicodeDigitsRules() {
251         String JavaDoc [][] sarr0 = getUnicodeDigitRules();
252         String JavaDoc [][] sarr1 = getSomeRules(244, 246); // more digits
253
return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1 });
254     }
255     
256     /** Rules to scan identifiers that start with letter and continue with letter or digit or '_'. */
257     public static final String JavaDoc [][] getUnicodeIdentifierRules() {
258         String JavaDoc [][] sarr0 = getUnicodeDigitRules();
259         String JavaDoc [][] sarr1 = getUnicodeLetterRules();
260         String JavaDoc [][] sarr2 = getSomeRules(259, 268);
261         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
262     }
263     
264     /** Rules to scan C/Java-like 'c'haracterdefinitions: '\377', 'A', '\n'. */
265     public static final String JavaDoc [][] getUnicodeChardefRules() {
266         String JavaDoc [][] sarr0 = getUnicodeCharRules();
267         String JavaDoc [][] sarr1 = getSomeRules(0, 1, digitRules); // octdigit
268
String JavaDoc [][] sarr2 = getSomeRules(0, 2, chardefRules);
269         String JavaDoc [][] sarr3 = getSomeRules(248, 249); // part of bnf_chardef
270
String JavaDoc [][] sarr4 = getSomeRules(251, 258); // part of bnf_chardef
271
return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2, sarr3, sarr4 });
272     }
273     
274     /** Rules to scan BNF-like 'c'haracterdefinitions. They differ from C/Java-chardefs in that they can be written as digits: 0x20. */
275     public static final String JavaDoc [][] getUnicodeBNFChardefRules() {
276         String JavaDoc [][] sarr0 = getUnicodeCharRules();
277         String JavaDoc [][] sarr1 = getHexDigitsRules();
278         String JavaDoc [][] sarr2 = getUnicodeDigitsRules();
279         String JavaDoc [][] sarr3 = getSomeRules(248, 259);
280         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2, sarr3 });
281     }
282     
283     /** Rules to scan "stringdefinitions" that can contain backslash as masking character. */
284     public static final String JavaDoc [][] getUnicodeStringdefRules() {
285         String JavaDoc [][] sarr0 = getUnicodeCharRules();
286         String JavaDoc [][] sarr1 = getSomeRules(268, 284);
287         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1 });
288     }
289
290     /** Rules to read a `lexerrule` within EBNF syntax specifications. */
291     public static final String JavaDoc [][] getRulerefRules() {
292         String JavaDoc [][] sarr0 = getUnicodeIdentifierRules();
293         String JavaDoc [][] sarr1 = getSomeRules(297, 298);
294         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1 });
295     }
296     
297     /** Rules to read quantifiers "*+?" within EBNF syntax specifications. */
298     public static final String JavaDoc [][] getQuantifierRules() {
299         return getSomeRules(7, 10);
300     }
301     
302     /** Rules to scan C-style slash-star and slash-slash AND shell-style # comments. */
303     public static final String JavaDoc [][] getCommentRules() {
304         String JavaDoc [][] sarr0 = getCStyleCommentRules();
305         String JavaDoc [][] sarr1 = getSomeRules(296, 297);
306         String JavaDoc [][] sarr2 = getSomeRules(299, 301);
307         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
308     }
309     
310     /** Rules to scan C-style slash-star and slash-slash comments. */
311     public static final String JavaDoc [][] getCStyleCommentRules() {
312         String JavaDoc [][] sarr0 = getUnicodeCharRules();
313         String JavaDoc [][] sarr1 = getNewlineRules();
314         String JavaDoc [][] sarr2 = getSomeRules(284, 296);
315         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
316     }
317     
318     /** Rules to scan # shell-style comments. */
319     public static final String JavaDoc [][] getShellStyleCommentRules() {
320         String JavaDoc [][] sarr0 = getUnicodeCharRules();
321         String JavaDoc [][] sarr1 = getNewlineRules();
322         String JavaDoc [][] sarr2 = getSomeRules(291, 297);
323         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
324     }
325     
326     /** Rules for XML combining chars. */
327     public static final String JavaDoc [][] getUnicodeXmlCharRules() {
328         return xmlCharRules;
329     }
330     
331     /** Rules for XML combining chars. */
332     public static final String JavaDoc [][] getUnicodeCombiningCharRules() {
333         String JavaDoc [][] sarr = getSomeRules(0, 95, xmlCombinigAndExtenderRules);
334         return sarr;
335     }
336     
337     /** Rules for XML extender chars. */
338     public static final String JavaDoc [][] getUnicodeExtenderCharRules() {
339         String JavaDoc [][] sarr = getSomeRules(95, 106, xmlCombinigAndExtenderRules);
340         return sarr;
341     }
342     
343     /** Rules for octal number chars. */
344     public static final String JavaDoc [][] getOctDigitsRules() {
345         String JavaDoc [][] sarr = getSomeRules(0, 3, digitRules);
346         return sarr;
347     }
348     
349     /** Rules for binary number chars. */
350     public static final String JavaDoc [][] getBinDigitsRules() {
351         String JavaDoc [][] sarr = getSomeRules(3, 6, digitRules);
352         return sarr;
353     }
354     
355     /** Rules for general number chars (integer, float). */
356     public static final String JavaDoc [][] getNumberRules() {
357         String JavaDoc [][] sarr2 = getIntegerRules();
358         String JavaDoc [][] sarr1 = getFloatRules();
359         String JavaDoc [][] sarr0 = getSomeRules(0, 2, numberRules);
360         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
361     }
362     
363     /** Rules for integer number chars. */
364     public static final String JavaDoc [][] getIntegerRules() {
365         String JavaDoc [][] sarr2 = getHexDigitsRules();
366         String JavaDoc [][] sarr1 = getUnicodeDigitsRules();
367         String JavaDoc [][] sarr0 = getSomeRules(19, 25, numberRules);
368         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1, sarr2 });
369     }
370     
371     /** Rules for float number chars. */
372     public static final String JavaDoc [][] getFloatRules() {
373         String JavaDoc [][] sarr1 = getUnicodeDigitsRules();
374         String JavaDoc [][] sarr0 = getSomeRules(2, 19, numberRules);
375         return catenizeRules(new String JavaDoc [][][] { sarr0, sarr1 });
376     }
377     
378
379     private static final String JavaDoc [][] getSomeRules(int startIncl, int endExcl) {
380         return getSomeRules(startIncl, endExcl, lexerSyntax);
381     }
382
383     private static final String JavaDoc [][] getSomeRules(int startIncl, int endExcl, String JavaDoc [][] rules) {
384         String JavaDoc [][] sarr = new String JavaDoc [endExcl - startIncl] [];
385         int j = 0;
386         for (int i = startIncl; i < endExcl; i++, j++)
387             sarr[j] = rules[i];
388         return sarr;
389     }
390
391
392     /** Print a grammar to System.out. */
393     public static void printRules(String JavaDoc [][] syntax) {
394         for (int i = 0; i < syntax.length; i++) {
395             for (int j = 0; j < syntax[i].length; j++)
396                 System.out.print(j == 1 ? " ::= "+syntax[i][j]+" " : syntax[i][j]+" ");
397             System.out.println();
398         }
399     }
400
401
402     /** Catenizes some rule sets to one rule set. Does not check for uniqueness. */
403     public static final String JavaDoc [][] catenizeRules(String JavaDoc [][][] arrays) {
404         int len = 0;
405         for (int i = 0; i < arrays.length; i++)
406             len += arrays[i].length;
407             
408         String JavaDoc [][] sarr = new String JavaDoc [len][];
409         
410         int k = 0;
411         for (int i = 0; i < arrays.length; i++) {
412             for (int j = 0; j < arrays[i].length; j++) {
413                 sarr[k] = arrays[i][j];
414                 k++;
415             }
416         }
417
418         return sarr;
419     }
420
421
422
423
424     /** Premade lexer syntax used to scan textual EBNF-like syntax specifications. */
425     public static final String JavaDoc [][] lexerSyntax = {
426     
427         // CAUTION: Do NOT edit without changing indexes in source above!!!
428

429         /*0*/ { Token.TOKEN, "identifier" },
430         /*1*/ { Token.TOKEN, "bnf_chardef" },
431         /*2*/ { Token.TOKEN, "stringdef" },
432         /*3*/ { Token.TOKEN, "quantifier" }, // see 297: ruleref
433

434         /*4*/ { Token.IGNORED, "spaces" },
435         /*5*/ { Token.IGNORED, "newline" },
436         /*6*/ { Token.IGNORED, "comment" },
437
438         /*7*/ { "quantifier", "'*'" },
439         /*8*/ { "quantifier", "'+'" },
440         /*9*/ { "quantifier", "'?'" },
441
442         // programmer digits
443

444         /*10*/ { "hexdigit", "'0'", Token.UPTO, "'9'" },
445         /*11*/ { "hexdigit", "'A'", Token.UPTO, "'F'" },
446         /*12*/ { "hexdigit", "'a'", Token.UPTO, "'f'" },
447         
448         // formatting characters
449

450         /*13*/ { "space", "0x20" },
451         /*14*/ { "space", "0x9" },
452         /*15*/ { "space", "0xC" }, // formfeed
453

454         /*16*/ { "cr", "'\\r'" }, // 0xD
455
/*17*/ { "nl", "'\\n'" }, // 0xA
456
/*18*/ { "newline", "cr", "nl" },
457         /*19*/ { "newline", "cr" },
458         /*20*/ { "newline", "nl" },
459
460         // UNICODE character set
461

462         /*21*/ { "char", "0x0", Token.UPTO, "0xFFFF" },
463
464         /*22*/ { "digit", "0x0030", Token.UPTO, "0x0039" },
465         /*23*/ { "digit", "0x0660", Token.UPTO, "0x0669" },
466         /*24*/ { "digit", "0x06F0", Token.UPTO, "0x06F9" },
467         /*25*/ { "digit", "0x0966", Token.UPTO, "0x096F" },
468         /*26*/ { "digit", "0x09E6", Token.UPTO, "0x09EF" },
469         /*27*/ { "digit", "0x0A66", Token.UPTO, "0x0A6F" },
470         /*28*/ { "digit", "0x0AE6", Token.UPTO, "0x0AEF" },
471         /*29*/ { "digit", "0x0B66", Token.UPTO, "0x0B6F" },
472         /*30*/ { "digit", "0x0BE7", Token.UPTO, "0x0BEF" },
473         /*31*/ { "digit", "0x0C66", Token.UPTO, "0x0C6F" },
474         /*32*/ { "digit", "0x0CE6", Token.UPTO, "0x0CEF" },
475         /*33*/ { "digit", "0x0D66", Token.UPTO, "0x0D6F" },
476         /*34*/ { "digit", "0x0E50", Token.UPTO, "0x0E59" },
477         /*35*/ { "digit", "0x0ED0", Token.UPTO, "0x0ED9" },
478         /*36*/ { "digit", "0x0F20", Token.UPTO, "0x0F29" },
479
480         /*37*/ { "letter", "0x0041", Token.UPTO, "0x005A" }, // BaseChar
481
/*38*/ { "letter", "0x0061", Token.UPTO, "0x007A" },
482         /*39*/ { "letter", "0x00C0", Token.UPTO, "0x00D6" },
483         /*40*/ { "letter", "0x00D8", Token.UPTO, "0x00F6" },
484         /*41*/ { "letter", "0x00F8", Token.UPTO, "0x00FF" },
485         /*42*/ { "letter", "0x0100", Token.UPTO, "0x0131" },
486         /*43*/ { "letter", "0x0134", Token.UPTO, "0x013E" },
487         /*44*/ { "letter", "0x0141", Token.UPTO, "0x0148" },
488         /*45*/ { "letter", "0x014A", Token.UPTO, "0x017E" },
489         /*46*/ { "letter", "0x0180", Token.UPTO, "0x01C3" },
490         /*47*/ { "letter", "0x01CD", Token.UPTO, "0x01F0" },
491         /*48*/ { "letter", "0x01F4", Token.UPTO, "0x01F5" },
492         /*49*/ { "letter", "0x01FA", Token.UPTO, "0x0217" },
493         /*50*/ { "letter", "0x0250", Token.UPTO, "0x02A8" },
494         /*51*/ { "letter", "0x02BB", Token.UPTO, "0x02C1" },
495         /*52*/ { "letter", "0x0386" },
496         /*53*/ { "letter", "0x0388", Token.UPTO, "0x038A" },
497         /*54*/ { "letter", "0x038C" },
498         /*55*/ { "letter", "0x038E", Token.UPTO, "0x03A1" },
499         /*56*/ { "letter", "0x03A3", Token.UPTO, "0x03CE" },
500         /*57*/ { "letter", "0x03D0", Token.UPTO, "0x03D6" },
501         /*58*/ { "letter", "0x03DA" },
502         /*59*/ { "letter", "0x03DC" },
503         /*60*/ { "letter", "0x03DE" },
504         /*61*/ { "letter", "0x03E0" },
505         /*62*/ { "letter", "0x03E2", Token.UPTO, "0x03F3" },
506         /*63*/ { "letter", "0x0401", Token.UPTO, "0x040C" },
507         /*64*/ { "letter", "0x040E", Token.UPTO, "0x044F" },
508         /*65*/ { "letter", "0x0451", Token.UPTO, "0x045C" },
509         /*66*/ { "letter", "0x045E", Token.UPTO, "0x0481" },
510         /*67*/ { "letter", "0x0490", Token.UPTO, "0x04C4" },
511         /*68*/ { "letter", "0x04C7", Token.UPTO, "0x04C8" },
512         /*69*/ { "letter", "0x04CB", Token.UPTO, "0x04CC" },
513         /*70*/ { "letter", "0x04D0", Token.UPTO, "0x04EB" },
514         /*71*/ { "letter", "0x04EE", Token.UPTO, "0x04F5" },
515         /*72*/ { "letter", "0x04F8", Token.UPTO, "0x04F9" },
516         /*73*/ { "letter", "0x0531", Token.UPTO, "0x0556" },
517         /*74*/ { "letter", "0x0559" },
518         /*75*/ { "letter", "0x0561", Token.UPTO, "0x0586" },
519         /*76*/ { "letter", "0x05D0", Token.UPTO, "0x05EA" },
520         /*77*/ { "letter", "0x05F0", Token.UPTO, "0x05F2" },
521         /*78*/ { "letter", "0x0621", Token.UPTO, "0x063A" },
522         /*79*/ { "letter", "0x0641", Token.UPTO, "0x064A" },
523         /*80*/ { "letter", "0x0671", Token.UPTO, "0x06B7" },
524         /*81*/ { "letter", "0x06BA", Token.UPTO, "0x06BE" },
525         /*82*/ { "letter", "0x06C0", Token.UPTO, "0x06CE" },
526         /*83*/ { "letter", "0x06D0", Token.UPTO, "0x06D3" },
527         /*84*/ { "letter", "0x06D5" },
528         /*85*/ { "letter", "0x06E5", Token.UPTO, "0x06E6" },
529         /*86*/ { "letter", "0x0905", Token.UPTO, "0x0939" },
530         /*87*/ { "letter", "0x093D" },
531         /*88*/ { "letter", "0x0958", Token.UPTO, "0x0961" },
532         /*89*/ { "letter", "0x0985", Token.UPTO, "0x098C" },
533         /*90*/ { "letter", "0x098F", Token.UPTO, "0x0990" },
534         /*91*/ { "letter", "0x0993", Token.UPTO, "0x09A8" },
535         /*92*/ { "letter", "0x09AA", Token.UPTO, "0x09B0" },
536         /*93*/ { "letter", "0x09B2" },
537         /*94*/ { "letter", "0x09B6", Token.UPTO, "0x09B9" },
538         /*95*/ { "letter", "0x09DC", Token.UPTO, "0x09DD" },
539         /*96*/ { "letter", "0x09DF", Token.UPTO, "0x09E1" },
540         /*97*/ { "letter", "0x09F0", Token.UPTO, "0x09F1" },
541         /*98*/ { "letter", "0x0A05", Token.UPTO, "0x0A0A" },
542         /*99*/ { "letter", "0x0A0F", Token.UPTO, "0x0A10" },
543         /*100*/ { "letter", "0x0A13", Token.UPTO, "0x0A28" },
544         /*101*/ { "letter", "0x0A2A", Token.UPTO, "0x0A30" },
545         /*102*/ { "letter", "0x0A32", Token.UPTO, "0x0A33" },
546         /*103*/ { "letter", "0x0A35", Token.UPTO, "0x0A36" },
547         /*104*/ { "letter", "0x0A38", Token.UPTO, "0x0A39" },
548         /*105*/ { "letter", "0x0A59", Token.UPTO, "0x0A5C" },
549         /*106*/ { "letter", "0x0A5E" },
550         /*107*/ { "letter", "0x0A72", Token.UPTO, "0x0A74" },
551         /*108*/ { "letter", "0x0A85", Token.UPTO, "0x0A8B" },
552         /*109*/ { "letter", "0x0A8D" },
553         /*110*/ { "letter", "0x0A8F", Token.UPTO, "0x0A91" },
554         /*111*/ { "letter", "0x0A93", Token.UPTO, "0x0AA8" },
555         /*112*/ { "letter", "0x0AAA", Token.UPTO, "0x0AB0" },
556         /*113*/ { "letter", "0x0AB2", Token.UPTO, "0x0AB3" },
557         /*114*/ { "letter", "0x0AB5", Token.UPTO, "0x0AB9" },
558         /*115*/ { "letter", "0x0ABD" },
559         /*116*/ { "letter", "0x0AE0" },
560         /*117*/ { "letter", "0x0B05", Token.UPTO, "0x0B0C" },
561         /*118*/ { "letter", "0x0B0F", Token.UPTO, "0x0B10" },
562         /*119*/ { "letter", "0x0B13", Token.UPTO, "0x0B28" },
563         /*120*/ { "letter", "0x0B2A", Token.UPTO, "0x0B30" },
564         /*121*/ { "letter", "0x0B32", Token.UPTO, "0x0B33" },
565         /*122*/ { "letter", "0x0B36", Token.UPTO, "0x0B39" },
566         /*123*/ { "letter", "0x0B3D" },
567         /*124*/ { "letter", "0x0B5C", Token.UPTO, "0x0B5D" },
568         /*125*/ { "letter", "0x0B5F", Token.UPTO, "0x0B61" },
569         /*126*/ { "letter", "0x0B85", Token.UPTO, "0x0B8A" },
570         /*127*/ { "letter", "0x0B8E", Token.UPTO, "0x0B90" },
571         /*128*/ { "letter", "0x0B92", Token.UPTO, "0x0B95" },
572         /*129*/ { "letter", "0x0B99", Token.UPTO, "0x0B9A" },
573         /*130*/ { "letter", "0x0B9C" },
574         /*131*/ { "letter", "0x0B9E", Token.UPTO, "0x0B9F" },
575         /*132*/ { "letter", "0x0BA3", Token.UPTO, "0x0BA4" },
576         /*133*/ { "letter", "0x0BA8", Token.UPTO, "0x0BAA" },
577         /*134*/ { "letter", "0x0BAE", Token.UPTO, "0x0BB5" },
578         /*135*/ { "letter", "0x0BB7", Token.UPTO, "0x0BB9" },
579         /*136*/ { "letter", "0x0C05", Token.UPTO, "0x0C0C" },
580         /*137*/ { "letter", "0x0C0E", Token.UPTO, "0x0C10" },
581         /*138*/ { "letter", "0x0C12", Token.UPTO, "0x0C28" },
582         /*139*/ { "letter", "0x0C2A", Token.UPTO, "0x0C33" },
583         /*140*/ { "letter", "0x0C35", Token.UPTO, "0x0C39" },
584         /*141*/ { "letter", "0x0C60", Token.UPTO, "0x0C61" },
585         /*142*/ { "letter", "0x0C85", Token.UPTO, "0x0C8C" },
586         /*143*/ { "letter", "0x0C8E", Token.UPTO, "0x0C90" },
587         /*144*/ { "letter", "0x0C92", Token.UPTO, "0x0CA8" },
588         /*145*/ { "letter", "0x0CAA", Token.UPTO, "0x0CB3" },
589         /*146*/ { "letter", "0x0CB5", Token.UPTO, "0x0CB9" },
590         /*147*/ { "letter", "0x0CDE" },
591         /*148*/ { "letter", "0x0CE0", Token.UPTO, "0x0CE1" },
592         /*149*/ { "letter", "0x0D05", Token.UPTO, "0x0D0C" },
593         /*150*/ { "letter", "0x0D0E", Token.UPTO, "0x0D10" },
594         /*151*/ { "letter", "0x0D12", Token.UPTO, "0x0D28" },
595         /*152*/ { "letter", "0x0D2A", Token.UPTO, "0x0D39" },
596         /*153*/ { "letter", "0x0D60", Token.UPTO, "0x0D61" },
597         /*154*/ { "letter", "0x0E01", Token.UPTO, "0x0E2E" },
598         /*155*/ { "letter", "0x0E30" },
599         /*156*/ { "letter", "0x0E32", Token.UPTO, "0x0E33" },
600         /*157*/ { "letter", "0x0E40", Token.UPTO, "0x0E45" },
601         /*158*/ { "letter", "0x0E81", Token.UPTO, "0x0E82" },
602         /*159*/ { "letter", "0x0E84" },
603         /*160*/ { "letter", "0x0E87", Token.UPTO, "0x0E88" },
604         /*161*/ { "letter", "0x0E8A" },
605         /*162*/ { "letter", "0x0E8D" },
606         /*163*/ { "letter", "0x0E94", Token.UPTO, "0x0E97" },
607         /*164*/ { "letter", "0x0E99", Token.UPTO, "0x0E9F" },
608         /*165*/ { "letter", "0x0EA1", Token.UPTO, "0x0EA3" },
609         /*166*/ { "letter", "0x0EA5" },
610         /*167*/ { "letter", "0x0EA7" },
611         /*168*/ { "letter", "0x0EAA", Token.UPTO, "0x0EAB" },
612         /*169*/ { "letter", "0x0EAD", Token.UPTO, "0x0EAE" },
613         /*170*/ { "letter", "0x0EB0" },
614         /*171*/ { "letter", "0x0EB2", Token.UPTO, "0x0EB3" },
615         /*172*/ { "letter", "0x0EBD" },
616         /*173*/ { "letter", "0x0EC0", Token.UPTO, "0x0EC4" },
617         /*174*/ { "letter", "0x0F40", Token.UPTO, "0x0F47" },
618         /*175*/ { "letter", "0x0F49", Token.UPTO, "0x0F69" },
619         /*176*/ { "letter", "0x10A0", Token.UPTO, "0x10C5" },
620         /*177*/ { "letter", "0x10D0", Token.UPTO, "0x10F6" },
621         /*178*/ { "letter", "0x1100" },
622         /*179*/ { "letter", "0x1102", Token.UPTO, "0x1103" },
623         /*180*/ { "letter", "0x1105", Token.UPTO, "0x1107" },
624         /*181*/ { "letter", "0x1109" },
625         /*182*/ { "letter", "0x110B", Token.UPTO, "0x110C" },
626         /*183*/ { "letter", "0x110E", Token.UPTO, "0x1112" },
627         /*184*/ { "letter", "0x113C" },
628         /*185*/ { "letter", "0x113E" },
629         /*186*/ { "letter", "0x1140" },
630         /*187*/ { "letter", "0x114C" },
631         /*188*/ { "letter", "0x114E" },
632         /*189*/ { "letter", "0x1150" },
633         /*190*/ { "letter", "0x1154", Token.UPTO, "0x1155" },
634         /*191*/ { "letter", "0x1159" },
635         /*192*/ { "letter", "0x115F", Token.UPTO, "0x1161" },
636         /*193*/ { "letter", "0x1163" },
637         /*194*/ { "letter", "0x1165" },
638         /*195*/ { "letter", "0x1167" },
639         /*196*/ { "letter", "0x1169" },
640         /*197*/ { "letter", "0x116D", Token.UPTO, "0x116E" },
641         /*198*/ { "letter", "0x1172", Token.UPTO, "0x1173" },
642         /*199*/ { "letter", "0x1175" },
643         /*200*/ { "letter", "0x119E" },
644         /*201*/ { "letter", "0x11A8" },
645         /*202*/ { "letter", "0x11AB" },
646         /*203*/ { "letter", "0x11AE", Token.UPTO, "0x11AF" },
647         /*204*/ { "letter", "0x11B7", Token.UPTO, "0x11B8" },
648         /*205*/ { "letter", "0x11BA" },
649         /*206*/ { "letter", "0x11BC", Token.UPTO, "0x11C2" },
650         /*207*/ { "letter", "0x11EB" },
651         /*208*/ { "letter", "0x11F0" },
652         /*209*/ { "letter", "0x11F9" },
653         /*210*/ { "letter", "0x1E00", Token.UPTO, "0x1E9B" },
654         /*211*/ { "letter", "0x1EA0", Token.UPTO, "0x1EF9" },
655         /*212*/ { "letter", "0x1F00", Token.UPTO, "0x1F15" },
656         /*213*/ { "letter", "0x1F18", Token.UPTO, "0x1F1D" },
657         /*214*/ { "letter", "0x1F20", Token.UPTO, "0x1F45" },
658         /*215*/ { "letter", "0x1F48", Token.UPTO, "0x1F4D" },
659         /*216*/ { "letter", "0x1F50", Token.UPTO, "0x1F57" },
660         /*217*/ { "letter", "0x1F59" },
661         /*218*/ { "letter", "0x1F5B" },
662         /*219*/ { "letter", "0x1F5D" },
663         /*220*/ { "letter", "0x1F5F", Token.UPTO, "0x1F7D" },
664         /*221*/ { "letter", "0x1F80", Token.UPTO, "0x1FB4" },
665         /*222*/ { "letter", "0x1FB6", Token.UPTO, "0x1FBC" },
666         /*223*/ { "letter", "0x1FBE" },
667         /*224*/ { "letter", "0x1FC2", Token.UPTO, "0x1FC4" },
668         /*225*/ { "letter", "0x1FC6", Token.UPTO, "0x1FCC" },
669         /*226*/ { "letter", "0x1FD0", Token.UPTO, "0x1FD3" },
670         /*227*/ { "letter", "0x1FD6", Token.UPTO, "0x1FDB" },
671         /*228*/ { "letter", "0x1FE0", Token.UPTO, "0x1FEC" },
672         /*229*/ { "letter", "0x1FF2", Token.UPTO, "0x1FF4" },
673         /*230*/ { "letter", "0x1FF6", Token.UPTO, "0x1FFC" },
674         /*231*/ { "letter", "0x2126" },
675         /*232*/ { "letter", "0x212A", Token.UPTO, "0x212B" },
676         /*233*/ { "letter", "0x212E" },
677         /*234*/ { "letter", "0x2180", Token.UPTO, "0x2182" },
678         /*235*/ { "letter", "0x3041", Token.UPTO, "0x3094" },
679         /*236*/ { "letter", "0x30A1", Token.UPTO, "0x30FA" },
680         /*237*/ { "letter", "0x3105", Token.UPTO, "0x312C" },
681         /*238*/ { "letter", "0xAC00", Token.UPTO, "0xD7A3" },
682         // Ideographic
683
/*239*/ { "letter", "0x4E00", Token.UPTO, "0x9FA5" },
684         /*240*/ { "letter", "0x3007" },
685         /*241*/ { "letter", "0x3021", Token.UPTO, "0x3029" },
686
687         // helper rules
688

689         /*242*/ { "spaces", "spaces", "space" },
690         /*243*/ { "spaces", "space" },
691             
692         /*244*/ { "digits", "digits", "digit" },
693         /*245*/ { "digits", "digit" },
694
695         /*246*/ { "hexdigits", "hexdigits", "hexdigit" },
696         /*247*/ { "hexdigits", "hexdigit" },
697
698         /*248*/ { "bnf_chardef", "\"'\"", "char", "\"'\"" },
699         /*249*/ { "bnf_chardef", "\"0x\"", "hexdigits" },
700         /*250*/ { "bnf_chardef", "\"0X\"", "hexdigits" },
701         /*251*/ { "bnf_chardef", "\"'\\''\"" }, // single quote
702
/*252*/ { "bnf_chardef", "\"'\\n'\"" }, // newline
703
/*253*/ { "bnf_chardef", "\"'\\r'\"" }, // carriage return
704
/*254*/ { "bnf_chardef", "\"'\\t'\"" }, // tabulator
705
/*255*/ { "bnf_chardef", "\"'\\f'\"" }, // formfeed
706
/*256*/ { "bnf_chardef", "\"'\\b'\"" }, // backspace
707
/*257*/ { "bnf_chardef", "\"'\\\\'\"" }, // backslash
708
// bell \a was removed in favor of backslash and too much work re-numbering ...
709
/*258*/ { "bnf_chardef", "digits" },
710
711         /*259*/ { "identifier", "letter_or_uscore", "letter_or_digit_list_opt" },
712         /*260*/ { "letter_or_uscore", "letter" },
713         /*261*/ { "letter_or_uscore", "'_'" },
714         /*262*/ { "letter_or_digit", "letter_or_uscore" },
715         /*263*/ { "letter_or_digit", "digit" },
716         /*264*/ { "letter_or_digit_list", "letter_or_digit_list", "letter_or_digit" },
717         /*265*/ { "letter_or_digit_list", "letter_or_digit" },
718         /*266*/ { "letter_or_digit_list_opt", "letter_or_digit_list" },
719         /*267*/ { "letter_or_digit_list_opt" /*nothing*/ },
720
721         /*268*/ { "stringdef", "'\"'", "stringpart_list_opt", "'\"'" },
722         /*269*/ { "char_minus_doublequote_list", "char_minus_doublequote_list", "char_minus_doublequote" },
723         /*270*/ { "char_minus_doublequote_list", "char_minus_doublequote" },
724         /*271*/ { "char_minus_doublequote", "char", Token.BUTNOT, "'\"'", Token.BUTNOT, "'\\'" }, // does not contain " or \
725
/*272*/ { "char_minus_doublequote_list_opt", "char_minus_doublequote_list" },
726         /*273*/ { "char_minus_doublequote_list_opt" /*nothing*/ },
727         /*274*/ { "backslash_char_list", "backslash_char_list", "backslash_char" },
728         /*275*/ { "backslash_char_list", "backslash_char" },
729         /*276*/ { "backslash_char", "'\\'", "char" }, // escaped character like "\""
730
/*277*/ { "backslash_char_list_opt", "backslash_char_list" },
731         /*278*/ { "backslash_char_list_opt" /*nothing*/ },
732         /*279*/ { "stringpart", "char_minus_doublequote_list_opt", "backslash_char_list_opt" },
733         /*280*/ { "stringpart_list", "stringpart_list", "stringpart" },
734         /*281*/ { "stringpart_list", "stringpart" },
735         /*282*/ { "stringpart_list_opt", "stringpart_list" },
736         /*283*/ { "stringpart_list_opt" /*nothing*/ },
737
738         /*284*/ { "cstylecomment", "\"/*\"", "char_minus_star_slash_list_opt", "\"*/\"" },
739         /*285*/ { "char_minus_star_slash", "char", Token.BUTNOT, "\"*/\"" },
740         /*286*/ { "char_minus_star_slash_list", "char_minus_star_slash_list", "char_minus_star_slash" },
741         /*287*/ { "char_minus_star_slash_list", "char_minus_star_slash" },
742         /*288*/ { "char_minus_star_slash_list_opt", "char_minus_star_slash_list" },
743         /*289*/ { "char_minus_star_slash_list_opt" /*nothing*/ },
744
745         /*290*/ { "cstylecomment", "\"//\"", "char_minus_newline_list_opt" },
746         /*291*/ { "char_minus_newline", "char", Token.BUTNOT, "newline" },
747         /*292*/ { "char_minus_newline_list", "char_minus_newline_list", "char_minus_newline" },
748         /*293*/ { "char_minus_newline_list", "char_minus_newline" },
749         /*294*/ { "char_minus_newline_list_opt", "char_minus_newline_list" },
750         /*295*/ { "char_minus_newline_list_opt" /*nothing*/ },
751         /*296*/ { "shellstylecomment", "'#'", "char_minus_newline_list_opt" },
752
753         /*297*/ { "ruleref", "'"+Token.COMMAND_QUOTE+"'", "identifier", "'"+Token.COMMAND_QUOTE+"'" },
754         /*298*/ { Token.TOKEN, "ruleref" },
755
756         /*299*/ { "comment", "cstylecomment" },
757         /*300*/ { "comment", "shellstylecomment" },
758     };
759
760     /** XML Char definitions of W3C. */
761     public static final String JavaDoc [][] xmlCharRules = {
762         /*0*/ { "xmlchar", "0x9" },
763         /*1*/ { "xmlchar", "0xA" },
764         /*2*/ { "xmlchar", "0xD" },
765         /*3*/ { "xmlchar", "0x20", Token.UPTO, "0xD7FF" },
766         /*4*/ { "xmlchar", "0xE000", Token.UPTO, "0xFFFD" },
767         /*5*/ { "xmlchar", "0x10000", Token.UPTO, "0x10FFFF" },
768     };
769     
770     /** XML CombiningChar and XML Extender definitions of W3C. */
771     public static final String JavaDoc [][] xmlCombinigAndExtenderRules = {
772         /*0*/ { "combiningchar", "0x0300", Token.UPTO, "0x0345" },
773         /*1*/ { "combiningchar", "0x0360", Token.UPTO, "0x0361" },
774         /*2*/ { "combiningchar", "0x0483", Token.UPTO, "0x0486" },
775         /*3*/ { "combiningchar", "0x0591", Token.UPTO, "0x05A1" },
776         /*4*/ { "combiningchar", "0x05A3", Token.UPTO, "0x05B9" },
777         /*5*/ { "combiningchar", "0x05BB", Token.UPTO, "0x05BD" },
778         /*6*/ { "combiningchar", "0x05BF" },
779         /*7*/ { "combiningchar", "0x05C1", Token.UPTO, "0x05C2" },
780         /*8*/ { "combiningchar", "0x05C4" },
781         /*9*/ { "combiningchar", "0x064B", Token.UPTO, "0x0652" },
782         /*10*/ { "combiningchar", "0x0670" },
783         /*11*/ { "combiningchar", "0x06D6", Token.UPTO, "0x06DC" },
784         /*12*/ { "combiningchar", "0x06DD", Token.UPTO, "0x06DF" },
785         /*13*/ { "combiningchar", "0x06E0", Token.UPTO, "0x06E4" },
786         /*14*/ { "combiningchar", "0x06E7", Token.UPTO, "0x06E8" },
787         /*15*/ { "combiningchar", "0x06EA", Token.UPTO, "0x06ED" },
788         /*16*/ { "combiningchar", "0x0901", Token.UPTO, "0x0903" },
789         /*17*/ { "combiningchar", "0x093C" },
790         /*18*/ { "combiningchar", "0x093E", Token.UPTO, "0x094C" },
791         /*19*/ { "combiningchar", "0x094D" },
792         /*20*/ { "combiningchar", "0x0951", Token.UPTO, "0x0954" },
793         /*21*/ { "combiningchar", "0x0962", Token.UPTO, "0x0963" },
794         /*22*/ { "combiningchar", "0x0981", Token.UPTO, "0x0983" },
795         /*23*/ { "combiningchar", "0x09BC" },
796         /*24*/ { "combiningchar", "0x09BE" },
797         /*25*/ { "combiningchar", "0x09BF" },
798         /*26*/ { "combiningchar", "0x09C0", Token.UPTO, "0x09C4" },
799         /*27*/ { "combiningchar", "0x09C7", Token.UPTO, "0x09C8" },
800         /*28*/ { "combiningchar", "0x09CB", Token.UPTO, "0x09CD" },
801         /*29*/ { "combiningchar", "0x09D7" },
802         /*30*/ { "combiningchar", "0x09E2", Token.UPTO, "0x09E3" },
803         /*31*/ { "combiningchar", "0x0A02" },
804         /*32*/ { "combiningchar", "0x0A3C" },
805         /*33*/ { "combiningchar", "0x0A3E" },
806         /*34*/ { "combiningchar", "0x0A3F" },
807         /*35*/ { "combiningchar", "0x0A40", Token.UPTO, "0x0A42" },
808         /*36*/ { "combiningchar", "0x0A47", Token.UPTO, "0x0A48" },
809         /*37*/ { "combiningchar", "0x0A4B", Token.UPTO, "0x0A4D" },
810         /*38*/ { "combiningchar", "0x0A70", Token.UPTO, "0x0A71" },
811         /*39*/ { "combiningchar", "0x0A81", Token.UPTO, "0x0A83" },
812         /*40*/ { "combiningchar", "0x0ABC" },
813         /*41*/ { "combiningchar", "0x0ABE", Token.UPTO, "0x0AC5" },
814         /*42*/ { "combiningchar", "0x0AC7", Token.UPTO, "0x0AC9" },
815         /*43*/ { "combiningchar", "0x0ACB", Token.UPTO, "0x0ACD" },
816         /*44*/ { "combiningchar", "0x0B01", Token.UPTO, "0x0B03" },
817         /*45*/ { "combiningchar", "0x0B3C" },
818         /*46*/ { "combiningchar", "0x0B3E", Token.UPTO, "0x0B43" },
819         /*47*/ { "combiningchar", "0x0B47", Token.UPTO, "0x0B48" },
820         /*48*/ { "combiningchar", "0x0B4B", Token.UPTO, "0x0B4D" },
821         /*49*/ { "combiningchar", "0x0B56", Token.UPTO, "0x0B57" },
822         /*50*/ { "combiningchar", "0x0B82", Token.UPTO, "0x0B83" },
823         /*51*/ { "combiningchar", "0x0BBE", Token.UPTO, "0x0BC2" },
824         /*52*/ { "combiningchar", "0x0BC6", Token.UPTO, "0x0BC8" },
825         /*53*/ { "combiningchar", "0x0BCA", Token.UPTO, "0x0BCD" },
826         /*54*/ { "combiningchar", "0x0BD7" },
827         /*55*/ { "combiningchar", "0x0C01", Token.UPTO, "0x0C03" },
828         /*56*/ { "combiningchar", "0x0C3E", Token.UPTO, "0x0C44" },
829         /*57*/ { "combiningchar", "0x0C46", Token.UPTO, "0x0C48" },
830         /*58*/ { "combiningchar", "0x0C4A", Token.UPTO, "0x0C4D" },
831         /*59*/ { "combiningchar", "0x0C55", Token.UPTO, "0x0C56" },
832         /*60*/ { "combiningchar", "0x0C82", Token.UPTO, "0x0C83" },
833         /*61*/ { "combiningchar", "0x0CBE", Token.UPTO, "0x0CC4" },
834         /*62*/ { "combiningchar", "0x0CC6", Token.UPTO, "0x0CC8" },
835         /*63*/ { "combiningchar", "0x0CCA", Token.UPTO, "0x0CCD" },
836         /*64*/ { "combiningchar", "0x0CD5", Token.UPTO, "0x0CD6" },
837         /*65*/ { "combiningchar", "0x0D02", Token.UPTO, "0x0D03" },
838         /*66*/ { "combiningchar", "0x0D3E", Token.UPTO, "0x0D43" },
839         /*67*/ { "combiningchar", "0x0D46", Token.UPTO, "0x0D48" },
840         /*68*/ { "combiningchar", "0x0D4A", Token.UPTO, "0x0D4D" },
841         /*69*/ { "combiningchar", "0x0D57" },
842         /*70*/ { "combiningchar", "0x0E31" },
843         /*71*/ { "combiningchar", "0x0E34", Token.UPTO, "0x0E3A" },
844         /*72*/ { "combiningchar", "0x0E47", Token.UPTO, "0x0E4E" },
845         /*73*/ { "combiningchar", "0x0EB1" },
846         /*74*/ { "combiningchar", "0x0EB4", Token.UPTO, "0x0EB9" },
847         /*75*/ { "combiningchar", "0x0EBB", Token.UPTO, "0x0EBC" },
848         /*76*/ { "combiningchar", "0x0EC8", Token.UPTO, "0x0ECD" },
849         /*77*/ { "combiningchar", "0x0F18", Token.UPTO, "0x0F19" },
850         /*78*/ { "combiningchar", "0x0F35" },
851         /*79*/ { "combiningchar", "0x0F37" },
852         /*80*/ { "combiningchar", "0x0F39" },
853         /*81*/ { "combiningchar", "0x0F3E" },
854         /*82*/ { "combiningchar", "0x0F3F" },
855         /*83*/ { "combiningchar", "0x0F71", Token.UPTO, "0x0F84" },
856         /*84*/ { "combiningchar", "0x0F86", Token.UPTO, "0x0F8B" },
857         /*85*/ { "combiningchar", "0x0F90", Token.UPTO, "0x0F95" },
858         /*86*/ { "combiningchar", "0x0F97" },
859         /*87*/ { "combiningchar", "0x0F99", Token.UPTO, "0x0FAD" },
860         /*88*/ { "combiningchar", "0x0FB1", Token.UPTO, "0x0FB7" },
861         /*89*/ { "combiningchar", "0x0FB9" },
862         /*90*/ { "combiningchar", "0x20D0", Token.UPTO, "0x20DC" },
863         /*91*/ { "combiningchar", "0x20E1" },
864         /*92*/ { "combiningchar", "0x302A", Token.UPTO, "0x302F" },
865         /*93*/ { "combiningchar", "0x3099" },
866         /*94*/ { "combiningchar", "0x309A" },
867
868         /*95*/ { "extenderchar", "0x00B7" },
869         /*96*/ { "extenderchar", "0x02D0" },
870         /*97*/ { "extenderchar", "0x02D1" },
871         /*98*/ { "extenderchar", "0x0387" },
872         /*99*/ { "extenderchar", "0x0640" },
873         /*100*/ { "extenderchar", "0x0E46" },
874         /*101*/ { "extenderchar", "0x0EC6" },
875         /*102*/ { "extenderchar", "0x3005" },
876         /*103*/ { "extenderchar", "0x3031", Token.UPTO, "0x3035" },
877         /*104*/ { "extenderchar", "0x309D", Token.UPTO, "0x309E" },
878         /*105*/ { "extenderchar", "0x30FC", Token.UPTO, "0x30FE" },
879     };
880
881
882     /** Numerical rules for binary and octal <b>digits</b>. */
883     public static final String JavaDoc [][] digitRules = {
884         /*0*/ { "octdigit", "'0'", Token.UPTO, "'7'" },
885         /*1*/ { "octdigits", "octdigits", "octdigit" },
886         /*2*/ { "octdigits", "octdigit" },
887
888         /*3*/ { "bindigit", "'0'", Token.UPTO, "'1'" },
889         /*4*/ { "bindigits", "bindigits", "bindigit" },
890         /*5*/ { "bindigits", "bindigit" },
891     };
892
893     /** Numerical rules for <b>numbers</b> within sourcecode: number ::= integer | float. */
894     public static final String JavaDoc [][] numberRules = {
895         // number = float | integer (incl. hexnumber)
896
/*0*/ { "number", "float" },
897         /*1*/ { "number", "integer" },
898         /*2*/ { "float", "wholenumber", "'.'", "mantissa", "float_opt" },
899         /*3*/ { "wholenumber", "digits" },
900         /*4*/ { "wholenumber" /*nothing*/ },
901         /*5*/ { "mantissa", "digits", "mantissa_opt" },
902         /*6*/ { "mantissa_opt", "exponent", "digits" },
903         /*7*/ { "mantissa_opt" /*nothing*/ },
904         /*8*/ { "exponent", "exponentletter", "exponentsign" },
905         /*9*/ { "exponentletter", "'e'" },
906         /*10*/ { "exponentletter", "'E'" },
907         /*11*/ { "exponentsign", "'-'" },
908         /*12*/ { "exponentsign", "'+'" },
909         /*13*/ { "exponentsign" /*nothing*/ },
910         /*14*/ { "float_opt", "'f'" },
911         /*15*/ { "float_opt", "'F'" },
912         /*16*/ { "float_opt", "'d'" },
913         /*17*/ { "float_opt", "'D'" },
914         /*18*/ { "float_opt" /*nothing*/ },
915         /*19*/ { "integer", "\"0X\"", "hexdigits" },
916         /*20*/ { "integer", "\"0x\"", "hexdigits" },
917         /*21*/ { "integer", "digits", "integer_opt" },
918         /*22*/ { "integer_opt", "'l'" }, // "long" marker
919
/*23*/ { "integer_opt", "'L'" }, // "long" marker
920
/*24*/ { "integer_opt" /*nothing*/ },
921     };
922     
923     /** Rules describing one or more newlines. */
924     public static final String JavaDoc [][] newlinesRules = {
925         { "newlines", "newlines", "newline" },
926         { "newlines", "newline" },
927     };
928     
929     /** Rules describing C/Java-like character definitions: 'c', '\r', '\007'. */
930     public static final String JavaDoc [][] chardefRules = {
931         /*0*/ { "chardef", "\"'\\\"", "'0'", Token.UPTO, "'3'", "octdigit", "octdigit", "\"'\"" },
932         /*1*/ { "chardef", "bnf_chardef" }, // but only 248 and 251 - 258 !!!
933
};
934     
935     /** Rules describing whitespace: newlines and spaces, minimum one. */
936     public static final String JavaDoc [][] whitespaceRules = {
937         /*0*/ { "whitespace", "newline" },
938         /*1*/ { "whitespace", "space" },
939         /*2*/ { "whitespaces", "whitespaces", "whitespace" },
940         /*3*/ { "whitespaces", "whitespace" },
941     };
942     
943
944     private StandardLexerRules() {}
945
946 }
947
Popular Tags