KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > impl > xpath > regex > Token


1 /*
2  * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.xerces.impl.xpath.regex;
18
19 import java.util.Vector JavaDoc;
20 import java.util.Hashtable JavaDoc;
21
22 /**
23  * This class represents a node in parse tree.
24  *
25  * @xerces.internal
26  *
27  * @version $Id: Token.java,v 1.10 2005/03/22 03:26:24 mrglavas Exp $
28  */

29 class Token implements java.io.Serializable JavaDoc {
30
31     private static final long serialVersionUID = 4049923761862293040L;
32
33     static final boolean COUNTTOKENS = true;
34     static int tokens = 0;
35
36     static final int CHAR = 0; // Literal char
37
static final int DOT = 11; // .
38
static final int CONCAT = 1; // XY
39
static final int UNION = 2; // X|Y|Z
40
static final int CLOSURE = 3; // X*
41
static final int RANGE = 4; // [a-zA-Z] etc.
42
static final int NRANGE = 5; // [^a-zA-Z] etc.
43
static final int PAREN = 6; // (X) or (?:X)
44
static final int EMPTY = 7; //
45
static final int ANCHOR = 8; // ^ $ \b \B \< \> \A \Z \z
46
static final int NONGREEDYCLOSURE = 9; // *? +?
47
static final int STRING = 10; // strings
48
static final int BACKREFERENCE = 12; // back references
49
static final int LOOKAHEAD = 20; // (?=...)
50
static final int NEGATIVELOOKAHEAD = 21; // (?!...)
51
static final int LOOKBEHIND = 22; // (?<=...)
52
static final int NEGATIVELOOKBEHIND = 23; // (?<!...)
53
static final int INDEPENDENT = 24; // (?>...)
54
static final int MODIFIERGROUP = 25; // (?ims-ims:...)
55
static final int CONDITION = 26; // (?(...)yes|no)
56

57     static final int UTF16_MAX = 0x10ffff;
58
59     int type;
60
61     static Token token_dot;
62     static Token token_0to9;
63     static Token token_wordchars;
64     static Token token_not_0to9;
65     static Token token_not_wordchars;
66     static Token token_spaces;
67     static Token token_not_spaces;
68     static Token token_empty;
69     static Token token_linebeginning;
70     static Token token_linebeginning2;
71     static Token token_lineend;
72     static Token token_stringbeginning;
73     static Token token_stringend;
74     static Token token_stringend2;
75     static Token token_wordedge;
76     static Token token_not_wordedge;
77     static Token token_wordbeginning;
78     static Token token_wordend;
static {
    // Single shared instance for the empty pattern; createEmpty() returns it.
    Token.token_empty = new Token(Token.EMPTY);

    // Anchor tokens: each is an ANCHOR-typed CharToken keyed by one character code.
    Token.token_linebeginning = Token.createAnchor('^');
    Token.token_linebeginning2 = Token.createAnchor('@');
    Token.token_lineend = Token.createAnchor('$');
    Token.token_stringbeginning = Token.createAnchor('A');
    Token.token_stringend = Token.createAnchor('z');
    Token.token_stringend2 = Token.createAnchor('Z');
    Token.token_wordedge = Token.createAnchor('b');
    Token.token_not_wordedge = Token.createAnchor('B');
    Token.token_wordbeginning = Token.createAnchor('<');
    Token.token_wordend = Token.createAnchor('>');

    Token.token_dot = new Token(Token.DOT);

    // Digits: 0-9.
    Token.token_0to9 = Token.createRange();
    Token.token_0to9.addRange('0', '9');

    // Word characters: 0-9, A-Z, underscore, a-z.
    Token.token_wordchars = Token.createRange();
    Token.token_wordchars.addRange('0', '9');
    Token.token_wordchars.addRange('A', 'Z');
    Token.token_wordchars.addRange('_', '_');
    Token.token_wordchars.addRange('a', 'z');

    // Spaces: tab, line feed, form feed, carriage return, space.
    Token.token_spaces = Token.createRange();
    Token.token_spaces.addRange('\t', '\t');
    Token.token_spaces.addRange('\n', '\n');
    Token.token_spaces.addRange('\f', '\f');
    Token.token_spaces.addRange('\r', '\r');
    Token.token_spaces.addRange(' ', ' ');

    // The complements are derived last, once the positive ranges above are fully populated.
    Token.token_not_0to9 = Token.complementRanges(Token.token_0to9);
    Token.token_not_wordchars = Token.complementRanges(Token.token_wordchars);
    Token.token_not_spaces = Token.complementRanges(Token.token_spaces);
}
113
114     static Token.ParenToken createLook(int type, Token child) {
115         if (COUNTTOKENS) Token.tokens ++;
116         return new Token.ParenToken(type, child, 0);
117     }
118     static Token.ParenToken createParen(Token child, int pnumber) {
119         if (COUNTTOKENS) Token.tokens ++;
120         return new Token.ParenToken(Token.PAREN, child, pnumber);
121     }
122     static Token.ClosureToken createClosure(Token tok) {
123         if (COUNTTOKENS) Token.tokens ++;
124         return new Token.ClosureToken(Token.CLOSURE, tok);
125     }
126     static Token.ClosureToken createNGClosure(Token tok) {
127         if (COUNTTOKENS) Token.tokens ++;
128         return new Token.ClosureToken(Token.NONGREEDYCLOSURE, tok);
129     }
130     static Token.ConcatToken createConcat(Token tok1, Token tok2) {
131         if (COUNTTOKENS) Token.tokens ++;
132         return new Token.ConcatToken(tok1, tok2);
133     }
134     static Token.UnionToken createConcat() {
135         if (COUNTTOKENS) Token.tokens ++;
136         return new Token.UnionToken(Token.CONCAT); // *** It is not a bug.
137
}
138     static Token.UnionToken createUnion() {
139         if (COUNTTOKENS) Token.tokens ++;
140         return new Token.UnionToken(Token.UNION);
141     }
142     static Token createEmpty() {
143         return Token.token_empty;
144     }
145     static RangeToken createRange() {
146         if (COUNTTOKENS) Token.tokens ++;
147         return new RangeToken(Token.RANGE);
148     }
149     static RangeToken createNRange() {
150         if (COUNTTOKENS) Token.tokens ++;
151         return new RangeToken(Token.NRANGE);
152     }
153     static Token.CharToken createChar(int ch) {
154         if (COUNTTOKENS) Token.tokens ++;
155         return new Token.CharToken(Token.CHAR, ch);
156     }
157     static private Token.CharToken createAnchor(int ch) {
158         if (COUNTTOKENS) Token.tokens ++;
159         return new Token.CharToken(Token.ANCHOR, ch);
160     }
161     static Token.StringToken createBackReference(int refno) {
162         if (COUNTTOKENS) Token.tokens ++;
163         return new Token.StringToken(Token.BACKREFERENCE, null, refno);
164     }
165     static Token.StringToken createString(String JavaDoc str) {
166         if (COUNTTOKENS) Token.tokens ++;
167         return new Token.StringToken(Token.STRING, str, 0);
168     }
169     static Token.ModifierToken createModifierGroup(Token child, int add, int mask) {
170         if (COUNTTOKENS) Token.tokens ++;
171         return new Token.ModifierToken(child, add, mask);
172     }
173     static Token.ConditionToken createCondition(int refno, Token condition,
174                                                 Token yespat, Token nopat) {
175         if (COUNTTOKENS) Token.tokens ++;
176         return new Token.ConditionToken(refno, condition, yespat, nopat);
177     }
178
/**
 * Creates a token with the given node type code. The token counter is not touched here.
 */
protected Token(int type) {
    this.type = type;
}
182
183     /**
184      * A number of children.
185      */

186     int size() {
187         return 0;
188     }
189     Token getChild(int index) {
190         return null;
191     }
192     void addChild(Token tok) {
193         throw new RuntimeException JavaDoc("Not supported.");
194     }
195
196                                                 // for RANGE or NRANGE
197
protected void addRange(int start, int end) {
198         throw new RuntimeException JavaDoc("Not supported.");
199     }
200     protected void sortRanges() {
201         throw new RuntimeException JavaDoc("Not supported.");
202     }
203     protected void compactRanges() {
204         throw new RuntimeException JavaDoc("Not supported.");
205     }
206     protected void mergeRanges(Token tok) {
207         throw new RuntimeException JavaDoc("Not supported.");
208     }
209     protected void subtractRanges(Token tok) {
210         throw new RuntimeException JavaDoc("Not supported.");
211     }
212     protected void intersectRanges(Token tok) {
213         throw new RuntimeException JavaDoc("Not supported.");
214     }
215     static Token complementRanges(Token tok) {
216         return RangeToken.complementRanges(tok);
217     }
218
219
220     void setMin(int min) { // for CLOSURE
221
}
222     void setMax(int max) { // for CLOSURE
223
}
224     int getMin() { // for CLOSURE
225
return -1;
226     }
227     int getMax() { // for CLOSURE
228
return -1;
229     }
230     int getReferenceNumber() { // for STRING
231
return 0;
232     }
233     String JavaDoc getString() { // for STRING
234
return null;
235     }
236
237     int getParenNumber() {
238         return 0;
239     }
240     int getChar() {
241         return -1;
242     }
243
244     public String JavaDoc toString() {
245         return this.toString(0);
246     }
247     public String JavaDoc toString(int options) {
248         return this.type == Token.DOT ? "." : "";
249     }
250
251     /**
252      * How many characters are needed?
253      */

254     final int getMinLength() {
255         switch (this.type) {
256           case CONCAT:
257             int sum = 0;
258             for (int i = 0; i < this.size(); i ++)
259                 sum += this.getChild(i).getMinLength();
260             return sum;
261
262           case CONDITION:
263           case UNION:
264             if (this.size() == 0)
265                 return 0;
266             int ret = this.getChild(0).getMinLength();
267             for (int i = 1; i < this.size(); i ++) {
268                 int min = this.getChild(i).getMinLength();
269                 if (min < ret) ret = min;
270             }
271             return ret;
272
273           case CLOSURE:
274           case NONGREEDYCLOSURE:
275             if (this.getMin() >= 0)
276                 return this.getMin() * this.getChild(0).getMinLength();
277             return 0;
278
279           case EMPTY:
280           case ANCHOR:
281             return 0;
282
283           case DOT:
284           case CHAR:
285           case RANGE:
286           case NRANGE:
287             return 1;
288
289           case INDEPENDENT:
290           case PAREN:
291           case MODIFIERGROUP:
292             return this.getChild(0).getMinLength();
293
294           case BACKREFERENCE:
295             return 0; // *******
296

297           case STRING:
298             return this.getString().length();
299
300           case LOOKAHEAD:
301           case NEGATIVELOOKAHEAD:
302           case LOOKBEHIND:
303           case NEGATIVELOOKBEHIND:
304             return 0; // ***** Really?
305

306           default:
307             throw new RuntimeException JavaDoc("Token#getMinLength(): Invalid Type: "+this.type);
308         }
309     }
310
311     final int getMaxLength() {
312         switch (this.type) {
313           case CONCAT:
314             int sum = 0;
315             for (int i = 0; i < this.size(); i ++) {
316                 int d = this.getChild(i).getMaxLength();
317                 if (d < 0) return -1;
318                 sum += d;
319             }
320             return sum;
321
322           case CONDITION:
323           case UNION:
324             if (this.size() == 0)
325                 return 0;
326             int ret = this.getChild(0).getMaxLength();
327             for (int i = 1; ret >= 0 && i < this.size(); i ++) {
328                 int max = this.getChild(i).getMaxLength();
329                 if (max < 0) { // infinity
330
ret = -1;
331                     break;
332                 }
333                 if (max > ret) ret = max;
334             }
335             return ret;
336
337           case CLOSURE:
338           case NONGREEDYCLOSURE:
339             if (this.getMax() >= 0)
340                                                 // When this.child.getMaxLength() < 0,
341
// this returns minus value
342
return this.getMax() * this.getChild(0).getMaxLength();
343             return -1;
344
345           case EMPTY:
346           case ANCHOR:
347             return 0;
348
349           case CHAR:
350             return 1;
351           case DOT:
352           case RANGE:
353           case NRANGE:
354             return 2;
355
356           case INDEPENDENT:
357           case PAREN:
358           case MODIFIERGROUP:
359             return this.getChild(0).getMaxLength();
360
361           case BACKREFERENCE:
362             return -1; // ******
363

364           case STRING:
365             return this.getString().length();
366
367           case LOOKAHEAD:
368           case NEGATIVELOOKAHEAD:
369           case LOOKBEHIND:
370           case NEGATIVELOOKBEHIND:
371             return 0; // ***** Really?
372

373           default:
374             throw new RuntimeException JavaDoc("Token#getMaxLength(): Invalid Type: "+this.type);
375         }
376     }
377
378     static final int FC_CONTINUE = 0;
379     static final int FC_TERMINAL = 1;
380     static final int FC_ANY = 2;
381     private static final boolean isSet(int options, int flag) {
382         return (options & flag) == flag;
383     }
384     final int analyzeFirstCharacter(RangeToken result, int options) {
385         switch (this.type) {
386           case CONCAT:
387             int ret = FC_CONTINUE;
388             for (int i = 0; i < this.size(); i ++)
389                 if ((ret = this.getChild(i).analyzeFirstCharacter(result, options)) != FC_CONTINUE)
390                     break;
391             return ret;
392
393           case UNION:
394             if (this.size() == 0)
395                 return FC_CONTINUE;
396             /*
397              * a|b|c -> FC_TERMINAL
398              * a|.|c -> FC_ANY
399              * a|b| -> FC_CONTINUE
400              */

401             int ret2 = FC_CONTINUE;
402             boolean hasEmpty = false;
403             for (int i = 0; i < this.size(); i ++) {
404                 ret2 = this.getChild(i).analyzeFirstCharacter(result, options);
405                 if (ret2 == FC_ANY)
406                     break;
407                 else if (ret2 == FC_CONTINUE)
408                     hasEmpty = true;
409             }
410             return hasEmpty ? FC_CONTINUE : ret2;
411
412           case CONDITION:
413             int ret3 = this.getChild(0).analyzeFirstCharacter(result, options);
414             if (this.size() == 1) return FC_CONTINUE;
415             if (ret3 == FC_ANY) return ret3;
416             int ret4 = this.getChild(1).analyzeFirstCharacter(result, options);
417             if (ret4 == FC_ANY) return ret4;
418             return ret3 == FC_CONTINUE || ret4 == FC_CONTINUE ? FC_CONTINUE : FC_TERMINAL;
419
420           case CLOSURE:
421           case NONGREEDYCLOSURE:
422             this.getChild(0).analyzeFirstCharacter(result, options);
423             return FC_CONTINUE;
424
425           case EMPTY:
426           case ANCHOR:
427             return FC_CONTINUE;
428
429           case CHAR:
430             int ch = this.getChar();
431             result.addRange(ch, ch);
432             if (ch < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
433                 ch = Character.toUpperCase((char)ch);
434                 result.addRange(ch, ch);
435                 ch = Character.toLowerCase((char)ch);
436                 result.addRange(ch, ch);
437             }
438             return FC_TERMINAL;
439
440           case DOT: // ****
441
if (isSet(options, RegularExpression.SINGLE_LINE)) {
442                 return FC_CONTINUE; // **** We can not optimize.
443
} else {
444                 return FC_CONTINUE;
445                 /*
446                 result.addRange(0, RegularExpression.LINE_FEED-1);
447                 result.addRange(RegularExpression.LINE_FEED+1, RegularExpression.CARRIAGE_RETURN-1);
448                 result.addRange(RegularExpression.CARRIAGE_RETURN+1,
449                                 RegularExpression.LINE_SEPARATOR-1);
450                 result.addRange(RegularExpression.PARAGRAPH_SEPARATOR+1, UTF16_MAX);
451                 return 1;
452                 */

453             }
454
455           case RANGE:
456             if (isSet(options, RegularExpression.IGNORE_CASE)) {
457                 result.mergeRanges(((RangeToken)this).getCaseInsensitiveToken());
458             } else {
459                 result.mergeRanges(this);
460             }
461             return FC_TERMINAL;
462
463           case NRANGE: // ****
464
if (isSet(options, RegularExpression.IGNORE_CASE)) {
465                 result.mergeRanges(Token.complementRanges(((RangeToken)this).getCaseInsensitiveToken()));
466             } else {
467                 result.mergeRanges(Token.complementRanges(this));
468             }
469             return FC_TERMINAL;
470
471           case INDEPENDENT:
472           case PAREN:
473             return this.getChild(0).analyzeFirstCharacter(result, options);
474
475           case MODIFIERGROUP:
476             options |= ((ModifierToken)this).getOptions();
477             options &= ~((ModifierToken)this).getOptionsMask();
478             return this.getChild(0).analyzeFirstCharacter(result, options);
479
480           case BACKREFERENCE:
481             result.addRange(0, UTF16_MAX); // **** We can not optimize.
482
return FC_ANY;
483
484           case STRING:
485             int cha = this.getString().charAt(0);
486             int ch2;
487             if (REUtil.isHighSurrogate(cha)
488                 && this.getString().length() >= 2
489                 && REUtil.isLowSurrogate((ch2 = this.getString().charAt(1))))
490                 cha = REUtil.composeFromSurrogates(cha, ch2);
491             result.addRange(cha, cha);
492             if (cha < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
493                 cha = Character.toUpperCase((char)cha);
494                 result.addRange(cha, cha);
495                 cha = Character.toLowerCase((char)cha);
496                 result.addRange(cha, cha);
497             }
498             return FC_TERMINAL;
499
500           case LOOKAHEAD:
501           case NEGATIVELOOKAHEAD:
502           case LOOKBEHIND:
503           case NEGATIVELOOKBEHIND:
504             return FC_CONTINUE;
505
506           default:
507             throw new RuntimeException JavaDoc("Token#analyzeHeadCharacter(): Invalid Type: "+this.type);
508         }
509     }
510
511     private final boolean isShorterThan(Token tok) {
512         if (tok == null) return false;
513         /*
514         int mylength;
515         if (this.type == STRING) mylength = this.getString().length();
516         else if (this.type == CHAR) mylength = this.getChar() >= 0x10000 ? 2 : 1;
517         else throw new RuntimeException("Internal Error: Illegal type: "+this.type);
518         int otherlength;
519         if (tok.type == STRING) otherlength = tok.getString().length();
520         else if (tok.type == CHAR) otherlength = tok.getChar() >= 0x10000 ? 2 : 1;
521         else throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
522         */

523         int mylength;
524         if (this.type == STRING) mylength = this.getString().length();
525         else throw new RuntimeException JavaDoc("Internal Error: Illegal type: "+this.type);
526         int otherlength;
527         if (tok.type == STRING) otherlength = tok.getString().length();
528         else throw new RuntimeException JavaDoc("Internal Error: Illegal type: "+tok.type);
529         return mylength < otherlength;
530     }
531
532     static class FixedStringContainer {
533         Token token = null;
534         int options = 0;
535         FixedStringContainer() {
536         }
537     }
538
539     final void findFixedString(FixedStringContainer container, int options) {
540         switch (this.type) {
541           case CONCAT:
542             Token prevToken = null;
543             int prevOptions = 0;
544             for (int i = 0; i < this.size(); i ++) {
545                 this.getChild(i).findFixedString(container, options);
546                 if (prevToken == null || prevToken.isShorterThan(container.token)) {
547                     prevToken = container.token;
548                     prevOptions = container.options;
549                 }
550             }
551             container.token = prevToken;
552             container.options = prevOptions;
553             return;
554
555           case UNION:
556           case CLOSURE:
557           case NONGREEDYCLOSURE:
558           case EMPTY:
559           case ANCHOR:
560           case RANGE:
561           case DOT:
562           case NRANGE:
563           case BACKREFERENCE:
564           case LOOKAHEAD:
565           case NEGATIVELOOKAHEAD:
566           case LOOKBEHIND:
567           case NEGATIVELOOKBEHIND:
568           case CONDITION:
569             container.token = null;
570             return;
571
572           case CHAR: // Ignore CHAR tokens.
573
container.token = null; // **
574
return; // **
575

576           case STRING:
577             container.token = this;
578             container.options = options;
579             return;
580
581           case INDEPENDENT:
582           case PAREN:
583             this.getChild(0).findFixedString(container, options);
584             return;
585
586           case MODIFIERGROUP:
587             options |= ((ModifierToken)this).getOptions();
588             options &= ~((ModifierToken)this).getOptionsMask();
589             this.getChild(0).findFixedString(container, options);
590             return;
591
592           default:
593             throw new RuntimeException JavaDoc("Token#findFixedString(): Invalid Type: "+this.type);
594         }
595     }
596
597     boolean match(int ch) {
598         throw new RuntimeException JavaDoc("NFAArrow#match(): Internal error: "+this.type);
599     }
600
601     // ------------------------------------------------------
602
private final static Hashtable JavaDoc categories = new Hashtable JavaDoc();
603     private final static Hashtable JavaDoc categories2 = new Hashtable JavaDoc();
604     private static final String JavaDoc[] categoryNames = {
605         "Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
606         "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
607         "Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So", // 28
608
"Pi", "Pf", // 29, 30
609
"L", "M", "N", "Z", "C", "P", "S", // 31-37
610
};
611
612     // Schema Rec. {Datatypes} - Punctuation
613
static final int CHAR_INIT_QUOTE = 29; // Pi - initial quote
614
static final int CHAR_FINAL_QUOTE = 30; // Pf - final quote
615
static final int CHAR_LETTER = 31;
616     static final int CHAR_MARK = 32;
617     static final int CHAR_NUMBER = 33;
618     static final int CHAR_SEPARATOR = 34;
619     static final int CHAR_OTHER = 35;
620     static final int CHAR_PUNCTUATION = 36;
621     static final int CHAR_SYMBOL = 37;
622     
623     //blockNames in UNICODE 3.1 that supported by XML Schema REC
624
private static final String JavaDoc[] blockNames = {
625         /*0000..007F;*/ "Basic Latin",
626         /*0080..00FF;*/ "Latin-1 Supplement",
627         /*0100..017F;*/ "Latin Extended-A",
628         /*0180..024F;*/ "Latin Extended-B",
629         /*0250..02AF;*/ "IPA Extensions",
630         /*02B0..02FF;*/ "Spacing Modifier Letters",
631         /*0300..036F;*/ "Combining Diacritical Marks",
632         /*0370..03FF;*/ "Greek",
633         /*0400..04FF;*/ "Cyrillic",
634         /*0530..058F;*/ "Armenian",
635         /*0590..05FF;*/ "Hebrew",
636         /*0600..06FF;*/ "Arabic",
637         /*0700..074F;*/ "Syriac",
638         /*0780..07BF;*/ "Thaana",
639         /*0900..097F;*/ "Devanagari",
640         /*0980..09FF;*/ "Bengali",
641         /*0A00..0A7F;*/ "Gurmukhi",
642         /*0A80..0AFF;*/ "Gujarati",
643         /*0B00..0B7F;*/ "Oriya",
644         /*0B80..0BFF;*/ "Tamil",
645         /*0C00..0C7F;*/ "Telugu",
646         /*0C80..0CFF;*/ "Kannada",
647         /*0D00..0D7F;*/ "Malayalam",
648         /*0D80..0DFF;*/ "Sinhala",
649         /*0E00..0E7F;*/ "Thai",
650         /*0E80..0EFF;*/ "Lao",
651         /*0F00..0FFF;*/ "Tibetan",
652         /*1000..109F;*/ "Myanmar",
653         /*10A0..10FF;*/ "Georgian",
654         /*1100..11FF;*/ "Hangul Jamo",
655         /*1200..137F;*/ "Ethiopic",
656         /*13A0..13FF;*/ "Cherokee",
657         /*1400..167F;*/ "Unified Canadian Aboriginal Syllabics",
658         /*1680..169F;*/ "Ogham",
659         /*16A0..16FF;*/ "Runic",
660         /*1780..17FF;*/ "Khmer",
661         /*1800..18AF;*/ "Mongolian",
662         /*1E00..1EFF;*/ "Latin Extended Additional",
663         /*1F00..1FFF;*/ "Greek Extended",
664         /*2000..206F;*/ "General Punctuation",
665         /*2070..209F;*/ "Superscripts and Subscripts",
666         /*20A0..20CF;*/ "Currency Symbols",
667         /*20D0..20FF;*/ "Combining Marks for Symbols",
668         /*2100..214F;*/ "Letterlike Symbols",
669         /*2150..218F;*/ "Number Forms",
670         /*2190..21FF;*/ "Arrows",
671         /*2200..22FF;*/ "Mathematical Operators",
672         /*2300..23FF;*/ "Miscellaneous Technical",
673         /*2400..243F;*/ "Control Pictures",
674         /*2440..245F;*/ "Optical Character Recognition",
675         /*2460..24FF;*/ "Enclosed Alphanumerics",
676         /*2500..257F;*/ "Box Drawing",
677         /*2580..259F;*/ "Block Elements",
678         /*25A0..25FF;*/ "Geometric Shapes",
679         /*2600..26FF;*/ "Miscellaneous Symbols",
680         /*2700..27BF;*/ "Dingbats",
681         /*2800..28FF;*/ "Braille Patterns",
682         /*2E80..2EFF;*/ "CJK Radicals Supplement",
683         /*2F00..2FDF;*/ "Kangxi Radicals",
684         /*2FF0..2FFF;*/ "Ideographic Description Characters",
685         /*3000..303F;*/ "CJK Symbols and Punctuation",
686         /*3040..309F;*/ "Hiragana",
687         /*30A0..30FF;*/ "Katakana",
688         /*3100..312F;*/ "Bopomofo",
689         /*3130..318F;*/ "Hangul Compatibility Jamo",
690         /*3190..319F;*/ "Kanbun",
691         /*31A0..31BF;*/ "Bopomofo Extended",
692         /*3200..32FF;*/ "Enclosed CJK Letters and Months",
693         /*3300..33FF;*/ "CJK Compatibility",
694         /*3400..4DB5;*/ "CJK Unified Ideographs Extension A",
695         /*4E00..9FFF;*/ "CJK Unified Ideographs",
696         /*A000..A48F;*/ "Yi Syllables",
697         /*A490..A4CF;*/ "Yi Radicals",
698         /*AC00..D7A3;*/ "Hangul Syllables",
699         /*E000..F8FF;*/ "Private Use",
700         /*F900..FAFF;*/ "CJK Compatibility Ideographs",
701         /*FB00..FB4F;*/ "Alphabetic Presentation Forms",
702         /*FB50..FDFF;*/ "Arabic Presentation Forms-A",
703         /*FE20..FE2F;*/ "Combining Half Marks",
704         /*FE30..FE4F;*/ "CJK Compatibility Forms",
705         /*FE50..FE6F;*/ "Small Form Variants",
706         /*FE70..FEFE;*/ "Arabic Presentation Forms-B",
707         /*FEFF..FEFF;*/ "Specials",
708         /*FF00..FFEF;*/ "Halfwidth and Fullwidth Forms",
709          //missing Specials add manually
710
/*10300..1032F;*/ "Old Italic", // 84
711
/*10330..1034F;*/ "Gothic",
712         /*10400..1044F;*/ "Deseret",
713         /*1D000..1D0FF;*/ "Byzantine Musical Symbols",
714         /*1D100..1D1FF;*/ "Musical Symbols",
715         /*1D400..1D7FF;*/ "Mathematical Alphanumeric Symbols",
716         /*20000..2A6D6;*/ "CJK Unified Ideographs Extension B",
717         /*2F800..2FA1F;*/ "CJK Compatibility Ideographs Supplement",
718         /*E0000..E007F;*/ "Tags",
719         //missing 2 private use add manually
720

721     };
722     //ADD THOSE MANUALLY
723
//F0000..FFFFD; "Private Use",
724
//100000..10FFFD; "Private Use"
725
//FFF0..FFFD; "Specials",
726
static final String JavaDoc blockRanges =
727        "\u0000\u007F\u0080\u00FF\u0100\u017F\u0180\u024F\u0250\u02AF\u02B0\u02FF\u0300\u036F"
728         +"\u0370\u03FF\u0400\u04FF\u0530\u058F\u0590\u05FF\u0600\u06FF\u0700\u074F\u0780\u07BF"
729 <