KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > impl > xpath > regex > RangeToken


1 /*
2  * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.xerces.impl.xpath.regex;
18
19 /**
20  * This class represents a character class such as [a-z] or a period.
21  *
22  * @xerces.internal
23  *
24  * @version $Id: RangeToken.java,v 1.7 2005/03/22 03:23:06 mrglavas Exp $
25  */

26 final class RangeToken extends Token implements java.io.Serializable JavaDoc {
27
28     private static final long serialVersionUID = 3257568399592010545L;
29     
30     int[] ranges;
31     boolean sorted;
32     boolean compacted;
33     RangeToken icaseCache = null;
34     int[] map = null;
35     int nonMapIndex;
36
37     RangeToken(int type) {
38         super(type);
39         this.setSorted(false);
40     }
41
42                                                 // for RANGE or NRANGE
43
protected void addRange(int start, int end) {
44         this.icaseCache = null;
45         //System.err.println("Token#addRange(): "+start+" "+end);
46
int r1, r2;
47         if (start <= end) {
48             r1 = start;
49             r2 = end;
50         } else {
51             r1 = end;
52             r2 = start;
53         }
54
55         int pos = 0;
56         if (this.ranges == null) {
57             this.ranges = new int[2];
58             this.ranges[0] = r1;
59             this.ranges[1] = r2;
60             this.setSorted(true);
61         } else {
62             pos = this.ranges.length;
63             if (this.ranges[pos-1]+1 == r1) {
64                 this.ranges[pos-1] = r2;
65                 return;
66             }
67             int[] temp = new int[pos+2];
68             System.arraycopy(this.ranges, 0, temp, 0, pos);
69             this.ranges = temp;
70             if (this.ranges[pos-1] >= r1)
71                 this.setSorted(false);
72             this.ranges[pos++] = r1;
73             this.ranges[pos] = r2;
74             if (!this.sorted)
75                 this.sortRanges();
76         }
77     }
78
79     private final boolean isSorted() {
80         return this.sorted;
81     }
82     private final void setSorted(boolean sort) {
83         this.sorted = sort;
84         if (!sort) this.compacted = false;
85     }
86     private final boolean isCompacted() {
87         return this.compacted;
88     }
89     private final void setCompacted() {
90         this.compacted = true;
91     }
92
93     protected void sortRanges() {
94         if (this.isSorted())
95             return;
96         if (this.ranges == null)
97             return;
98         //System.err.println("Do sorting: "+this.ranges.length);
99

100                                                 // Bubble sort
101
// Why? -- In many cases,
102
// this.ranges has few elements.
103
for (int i = this.ranges.length-4; i >= 0; i -= 2) {
104             for (int j = 0; j <= i; j += 2) {
105                 if (this.ranges[j] > this.ranges[j+2]
106                     || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
107                     int tmp;
108                     tmp = this.ranges[j+2];
109                     this.ranges[j+2] = this.ranges[j];
110                     this.ranges[j] = tmp;
111                     tmp = this.ranges[j+3];
112                     this.ranges[j+3] = this.ranges[j+1];
113                     this.ranges[j+1] = tmp;
114                 }
115             }
116         }
117         this.setSorted(true);
118     }
119
120     /**
121      * this.ranges is sorted.
122      */

123     protected void compactRanges() {
124         boolean DEBUG = false;
125         if (this.ranges == null || this.ranges.length <= 2)
126             return;
127         if (this.isCompacted())
128             return;
129         int base = 0; // Index of writing point
130
int target = 0; // Index of processing point
131

132         while (target < this.ranges.length) {
133             if (base != target) {
134                 this.ranges[base] = this.ranges[target++];
135                 this.ranges[base+1] = this.ranges[target++];
136             } else
137                 target += 2;
138             int baseend = this.ranges[base+1];
139             while (target < this.ranges.length) {
140                 if (baseend+1 < this.ranges[target])
141                     break;
142                 if (baseend+1 == this.ranges[target]) {
143                     if (DEBUG)
144                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
145                                            +", "+this.ranges[base+1]
146                                            +"], ["+this.ranges[target]
147                                            +", "+this.ranges[target+1]
148                                            +"] -> ["+this.ranges[base]
149                                            +", "+this.ranges[target+1]
150                                            +"]");
151                     this.ranges[base+1] = this.ranges[target+1];
152                     baseend = this.ranges[base+1];
153                     target += 2;
154                 } else if (baseend >= this.ranges[target+1]) {
155                     if (DEBUG)
156                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
157                                            +", "+this.ranges[base+1]
158                                            +"], ["+this.ranges[target]
159                                            +", "+this.ranges[target+1]
160                                            +"] -> ["+this.ranges[base]
161                                            +", "+this.ranges[base+1]
162                                            +"]");
163                     target += 2;
164                 } else if (baseend < this.ranges[target+1]) {
165                     if (DEBUG)
166                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
167                                            +", "+this.ranges[base+1]
168                                            +"], ["+this.ranges[target]
169                                            +", "+this.ranges[target+1]
170                                            +"] -> ["+this.ranges[base]
171                                            +", "+this.ranges[target+1]
172                                            +"]");
173                     this.ranges[base+1] = this.ranges[target+1];
174                     baseend = this.ranges[base+1];
175                     target += 2;
176                 } else {
177                     throw new RuntimeException JavaDoc("Token#compactRanges(): Internel Error: ["
178                                                +this.ranges[base]
179                                                +","+this.ranges[base+1]
180                                                +"] ["+this.ranges[target]
181                                                +","+this.ranges[target+1]+"]");
182                 }
183             } // while
184
base += 2;
185         }
186
187         if (base != this.ranges.length) {
188             int[] result = new int[base];
189             System.arraycopy(this.ranges, 0, result, 0, base);
190             this.ranges = result;
191         }
192         this.setCompacted();
193     }
194
195     protected void mergeRanges(Token token) {
196         RangeToken tok = (RangeToken)token;
197         this.sortRanges();
198         tok.sortRanges();
199         if (tok.ranges == null)
200             return;
201         this.icaseCache = null;
202         this.setSorted(true);
203         if (this.ranges == null) {
204             this.ranges = new int[tok.ranges.length];
205             System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
206             return;
207         }
208         int[] result = new int[this.ranges.length+tok.ranges.length];
209         for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) {
210             if (i >= this.ranges.length) {
211                 result[k++] = tok.ranges[j++];
212                 result[k++] = tok.ranges[j++];
213             } else if (j >= tok.ranges.length) {
214                 result[k++] = this.ranges[i++];
215                 result[k++] = this.ranges[i++];
216             } else if (tok.ranges[j] < this.ranges[i]
217                        || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
218                 result[k++] = tok.ranges[j++];
219                 result[k++] = tok.ranges[j++];
220             } else {
221                 result[k++] = this.ranges[i++];
222                 result[k++] = this.ranges[i++];
223             }
224         }
225         this.ranges = result;
226     }
227
228     protected void subtractRanges(Token token) {
229         if (token.type == NRANGE) {
230             this.intersectRanges(token);
231             return;
232         }
233         RangeToken tok = (RangeToken)token;
234         if (tok.ranges == null || this.ranges == null)
235             return;
236         this.icaseCache = null;
237         this.sortRanges();
238         this.compactRanges();
239         tok.sortRanges();
240         tok.compactRanges();
241
242         //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
243

244         int[] result = new int[this.ranges.length+tok.ranges.length];
245         int wp = 0, src = 0, sub = 0;
246         while (src < this.ranges.length && sub < tok.ranges.length) {
247             int srcbegin = this.ranges[src];
248             int srcend = this.ranges[src+1];
249             int subbegin = tok.ranges[sub];
250             int subend = tok.ranges[sub+1];
251             if (srcend < subbegin) { // Not overlapped
252
// src: o-----o
253
// sub: o-----o
254
// res: o-----o
255
// Reuse sub
256
result[wp++] = this.ranges[src++];
257                 result[wp++] = this.ranges[src++];
258             } else if (srcend >= subbegin
259                        && srcbegin <= subend) { // Overlapped
260
// src: o--------o
261
// sub: o----o
262
// sub: o----o
263
// sub: o----o
264
// sub: o------------o
265
if (subbegin <= srcbegin && srcend <= subend) {
266                                                 // src: o--------o
267
// sub: o------------o
268
// res: empty
269
// Reuse sub
270
src += 2;
271                 } else if (subbegin <= srcbegin) {
272                                                 // src: o--------o
273
// sub: o----o
274
// res: o-----o
275
// Reuse src(=res)
276
this.ranges[src] = subend+1;
277                     sub += 2;
278                 } else if (srcend <= subend) {
279                                                 // src: o--------o
280
// sub: o----o
281
// res: o-----o
282
// Reuse sub
283
result[wp++] = srcbegin;
284                     result[wp++] = subbegin-1;
285                     src += 2;
286                 } else {
287                                                 // src: o--------o
288
// sub: o----o
289
// res: o-o o-o
290
// Reuse src(=right res)
291
result[wp++] = srcbegin;
292                     result[wp++] = subbegin-1;
293                     this.ranges[src] = subend+1;
294                     sub += 2;
295                 }
296             } else if (subend < srcbegin) {
297                                                 // Not overlapped
298
// src: o-----o
299
// sub: o----o
300
sub += 2;
301             } else {
302                 throw new RuntimeException JavaDoc("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
303                                            +","+this.ranges[src+1]
304                                            +"] - ["+tok.ranges[sub]
305                                            +","+tok.ranges[sub+1]
306                                            +"]");
307             }
308         }
309         while (src < this.ranges.length) {
310             result[wp++] = this.ranges[src++];
311             result[wp++] = this.ranges[src++];
312         }
313         this.ranges = new int[wp];
314         System.arraycopy(result, 0, this.ranges, 0, wp);
315                                                 // this.ranges is sorted and compacted.
316
}
317
318     /**
319      * @param tok Ignore whether it is NRANGE or not.
320      */

321     protected void intersectRanges(Token token) {
322         RangeToken tok = (RangeToken)token;
323         if (tok.ranges == null || this.ranges == null)
324             return;
325         this.icaseCache = null;
326         this.sortRanges();
327         this.compactRanges();
328         tok.sortRanges();
329         tok.compactRanges();
330
331         int[] result = new int[this.ranges.length+tok.ranges.length];
332         int wp = 0, src1 = 0, src2 = 0;
333         while (src1 < this.ranges.length && src2 < tok.ranges.length) {
334             int src1begin = this.ranges[src1];
335             int src1end = this.ranges[src1+1];
336             int src2begin = tok.ranges[src2];
337             int src2end = tok.ranges[src2+1];
338             if (src1end < src2begin) { // Not overlapped
339
// src1: o-----o
340
// src2: o-----o
341
// res: empty
342
// Reuse src2
343
src1 += 2;
344             } else if (src1end >= src2begin
345                        && src1begin <= src2end) { // Overlapped
346
// src1: o--------o
347
// src2: o----o
348
// src2: o----o
349
// src2: o----o
350
// src2: o------------o
351
if (src2begin <= src2begin && src1end <= src2end) {
352                                                 // src1: o--------o
353
// src2: o------------o
354
// res: o--------o
355
// Reuse src2
356
result[wp++] = src1begin;
357                     result[wp++] = src1end;
358                     src1 += 2;
359                 } else if (src2begin <= src1begin) {
360                                                 // src1: o--------o
361
// src2: o----o
362
// res: o--o
363
// Reuse the rest of src1
364
result[wp++] = src1begin;
365                     result[wp++] = src2end;
366                     this.ranges[src1] = src2end+1;
367                     src2 += 2;
368                 } else if (src1end <= src2end) {
369                                                 // src1: o--------o
370
// src2: o----o
371
// res: o--o
372
// Reuse src2
373
result[wp++] = src2begin;
374                     result[wp++] = src1end;
375                     src1 += 2;
376                 } else {
377                                                 // src1: o--------o
378
// src2: o----o
379
// res: o----o
380
// Reuse the rest of src1
381
result[wp++] = src2begin;
382                     result[wp++] = src2end;
383                     this.ranges[src1] = src2end+1;
384                 }
385             } else if (src2end < src1begin) {
386                                                 // Not overlapped
387
// src1: o-----o
388
// src2: o----o
389
src2 += 2;
390             } else {
391                 throw new RuntimeException JavaDoc("Token#intersectRanges(): Internal Error: ["
392                                            +this.ranges[src1]
393                                            +","+this.ranges[src1+1]
394                                            +"] & ["+tok.ranges[src2]
395                                            +","+tok.ranges[src2+1]
396                                            +"]");
397             }
398         }
399         while (src1 < this.ranges.length) {
400             result[wp++] = this.ranges[src1++];
401             result[wp++] = this.ranges[src1++];
402         }
403         this.ranges = new int[wp];
404         System.arraycopy(result, 0, this.ranges, 0, wp);
405                                                 // this.ranges is sorted and compacted.
406
}
407
408     /**
409      * for RANGE: Creates complement.
410      * for NRANGE: Creates the same meaning RANGE.
411      */

412     static Token complementRanges(Token token) {
413         if (token.type != RANGE && token.type != NRANGE)
414             throw new IllegalArgumentException JavaDoc("Token#complementRanges(): must be RANGE: "+token.type);
415         RangeToken tok = (RangeToken)token;
416         tok.sortRanges();
417         tok.compactRanges();
418         int len = tok.ranges.length+2;
419         if (tok.ranges[0] == 0)
420             len -= 2;
421         int last = tok.ranges[tok.ranges.length-1];
422         if (last == UTF16_MAX)
423             len -= 2;
424         RangeToken ret = Token.createRange();
425         ret.ranges = new int[len];
426         int wp = 0;
427         if (tok.ranges[0] > 0) {
428             ret.ranges[wp++] = 0;
429             ret.ranges[wp++] = tok.ranges[0]-1;
430         }
431         for (int i = 1; i < tok.ranges.length-2; i += 2) {
432             ret.ranges[wp++] = tok.ranges[i]+1;
433             ret.ranges[wp++] = tok.ranges[i+1]-1;
434         }
435         if (last != UTF16_MAX) {
436             ret.ranges[wp++] = last+1;
437             ret.ranges[wp] = UTF16_MAX;
438         }
439         ret.setCompacted();
440         return ret;
441     }
442
443     synchronized RangeToken getCaseInsensitiveToken() {
444         if (this.icaseCache != null)
445             return this.icaseCache;
446             
447         RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
448         for (int i = 0; i < this.ranges.length; i += 2) {
449             for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) {
450                 if (ch > 0xffff)
451                     uppers.addRange(ch, ch);
452                 else {
453                     char uch = Character.toUpperCase((char)ch);
454                     uppers.addRange(uch, uch);
455                 }
456             }
457         }
458         RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
459         for (int i = 0; i < uppers.ranges.length; i += 2) {
460             for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) {
461                 if (ch > 0xffff)
462                     lowers.addRange(ch, ch);
463                 else {
464                     char uch = Character.toUpperCase((char)ch);
465                     lowers.addRange(uch, uch);
466                 }
467             }
468         }
469         lowers.mergeRanges(uppers);
470         lowers.mergeRanges(this);
471         lowers.compactRanges();
472
473         this.icaseCache = lowers;
474         return lowers;
475     }
476
477     void dumpRanges() {
478         System.err.print("RANGE: ");
479         if (this.ranges == null)
480             System.err.println(" NULL");
481         for (int i = 0; i < this.ranges.length; i += 2) {
482             System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
483         }
484         System.err.println("");
485     }
486
487     boolean match(int ch) {
488         if (this.map == null) this.createMap();
489         boolean ret;
490         if (this.type == RANGE) {
491             if (ch < MAPSIZE)
492                 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
493             ret = false;
494             for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
495                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
496                     return true;
497             }
498         } else {
499             if (ch < MAPSIZE)
500                 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
501             ret = true;
502             for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
503                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
504                     return false;
505             }
506         }
507         return ret;
508     }
509
510     private static final int MAPSIZE = 256;
511     private void createMap() {
512         int asize = MAPSIZE/32; // 32 is the number of bits in `int'.
513
this.map = new int[asize];
514         this.nonMapIndex = this.ranges.length;
515         for (int i = 0; i < asize; i ++) this.map[i] = 0;
516         for (int i = 0; i < this.ranges.length; i += 2) {
517             int s = this.ranges[i];
518             int e = this.ranges[i+1];
519             if (s < MAPSIZE) {
520                 for (int j = s; j <= e && j < MAPSIZE; j ++)
521                     this.map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
522
} else {
523                 this.nonMapIndex = i;
524                 break;
525             }
526             if (e >= MAPSIZE) {
527                 this.nonMapIndex = i;
528                 break;
529             }
530         }
531         //for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16));
532
}
533
534     public String JavaDoc toString(int options) {
535         String JavaDoc ret;
536         if (this.type == RANGE) {
537             if (this == Token.token_dot)
538                 ret = ".";
539             else if (this == Token.token_0to9)
540                 ret = "\\d";
541             else if (this == Token.token_wordchars)
542                 ret = "\\w";
543             else if (this == Token.token_spaces)
544                 ret = "\\s";
545             else {
546                 StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
547                 sb.append("[");
548                 for (int i = 0; i < this.ranges.length; i += 2) {
549                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
550                     if (this.ranges[i] == this.ranges[i+1]) {
551                         sb.append(escapeCharInCharClass(this.ranges[i]));
552                     } else {
553                         sb.append(escapeCharInCharClass(this.ranges[i]));
554                         sb.append((char)'-');
555                         sb.append(escapeCharInCharClass(this.ranges[i+1]));
556                     }
557                 }
558                 sb.append("]");
559                 ret = sb.toString();
560             }
561         } else {
562             if (this == Token.token_not_0to9)
563                 ret = "\\D";
564             else if (this == Token.token_not_wordchars)
565                 ret = "\\W";
566             else if (this == Token.token_not_spaces)
567                 ret = "\\S";
568             else {
569                 StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
570                 sb.append("[^");
571                 for (int i = 0; i < this.ranges.length; i += 2) {
572                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
573                     if (this.ranges[i] == this.ranges[i+1]) {
574                         sb.append(escapeCharInCharClass(this.ranges[i]));
575                     } else {
576                         sb.append(escapeCharInCharClass(this.ranges[i]));
577                         sb.append('-');
578                         sb.append(escapeCharInCharClass(this.ranges[i+1]));
579                     }
580                 }
581                 sb.append("]");
582                 ret = sb.toString();
583             }
584         }
585         return ret;
586     }
587
588     private static String JavaDoc escapeCharInCharClass(int ch) {
589         String JavaDoc ret;
590         switch (ch) {
591           case '[': case ']': case '-': case '^':
592           case ',': case '\\':
593             ret = "\\"+(char)ch;
594             break;
595           case '\f': ret = "\\f"; break;
596           case '\n': ret = "\\n"; break;
597           case '\r': ret = "\\r"; break;
598           case '\t': ret = "\\t"; break;
599           case 0x1b: ret = "\\e"; break;
600           //case 0x0b: ret = "\\v"; break;
601
default:
602             if (ch < 0x20) {
603                 String JavaDoc pre = "0"+Integer.toHexString(ch);
604                 ret = "\\x"+pre.substring(pre.length()-2, pre.length());
605             } else if (ch >= 0x10000) {
606                 String JavaDoc pre = "0"+Integer.toHexString(ch);
607                 ret = "\\v"+pre.substring(pre.length()-6, pre.length());
608             } else
609                 ret = ""+(char)ch;
610         }
611         return ret;
612     }
613
614 }
615
Popular Tags