KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > utils > regex > RangeToken


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.utils.regex;
59
60
61 /**
62  * This class represents a character class such as [a-z] or a period.
63  */

64 final class RangeToken extends Token implements java.io.Serializable JavaDoc {
65
66     int[] ranges;
67     boolean sorted;
68     boolean compacted;
69     RangeToken icaseCache = null;
70     int[] map = null;
71     int nonMapIndex;
72
73     RangeToken(int type) {
74         super(type);
75         this.setSorted(false);
76     }
77
78                                                 // for RANGE or NRANGE
79
protected void addRange(int start, int end) {
80         this.icaseCache = null;
81         //System.err.println("Token#addRange(): "+start+" "+end);
82
int r1, r2;
83         if (start <= end) {
84             r1 = start;
85             r2 = end;
86         } else {
87             r1 = end;
88             r2 = start;
89         }
90
91         int pos = 0;
92         if (this.ranges == null) {
93             this.ranges = new int[2];
94             this.ranges[0] = r1;
95             this.ranges[1] = r2;
96             this.setSorted(true);
97         } else {
98             pos = this.ranges.length;
99             if (this.ranges[pos-1]+1 == r1) {
100                 this.ranges[pos-1] = r2;
101                 return;
102             }
103             int[] temp = new int[pos+2];
104             System.arraycopy(this.ranges, 0, temp, 0, pos);
105             this.ranges = temp;
106             if (this.ranges[pos-1] >= r1)
107                 this.setSorted(false);
108             this.ranges[pos++] = r1;
109             this.ranges[pos] = r2;
110             if (!this.sorted)
111                 this.sortRanges();
112         }
113     }
114
115     private final boolean isSorted() {
116         return this.sorted;
117     }
118     private final void setSorted(boolean sort) {
119         this.sorted = sort;
120         if (!sort) this.compacted = false;
121     }
122     private final boolean isCompacted() {
123         return this.compacted;
124     }
125     private final void setCompacted() {
126         this.compacted = true;
127     }
128
129     protected void sortRanges() {
130         if (this.isSorted())
131             return;
132         if (this.ranges == null)
133             return;
134         //System.err.println("Do sorting: "+this.ranges.length);
135

136                                                 // Bubble sort
137
// Why? -- In many cases,
138
// this.ranges has few elements.
139
for (int i = this.ranges.length-4; i >= 0; i -= 2) {
140             for (int j = 0; j <= i; j += 2) {
141                 if (this.ranges[j] > this.ranges[j+2]
142                     || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
143                     int tmp;
144                     tmp = this.ranges[j+2];
145                     this.ranges[j+2] = this.ranges[j];
146                     this.ranges[j] = tmp;
147                     tmp = this.ranges[j+3];
148                     this.ranges[j+3] = this.ranges[j+1];
149                     this.ranges[j+1] = tmp;
150                 }
151             }
152         }
153         this.setSorted(true);
154     }
155
156     /**
157      * this.ranges is sorted.
158      */

159     protected void compactRanges() {
160         boolean DEBUG = false;
161         if (this.ranges == null || this.ranges.length <= 2)
162             return;
163         if (this.isCompacted())
164             return;
165         int base = 0; // Index of writing point
166
int target = 0; // Index of processing point
167

168         while (target < this.ranges.length) {
169             if (base != target) {
170                 this.ranges[base] = this.ranges[target++];
171                 this.ranges[base+1] = this.ranges[target++];
172             } else
173                 target += 2;
174             int baseend = this.ranges[base+1];
175             while (target < this.ranges.length) {
176                 if (baseend+1 < this.ranges[target])
177                     break;
178                 if (baseend+1 == this.ranges[target]) {
179                     if (DEBUG)
180                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
181                                            +", "+this.ranges[base+1]
182                                            +"], ["+this.ranges[target]
183                                            +", "+this.ranges[target+1]
184                                            +"] -> ["+this.ranges[base]
185                                            +", "+this.ranges[target+1]
186                                            +"]");
187                     this.ranges[base+1] = this.ranges[target+1];
188                     baseend = this.ranges[base+1];
189                     target += 2;
190                 } else if (baseend >= this.ranges[target+1]) {
191                     if (DEBUG)
192                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
193                                            +", "+this.ranges[base+1]
194                                            +"], ["+this.ranges[target]
195                                            +", "+this.ranges[target+1]
196                                            +"] -> ["+this.ranges[base]
197                                            +", "+this.ranges[base+1]
198                                            +"]");
199                     target += 2;
200                 } else if (baseend < this.ranges[target+1]) {
201                     if (DEBUG)
202                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
203                                            +", "+this.ranges[base+1]
204                                            +"], ["+this.ranges[target]
205                                            +", "+this.ranges[target+1]
206                                            +"] -> ["+this.ranges[base]
207                                            +", "+this.ranges[target+1]
208                                            +"]");
209                     this.ranges[base+1] = this.ranges[target+1];
210                     baseend = this.ranges[base+1];
211                     target += 2;
212                 } else {
213                     throw new RuntimeException JavaDoc("Token#compactRanges(): Internel Error: ["
214                                                +this.ranges[base]
215                                                +","+this.ranges[base+1]
216                                                +"] ["+this.ranges[target]
217                                                +","+this.ranges[target+1]+"]");
218                 }
219             } // while
220
base += 2;
221         }
222
223         if (base != this.ranges.length) {
224             int[] result = new int[base];
225             System.arraycopy(this.ranges, 0, result, 0, base);
226             this.ranges = result;
227         }
228         this.setCompacted();
229     }
230
231     protected void mergeRanges(Token token) {
232         if (token.type != this.type)
233             throw new IllegalArgumentException JavaDoc("Token#mergeRanges(): Mismatched Type: "+token.type);
234         RangeToken tok = (RangeToken)token;
235         this.sortRanges();
236         tok.sortRanges();
237         if (tok.ranges == null)
238             return;
239         this.icaseCache = null;
240         this.setSorted(true);
241         if (this.ranges == null) {
242             this.ranges = new int[tok.ranges.length];
243             System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
244             return;
245         }
246         int[] result = new int[this.ranges.length+tok.ranges.length];
247         for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) {
248             if (i >= this.ranges.length) {
249                 result[k++] = tok.ranges[j++];
250                 result[k++] = tok.ranges[j++];
251             } else if (j >= tok.ranges.length) {
252                 result[k++] = this.ranges[i++];
253                 result[k++] = this.ranges[i++];
254             } else if (tok.ranges[j] < this.ranges[i]
255                        || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
256                 result[k++] = tok.ranges[j++];
257                 result[k++] = tok.ranges[j++];
258             } else {
259                 result[k++] = this.ranges[i++];
260                 result[k++] = this.ranges[i++];
261             }
262         }
263         this.ranges = result;
264     }
265
266     protected void subtractRanges(Token token) {
267         if (token.type == NRANGE) {
268             this.intersectRanges(token);
269             return;
270         }
271         RangeToken tok = (RangeToken)token;
272         if (tok.ranges == null || this.ranges == null)
273             return;
274         this.icaseCache = null;
275         this.sortRanges();
276         this.compactRanges();
277         tok.sortRanges();
278         tok.compactRanges();
279
280         //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
281

282         int[] result = new int[this.ranges.length+tok.ranges.length];
283         int wp = 0, src = 0, sub = 0;
284         while (src < this.ranges.length && sub < tok.ranges.length) {
285             int srcbegin = this.ranges[src];
286             int srcend = this.ranges[src+1];
287             int subbegin = tok.ranges[sub];
288             int subend = tok.ranges[sub+1];
289             if (srcend < subbegin) { // Not overlapped
290
// src: o-----o
291
// sub: o-----o
292
// res: o-----o
293
// Reuse sub
294
result[wp++] = this.ranges[src++];
295                 result[wp++] = this.ranges[src++];
296             } else if (srcend >= subbegin
297                        && srcbegin <= subend) { // Overlapped
298
// src: o--------o
299
// sub: o----o
300
// sub: o----o
301
// sub: o----o
302
// sub: o------------o
303
if (subbegin <= srcbegin && srcend <= subend) {
304                                                 // src: o--------o
305
// sub: o------------o
306
// res: empty
307
// Reuse sub
308
src += 2;
309                 } else if (subbegin <= srcbegin) {
310                                                 // src: o--------o
311
// sub: o----o
312
// res: o-----o
313
// Reuse src(=res)
314
this.ranges[src] = subend+1;
315                     sub += 2;
316                 } else if (srcend <= subend) {
317                                                 // src: o--------o
318
// sub: o----o
319
// res: o-----o
320
// Reuse sub
321
result[wp++] = srcbegin;
322                     result[wp++] = subbegin-1;
323                     src += 2;
324                 } else {
325                                                 // src: o--------o
326
// sub: o----o
327
// res: o-o o-o
328
// Reuse src(=right res)
329
result[wp++] = srcbegin;
330                     result[wp++] = subbegin-1;
331                     this.ranges[src] = subend+1;
332                     sub += 2;
333                 }
334             } else if (subend < srcbegin) {
335                                                 // Not overlapped
336
// src: o-----o
337
// sub: o----o
338
sub += 2;
339             } else {
340                 throw new RuntimeException JavaDoc("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
341                                            +","+this.ranges[src+1]
342                                            +"] - ["+tok.ranges[sub]
343                                            +","+tok.ranges[sub+1]
344                                            +"]");
345             }
346         }
347         while (src < this.ranges.length) {
348             result[wp++] = this.ranges[src++];
349             result[wp++] = this.ranges[src++];
350         }
351         this.ranges = new int[wp];
352         System.arraycopy(result, 0, this.ranges, 0, wp);
353                                                 // this.ranges is sorted and compacted.
354
}
355
356     /**
357      * @param tok Ignore whether it is NRANGE or not.
358      */

359     protected void intersectRanges(Token token) {
360         RangeToken tok = (RangeToken)token;
361         if (tok.ranges == null || this.ranges == null)
362             return;
363         this.icaseCache = null;
364         this.sortRanges();
365         this.compactRanges();
366         tok.sortRanges();
367         tok.compactRanges();
368
369         int[] result = new int[this.ranges.length+tok.ranges.length];
370         int wp = 0, src1 = 0, src2 = 0;
371         while (src1 < this.ranges.length && src2 < tok.ranges.length) {
372             int src1begin = this.ranges[src1];
373             int src1end = this.ranges[src1+1];
374             int src2begin = tok.ranges[src2];
375             int src2end = tok.ranges[src2+1];
376             if (src1end < src2begin) { // Not overlapped
377
// src1: o-----o
378
// src2: o-----o
379
// res: empty
380
// Reuse src2
381
src1 += 2;
382             } else if (src1end >= src2begin
383                        && src1begin <= src2end) { // Overlapped
384
// src1: o--------o
385
// src2: o----o
386
// src2: o----o
387
// src2: o----o
388
// src2: o------------o
389
if (src2begin <= src2begin && src1end <= src2end) {
390                                                 // src1: o--------o
391
// src2: o------------o
392
// res: o--------o
393
// Reuse src2
394
result[wp++] = src1begin;
395                     result[wp++] = src1end;
396                     src1 += 2;
397                 } else if (src2begin <= src1begin) {
398                                                 // src1: o--------o
399
// src2: o----o
400
// res: o--o
401
// Reuse the rest of src1
402
result[wp++] = src1begin;
403                     result[wp++] = src2end;
404                     this.ranges[src1] = src2end+1;
405                     src2 += 2;
406                 } else if (src1end <= src2end) {
407                                                 // src1: o--------o
408
// src2: o----o
409
// res: o--o
410
// Reuse src2
411
result[wp++] = src2begin;
412                     result[wp++] = src1end;
413                     src1 += 2;
414                 } else {
415                                                 // src1: o--------o
416
// src2: o----o
417
// res: o----o
418
// Reuse the rest of src1
419
result[wp++] = src2begin;
420                     result[wp++] = src2end;
421                     this.ranges[src1] = src2end+1;
422                 }
423             } else if (src2end < src1begin) {
424                                                 // Not overlapped
425
// src1: o-----o
426
// src2: o----o
427
src2 += 2;
428             } else {
429                 throw new RuntimeException JavaDoc("Token#intersectRanges(): Internal Error: ["
430                                            +this.ranges[src1]
431                                            +","+this.ranges[src1+1]
432                                            +"] & ["+tok.ranges[src2]
433                                            +","+tok.ranges[src2+1]
434                                            +"]");
435             }
436         }
437         while (src1 < this.ranges.length) {
438             result[wp++] = this.ranges[src1++];
439             result[wp++] = this.ranges[src1++];
440         }
441         this.ranges = new int[wp];
442         System.arraycopy(result, 0, this.ranges, 0, wp);
443                                                 // this.ranges is sorted and compacted.
444
}
445
446     /**
447      * for RANGE: Creates complement.
448      * for NRANGE: Creates the same meaning RANGE.
449      */

450     static Token complementRanges(Token token) {
451         if (token.type != RANGE && token.type != NRANGE)
452             throw new IllegalArgumentException JavaDoc("Token#complementRanges(): must be RANGE: "+token.type);
453         RangeToken tok = (RangeToken)token;
454         tok.sortRanges();
455         tok.compactRanges();
456         int len = tok.ranges.length+2;
457         if (tok.ranges[0] == 0)
458             len -= 2;
459         int last = tok.ranges[tok.ranges.length-1];
460         if (last == UTF16_MAX)
461             len -= 2;
462         RangeToken ret = Token.createRange();
463         ret.ranges = new int[len];
464         int wp = 0;
465         if (tok.ranges[0] > 0) {
466             ret.ranges[wp++] = 0;
467             ret.ranges[wp++] = tok.ranges[0]-1;
468         }
469         for (int i = 1; i < tok.ranges.length-2; i += 2) {
470             ret.ranges[wp++] = tok.ranges[i]+1;
471             ret.ranges[wp++] = tok.ranges[i+1]-1;
472         }
473         if (last != UTF16_MAX) {
474             ret.ranges[wp++] = last+1;
475             ret.ranges[wp] = UTF16_MAX;
476         }
477         ret.setCompacted();
478         return ret;
479     }
480
481     synchronized RangeToken getCaseInsensitiveToken() {
482         if (this.icaseCache != null)
483             return this.icaseCache;
484             
485         RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
486         for (int i = 0; i < this.ranges.length; i += 2) {
487             for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) {
488                 if (ch > 0xffff)
489                     uppers.addRange(ch, ch);
490                 else {
491                     char uch = Character.toUpperCase((char)ch);
492                     uppers.addRange(uch, uch);
493                 }
494             }
495         }
496         RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
497         for (int i = 0; i < uppers.ranges.length; i += 2) {
498             for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) {
499                 if (ch > 0xffff)
500                     lowers.addRange(ch, ch);
501                 else {
502                     char uch = Character.toUpperCase((char)ch);
503                     lowers.addRange(uch, uch);
504                 }
505             }
506         }
507         lowers.mergeRanges(uppers);
508         lowers.mergeRanges(this);
509         lowers.compactRanges();
510
511         this.icaseCache = lowers;
512         return lowers;
513     }
514
515     void dumpRanges() {
516         System.err.print("RANGE: ");
517         if (this.ranges == null)
518             System.err.println(" NULL");
519         for (int i = 0; i < this.ranges.length; i += 2) {
520             System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
521         }
522         System.err.println("");
523     }
524
525     boolean match(int ch) {
526         if (this.map == null) this.createMap();
527         boolean ret;
528         if (this.type == RANGE) {
529             if (ch < MAPSIZE)
530                 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
531             ret = false;
532             for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
533                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
534                     return true;
535             }
536         } else {
537             if (ch < MAPSIZE)
538                 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
539             ret = true;
540             for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
541                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
542                     return false;
543             }
544         }
545         return ret;
546     }
547
548     private static final int MAPSIZE = 256;
549     private void createMap() {
550         int asize = MAPSIZE/32; // 32 is the number of bits in `int'.
551
this.map = new int[asize];
552         this.nonMapIndex = this.ranges.length;
553         for (int i = 0; i < asize; i ++) this.map[i] = 0;
554         for (int i = 0; i < this.ranges.length; i += 2) {
555             int s = this.ranges[i];
556             int e = this.ranges[i+1];
557             if (s < MAPSIZE) {
558                 for (int j = s; j <= e && j < MAPSIZE; j ++)
559                     this.map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
560
} else {
561                 this.nonMapIndex = i;
562                 break;
563             }
564             if (e >= MAPSIZE) {
565                 this.nonMapIndex = i;
566                 break;
567             }
568         }
569         //for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16));
570
}
571
572     public String JavaDoc toString(int options) {
573         String JavaDoc ret;
574         if (this.type == RANGE) {
575             if (this == Token.token_dot)
576                 ret = ".";
577             else if (this == Token.token_0to9)
578                 ret = "\\d";
579             else if (this == Token.token_wordchars)
580                 ret = "\\w";
581             else if (this == Token.token_spaces)
582                 ret = "\\s";
583             else {
584                 StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
585                 sb.append("[");
586                 for (int i = 0; i < this.ranges.length; i += 2) {
587                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
588                     if (this.ranges[i] == this.ranges[i+1]) {
589                         sb.append(escapeCharInCharClass(this.ranges[i]));
590                     } else {
591                         sb.append(escapeCharInCharClass(this.ranges[i]));
592                         sb.append((char)'-');
593                         sb.append(escapeCharInCharClass(this.ranges[i+1]));
594                     }
595                 }
596                 sb.append("]");
597                 ret = sb.toString();
598             }
599         } else {
600             if (this == Token.token_not_0to9)
601                 ret = "\\D";
602             else if (this == Token.token_not_wordchars)
603                 ret = "\\W";
604             else if (this == Token.token_not_spaces)
605                 ret = "\\S";
606             else {
607                 StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
608                 sb.append("[^");
609                 for (int i = 0; i < this.ranges.length; i += 2) {
610                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
611                     if (this.ranges[i] == this.ranges[i+1]) {
612                         sb.append(escapeCharInCharClass(this.ranges[i]));
613                     } else {
614                         sb.append(escapeCharInCharClass(this.ranges[i]));
615                         sb.append('-');
616                         sb.append(escapeCharInCharClass(this.ranges[i+1]));
617                     }
618                 }
619                 sb.append("]");
620                 ret = sb.toString();
621             }
622         }
623         return ret;
624     }
625
626     private static String JavaDoc escapeCharInCharClass(int ch) {
627         String JavaDoc ret;
628         switch (ch) {
629           case '[': case ']': case '-': case '^':
630           case ',': case '\\':
631             ret = "\\"+(char)ch;
632             break;
633           case '\f': ret = "\\f"; break;
634           case '\n': ret = "\\n"; break;
635           case '\r': ret = "\\r"; break;
636           case '\t': ret = "\\t"; break;
637           case 0x1b: ret = "\\e"; break;
638           //case 0x0b: ret = "\\v"; break;
639
default:
640             if (ch < 0x20) {
641                 String JavaDoc pre = "0"+Integer.toHexString(ch);
642                 ret = "\\x"+pre.substring(pre.length()-2, pre.length());
643             } else if (ch >= 0x10000) {
644                 String JavaDoc pre = "0"+Integer.toHexString(ch);
645                 ret = "\\v"+pre.substring(pre.length()-6, pre.length());
646             } else
647                 ret = ""+(char)ch;
648         }
649         return ret;
650     }
651
652 }
653
Popular Tags