KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xerces > internal > impl > xpath > regex > RangeToken


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package com.sun.org.apache.xerces.internal.impl.xpath.regex;
59
60 /**
61  * This class represents a character class such as [a-z] or a period.
62  *
63  * @version $Id: RangeToken.java,v 1.4 2002/08/09 15:18:17 neilg Exp $
64  */

65 final class RangeToken extends Token implements java.io.Serializable JavaDoc {
66
67     int[] ranges;
68     boolean sorted;
69     boolean compacted;
70     RangeToken icaseCache = null;
71     int[] map = null;
72     int nonMapIndex;
73
74     RangeToken(int type) {
75         super(type);
76         this.setSorted(false);
77     }
78
79                                                 // for RANGE or NRANGE
80
protected void addRange(int start, int end) {
81         this.icaseCache = null;
82         //System.err.println("Token#addRange(): "+start+" "+end);
83
int r1, r2;
84         if (start <= end) {
85             r1 = start;
86             r2 = end;
87         } else {
88             r1 = end;
89             r2 = start;
90         }
91
92         int pos = 0;
93         if (this.ranges == null) {
94             this.ranges = new int[2];
95             this.ranges[0] = r1;
96             this.ranges[1] = r2;
97             this.setSorted(true);
98         } else {
99             pos = this.ranges.length;
100             if (this.ranges[pos-1]+1 == r1) {
101                 this.ranges[pos-1] = r2;
102                 return;
103             }
104             int[] temp = new int[pos+2];
105             System.arraycopy(this.ranges, 0, temp, 0, pos);
106             this.ranges = temp;
107             if (this.ranges[pos-1] >= r1)
108                 this.setSorted(false);
109             this.ranges[pos++] = r1;
110             this.ranges[pos] = r2;
111             if (!this.sorted)
112                 this.sortRanges();
113         }
114     }
115
116     private final boolean isSorted() {
117         return this.sorted;
118     }
119     private final void setSorted(boolean sort) {
120         this.sorted = sort;
121         if (!sort) this.compacted = false;
122     }
123     private final boolean isCompacted() {
124         return this.compacted;
125     }
126     private final void setCompacted() {
127         this.compacted = true;
128     }
129
130     protected void sortRanges() {
131         if (this.isSorted())
132             return;
133         if (this.ranges == null)
134             return;
135         //System.err.println("Do sorting: "+this.ranges.length);
136

137                                                 // Bubble sort
138
// Why? -- In many cases,
139
// this.ranges has few elements.
140
for (int i = this.ranges.length-4; i >= 0; i -= 2) {
141             for (int j = 0; j <= i; j += 2) {
142                 if (this.ranges[j] > this.ranges[j+2]
143                     || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
144                     int tmp;
145                     tmp = this.ranges[j+2];
146                     this.ranges[j+2] = this.ranges[j];
147                     this.ranges[j] = tmp;
148                     tmp = this.ranges[j+3];
149                     this.ranges[j+3] = this.ranges[j+1];
150                     this.ranges[j+1] = tmp;
151                 }
152             }
153         }
154         this.setSorted(true);
155     }
156
157     /**
158      * this.ranges is sorted.
159      */

160     protected void compactRanges() {
161         boolean DEBUG = false;
162         if (this.ranges == null || this.ranges.length <= 2)
163             return;
164         if (this.isCompacted())
165             return;
166         int base = 0; // Index of writing point
167
int target = 0; // Index of processing point
168

169         while (target < this.ranges.length) {
170             if (base != target) {
171                 this.ranges[base] = this.ranges[target++];
172                 this.ranges[base+1] = this.ranges[target++];
173             } else
174                 target += 2;
175             int baseend = this.ranges[base+1];
176             while (target < this.ranges.length) {
177                 if (baseend+1 < this.ranges[target])
178                     break;
179                 if (baseend+1 == this.ranges[target]) {
180                     if (DEBUG)
181                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
182                                            +", "+this.ranges[base+1]
183                                            +"], ["+this.ranges[target]
184                                            +", "+this.ranges[target+1]
185                                            +"] -> ["+this.ranges[base]
186                                            +", "+this.ranges[target+1]
187                                            +"]");
188                     this.ranges[base+1] = this.ranges[target+1];
189                     baseend = this.ranges[base+1];
190                     target += 2;
191                 } else if (baseend >= this.ranges[target+1]) {
192                     if (DEBUG)
193                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
194                                            +", "+this.ranges[base+1]
195                                            +"], ["+this.ranges[target]
196                                            +", "+this.ranges[target+1]
197                                            +"] -> ["+this.ranges[base]
198                                            +", "+this.ranges[base+1]
199                                            +"]");
200                     target += 2;
201                 } else if (baseend < this.ranges[target+1]) {
202                     if (DEBUG)
203                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
204                                            +", "+this.ranges[base+1]
205                                            +"], ["+this.ranges[target]
206                                            +", "+this.ranges[target+1]
207                                            +"] -> ["+this.ranges[base]
208                                            +", "+this.ranges[target+1]
209                                            +"]");
210                     this.ranges[base+1] = this.ranges[target+1];
211                     baseend = this.ranges[base+1];
212                     target += 2;
213                 } else {
214                     throw new RuntimeException JavaDoc("Token#compactRanges(): Internel Error: ["
215                                                +this.ranges[base]
216                                                +","+this.ranges[base+1]
217                                                +"] ["+this.ranges[target]
218                                                +","+this.ranges[target+1]+"]");
219                 }
220             } // while
221
base += 2;
222         }
223
224         if (base != this.ranges.length) {
225             int[] result = new int[base];
226             System.arraycopy(this.ranges, 0, result, 0, base);
227             this.ranges = result;
228         }
229         this.setCompacted();
230     }
231
232     protected void mergeRanges(Token token) {
233         RangeToken tok = (RangeToken)token;
234         this.sortRanges();
235         tok.sortRanges();
236         if (tok.ranges == null)
237             return;
238         this.icaseCache = null;
239         this.setSorted(true);
240         if (this.ranges == null) {
241             this.ranges = new int[tok.ranges.length];
242             System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
243             return;
244         }
245         int[] result = new int[this.ranges.length+tok.ranges.length];
246         for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) {
247             if (i >= this.ranges.length) {
248                 result[k++] = tok.ranges[j++];
249                 result[k++] = tok.ranges[j++];
250             } else if (j >= tok.ranges.length) {
251                 result[k++] = this.ranges[i++];
252                 result[k++] = this.ranges[i++];
253             } else if (tok.ranges[j] < this.ranges[i]
254                        || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
255                 result[k++] = tok.ranges[j++];
256                 result[k++] = tok.ranges[j++];
257             } else {
258                 result[k++] = this.ranges[i++];
259                 result[k++] = this.ranges[i++];
260             }
261         }
262         this.ranges = result;
263     }
264
265     protected void subtractRanges(Token token) {
266         if (token.type == NRANGE) {
267             this.intersectRanges(token);
268             return;
269         }
270         RangeToken tok = (RangeToken)token;
271         if (tok.ranges == null || this.ranges == null)
272             return;
273         this.icaseCache = null;
274         this.sortRanges();
275         this.compactRanges();
276         tok.sortRanges();
277         tok.compactRanges();
278
279         //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
280

281         int[] result = new int[this.ranges.length+tok.ranges.length];
282         int wp = 0, src = 0, sub = 0;
283         while (src < this.ranges.length && sub < tok.ranges.length) {
284             int srcbegin = this.ranges[src];
285             int srcend = this.ranges[src+1];
286             int subbegin = tok.ranges[sub];
287             int subend = tok.ranges[sub+1];
288             if (srcend < subbegin) { // Not overlapped
289
// src: o-----o
290
// sub: o-----o
291
// res: o-----o
292
// Reuse sub
293
result[wp++] = this.ranges[src++];
294                 result[wp++] = this.ranges[src++];
295             } else if (srcend >= subbegin
296                        && srcbegin <= subend) { // Overlapped
297
// src: o--------o
298
// sub: o----o
299
// sub: o----o
300
// sub: o----o
301
// sub: o------------o
302
if (subbegin <= srcbegin && srcend <= subend) {
303                                                 // src: o--------o
304
// sub: o------------o
305
// res: empty
306
// Reuse sub
307
src += 2;
308                 } else if (subbegin <= srcbegin) {
309                                                 // src: o--------o
310
// sub: o----o
311
// res: o-----o
312
// Reuse src(=res)
313
this.ranges[src] = subend+1;
314                     sub += 2;
315                 } else if (srcend <= subend) {
316                                                 // src: o--------o
317
// sub: o----o
318
// res: o-----o
319
// Reuse sub
320
result[wp++] = srcbegin;
321                     result[wp++] = subbegin-1;
322                     src += 2;
323                 } else {
324                                                 // src: o--------o
325
// sub: o----o
326
// res: o-o o-o
327
// Reuse src(=right res)
328
result[wp++] = srcbegin;
329                     result[wp++] = subbegin-1;
330                     this.ranges[src] = subend+1;
331                     sub += 2;
332                 }
333             } else if (subend < srcbegin) {
334                                                 // Not overlapped
335
// src: o-----o
336
// sub: o----o
337
sub += 2;
338             } else {
339                 throw new RuntimeException JavaDoc("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
340                                            +","+this.ranges[src+1]
341                                            +"] - ["+tok.ranges[sub]
342                                            +","+tok.ranges[sub+1]
343                                            +"]");
344             }
345         }
346         while (src < this.ranges.length) {
347             result[wp++] = this.ranges[src++];
348             result[wp++] = this.ranges[src++];
349         }
350         this.ranges = new int[wp];
351         System.arraycopy(result, 0, this.ranges, 0, wp);
352                                                 // this.ranges is sorted and compacted.
353
}
354
355     /**
356      * @param tok Ignore whether it is NRANGE or not.
357      */

358     protected void intersectRanges(Token token) {
359         RangeToken tok = (RangeToken)token;
360         if (tok.ranges == null || this.ranges == null)
361             return;
362         this.icaseCache = null;
363         this.sortRanges();
364         this.compactRanges();
365         tok.sortRanges();
366         tok.compactRanges();
367
368         int[] result = new int[this.ranges.length+tok.ranges.length];
369         int wp = 0, src1 = 0, src2 = 0;
370         while (src1 < this.ranges.length && src2 < tok.ranges.length) {
371             int src1begin = this.ranges[src1];
372             int src1end = this.ranges[src1+1];
373             int src2begin = tok.ranges[src2];
374             int src2end = tok.ranges[src2+1];
375             if (src1end < src2begin) { // Not overlapped
376
// src1: o-----o
377
// src2: o-----o
378
// res: empty
379
// Reuse src2
380
src1 += 2;
381             } else if (src1end >= src2begin
382                        && src1begin <= src2end) { // Overlapped
383
// src1: o--------o
384
// src2: o----o
385
// src2: o----o
386
// src2: o----o
387
// src2: o------------o
388
if (src2begin <= src2begin && src1end <= src2end) {
389                                                 // src1: o--------o
390
// src2: o------------o
391
// res: o--------o
392
// Reuse src2
393
result[wp++] = src1begin;
394                     result[wp++] = src1end;
395                     src1 += 2;
396                 } else if (src2begin <= src1begin) {
397                                                 // src1: o--------o
398
// src2: o----o
399
// res: o--o
400
// Reuse the rest of src1
401
result[wp++] = src1begin;
402                     result[wp++] = src2end;
403                     this.ranges[src1] = src2end+1;
404                     src2 += 2;
405                 } else if (src1end <= src2end) {
406                                                 // src1: o--------o
407
// src2: o----o
408
// res: o--o
409
// Reuse src2
410
result[wp++] = src2begin;
411                     result[wp++] = src1end;
412                     src1 += 2;
413                 } else {
414                                                 // src1: o--------o
415
// src2: o----o
416
// res: o----o
417
// Reuse the rest of src1
418
result[wp++] = src2begin;
419                     result[wp++] = src2end;
420                     this.ranges[src1] = src2end+1;
421                 }
422             } else if (src2end < src1begin) {
423                                                 // Not overlapped
424
// src1: o-----o
425
// src2: o----o
426
src2 += 2;
427             } else {
428                 throw new RuntimeException JavaDoc("Token#intersectRanges(): Internal Error: ["
429                                            +this.ranges[src1]
430                                            +","+this.ranges[src1+1]
431                                            +"] & ["+tok.ranges[src2]
432                                            +","+tok.ranges[src2+1]
433                                            +"]");
434             }
435         }
436         while (src1 < this.ranges.length) {
437             result[wp++] = this.ranges[src1++];
438             result[wp++] = this.ranges[src1++];
439         }
440         this.ranges = new int[wp];
441         System.arraycopy(result, 0, this.ranges, 0, wp);
442                                                 // this.ranges is sorted and compacted.
443
}
444
445     /**
446      * for RANGE: Creates complement.
447      * for NRANGE: Creates the same meaning RANGE.
448      */

449     static Token complementRanges(Token token) {
450         if (token.type != RANGE && token.type != NRANGE)
451             throw new IllegalArgumentException JavaDoc("Token#complementRanges(): must be RANGE: "+token.type);
452         RangeToken tok = (RangeToken)token;
453         tok.sortRanges();
454         tok.compactRanges();
455         int len = tok.ranges.length+2;
456         if (tok.ranges[0] == 0)
457             len -= 2;
458         int last = tok.ranges[tok.ranges.length-1];
459         if (last == UTF16_MAX)
460             len -= 2;
461         RangeToken ret = Token.createRange();
462         ret.ranges = new int[len];
463         int wp = 0;
464         if (tok.ranges[0] > 0) {
465             ret.ranges[wp++] = 0;
466             ret.ranges[wp++] = tok.ranges[0]-1;
467         }
468         for (int i = 1; i < tok.ranges.length-2; i += 2) {
469             ret.ranges[wp++] = tok.ranges[i]+1;
470             ret.ranges[wp++] = tok.ranges[i+1]-1;
471         }
472         if (last != UTF16_MAX) {
473             ret.ranges[wp++] = last+1;
474             ret.ranges[wp] = UTF16_MAX;
475         }
476         ret.setCompacted();
477         return ret;
478     }
479
480     synchronized RangeToken getCaseInsensitiveToken() {
481         if (this.icaseCache != null)
482             return this.icaseCache;
483             
484         RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
485         for (int i = 0; i < this.ranges.length; i += 2) {
486             for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) {
487                 if (ch > 0xffff)
488                     uppers.addRange(ch, ch);
489                 else {
490                     char uch = Character.toUpperCase((char)ch);
491                     uppers.addRange(uch, uch);
492                 }
493             }
494         }
495         RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
496         for (int i = 0; i < uppers.ranges.length; i += 2) {
497             for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) {
498                 if (ch > 0xffff)
499                     lowers.addRange(ch, ch);
500                 else {
501                     char uch = Character.toUpperCase((char)ch);
502                     lowers.addRange(uch, uch);
503                 }
504             }
505         }
506         lowers.mergeRanges(uppers);
507         lowers.mergeRanges(this);
508         lowers.compactRanges();
509
510         this.icaseCache = lowers;
511         return lowers;
512     }
513
514     void dumpRanges() {
515         System.err.print("RANGE: ");
516         if (this.ranges == null)
517             System.err.println(" NULL");
518         for (int i = 0; i < this.ranges.length; i += 2) {
519             System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
520         }
521         System.err.println("");
522     }
523
524     boolean match(int ch) {
525         if (this.map == null) this.createMap();
526         boolean ret;
527         if (this.type == RANGE) {
528             if (ch < MAPSIZE)
529                 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
530             ret = false;
531             for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
532                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
533                     return true;
534             }
535         } else {
536             if (ch < MAPSIZE)
537                 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
538             ret = true;
539             for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
540                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
541                     return false;
542             }
543         }
544         return ret;
545     }
546
547     private static final int MAPSIZE = 256;
548     private void createMap() {
549         int asize = MAPSIZE/32; // 32 is the number of bits in `int'.
550
this.map = new int[asize];
551         this.nonMapIndex = this.ranges.length;
552         for (int i = 0; i < asize; i ++) this.map[i] = 0;
553         for (int i = 0; i < this.ranges.length; i += 2) {
554             int s = this.ranges[i];
555             int e = this.ranges[i+1];
556             if (s < MAPSIZE) {
557                 for (int j = s; j <= e && j < MAPSIZE; j ++)
558                     this.map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
559
} else {
560                 this.nonMapIndex = i;
561                 break;
562             }
563             if (e >= MAPSIZE) {
564                 this.nonMapIndex = i;
565                 break;
566             }
567         }
568         //for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16));
569
}
570
571     public String JavaDoc toString(int options) {
572         String JavaDoc ret;
573         if (this.type == RANGE) {
574             if (this == Token.token_dot)
575                 ret = ".";
576             else if (this == Token.token_0to9)
577                 ret = "\\d";
578             else if (this == Token.token_wordchars)
579                 ret = "\\w";
580             else if (this == Token.token_spaces)
581                 ret = "\\s";
582             else {
583                 StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
584                 sb.append("[");
585                 for (int i = 0; i < this.ranges.length; i += 2) {
586                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
587                     if (this.ranges[i] == this.ranges[i+1]) {
588                         sb.append(escapeCharInCharClass(this.ranges[i]));
589                     } else {
590                         sb.append(escapeCharInCharClass(this.ranges[i]));
591                         sb.append((char)'-');
592                         sb.append(escapeCharInCharClass(this.ranges[i+1]));
593                     }
594                 }
595                 sb.append("]");
596                 ret = sb.toString();
597             }
598         } else {
599             if (this == Token.token_not_0to9)
600                 ret = "\\D";
601             else if (this == Token.token_not_wordchars)
602                 ret = "\\W";
603             else if (this == Token.token_not_spaces)
604                 ret = "\\S";
605             else {
606                 StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
607                 sb.append("[^");
608                 for (int i = 0; i < this.ranges.length; i += 2) {
609                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
610                     if (this.ranges[i] == this.ranges[i+1]) {
611                         sb.append(escapeCharInCharClass(this.ranges[i]));
612                     } else {
613                         sb.append(escapeCharInCharClass(this.ranges[i]));
614                         sb.append('-');
615                         sb.append(escapeCharInCharClass(this.ranges[i+1]));
616                     }
617                 }
618                 sb.append("]");
619                 ret = sb.toString();
620             }
621         }
622         return ret;
623     }
624
625     private static String JavaDoc escapeCharInCharClass(int ch) {
626         String JavaDoc ret;
627         switch (ch) {
628           case '[': case ']': case '-': case '^':
629           case ',': case '\\':
630             ret = "\\"+(char)ch;
631             break;
632           case '\f': ret = "\\f"; break;
633           case '\n': ret = "\\n"; break;
634           case '\r': ret = "\\r"; break;
635           case '\t': ret = "\\t"; break;
636           case 0x1b: ret = "\\e"; break;
637           //case 0x0b: ret = "\\v"; break;
638
default:
639             if (ch < 0x20) {
640                 String JavaDoc pre = "0"+Integer.toHexString(ch);
641                 ret = "\\x"+pre.substring(pre.length()-2, pre.length());
642             } else if (ch >= 0x10000) {
643                 String JavaDoc pre = "0"+Integer.toHexString(ch);
644                 ret = "\\v"+pre.substring(pre.length()-6, pre.length());
645             } else
646                 ret = ""+(char)ch;
647         }
648         return ret;
649     }
650
651 }
652
Popular Tags