KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > JFlex > IntCharSet


1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * JFlex 1.4.1 *
3  * Copyright (C) 1998-2004 Gerwin Klein <lsf@jflex.de> *
4  * All rights reserved. *
5  * *
6  * This program is free software; you can redistribute it and/or modify *
7  * it under the terms of the GNU General Public License. See the file *
8  * COPYRIGHT for more information. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License along *
16  * with this program; if not, write to the Free Software Foundation, Inc., *
17  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
18  * *
19  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

20
21 package JFlex;
22
23 import java.util.*;
24
25
26 /**
27  * CharSet implemented with intervalls
28  *
29  * [fixme: optimizations possible]
30  *
31  * @author Gerwin Klein
32  * @version JFlex 1.4.1, $Revision: 2.9 $, $Date: 2004/11/07 00:12:48 $
33  */

34 public final class IntCharSet {
35
36   private final static boolean DEBUG = false;
37
38   /* invariant: all intervals are disjoint, ordered */
39   private Vector intervalls;
40   private int pos;
41
42   public IntCharSet() {
43     this.intervalls = new Vector();
44   }
45
46   public IntCharSet(char c) {
47     this(new Interval(c,c));
48   }
49
50   public IntCharSet(Interval intervall) {
51     this();
52     intervalls.addElement(intervall);
53   }
54
55   public IntCharSet(Vector /* Interval */ chars) {
56     int size = chars.size();
57
58     this.intervalls = new Vector(size);
59     
60     for (int i = 0; i < size; i++)
61       add( (Interval) chars.elementAt(i) );
62   }
63
64   
65   
66
67   /**
68    * returns the index of the intervall that contains
69    * the character c, -1 if there is no such intevall
70    *
71    * @prec: true
72    * @post: -1 <= return < intervalls.size() &&
73    * (return > -1 --> intervalls[return].contains(c))
74    *
75    * @param c the character
76    * @return the index of the enclosing interval, -1 if no such interval
77    */

78   private int indexOf(char c) {
79     int start = 0;
80     int end = intervalls.size()-1;
81
82     while (start <= end) {
83       int check = (start+end) / 2;
84       Interval i = (Interval) intervalls.elementAt(check);
85       
86       if (start == end)
87         return i.contains(c) ? start : -1;
88
89       if (c < i.start) {
90         end = check-1;
91         continue;
92       }
93
94       if (c > i.end) {
95         start = check+1;
96         continue;
97       }
98
99       return check;
100     }
101
102     return -1;
103   }
104
105   public IntCharSet add(IntCharSet set) {
106     for (int i = 0; i < set.intervalls.size(); i++)
107       add( (Interval) set.intervalls.elementAt(i) );
108     return this;
109   }
110
111   public void add(Interval intervall) {
112     
113     int size = intervalls.size();
114
115     for (int i = 0; i < size; i++) {
116       Interval elem = (Interval) intervalls.elementAt(i);
117
118       if ( elem.end+1 < intervall.start ) continue;
119       
120       if ( elem.contains(intervall) ) return;
121
122       if ( elem.start > intervall.end+1 ) {
123         intervalls.insertElementAt(new Interval(intervall), i);
124         return;
125       }
126       
127       if (intervall.start < elem.start)
128         elem.start = intervall.start;
129       
130       if (intervall.end <= elem.end)
131         return;
132         
133       elem.end = intervall.end;
134         
135       i++;
136       // delete all x with x.contains( intervall.end )
137
while (i < size) {
138         Interval x = (Interval) intervalls.elementAt(i);
139         if (x.start > elem.end+1) return;
140         
141         elem.end = x.end;
142         intervalls.removeElementAt(i);
143         size--;
144       }
145       return;
146     }
147
148     intervalls.addElement(new Interval(intervall));
149   }
150
151   public void add(char c) {
152     int size = intervalls.size();
153
154     for (int i = 0; i < size; i++) {
155       Interval elem = (Interval) intervalls.elementAt(i);
156       if (elem.end+1 < c) continue;
157
158       if (elem.contains(c)) return; // already there, nothing to do
159

160       // assert(elem.end+1 >= c && (elem.start > c || elem.end < c));
161

162       if (elem.start > c+1) {
163         intervalls.insertElementAt(new Interval(c,c), i);
164         return;
165       }
166
167       // assert(elem.end+1 >= c && elem.start <= c+1 && (elem.start > c || elem.end < c));
168

169       if (c+1 == elem.start) {
170         elem.start = c;
171         return;
172       }
173       
174       // assert(elem.end+1 == c);
175
elem.end = c;
176
177       // merge with next interval if it contains c
178
if (i+1 >= size) return;
179       Interval x = (Interval) intervalls.elementAt(i+1);
180       if (x.start <= c+1) {
181         elem.end = x.end;
182         intervalls.removeElementAt(i+1);
183       }
184       return;
185     }
186     
187     // end reached but nothing found -> append at end
188
intervalls.addElement(new Interval(c,c));
189   }
190
191  
192   public boolean contains(char singleChar) {
193     return indexOf(singleChar) >= 0;
194   }
195
196
197   /**
198    * o instanceof Interval
199    */

200   public boolean equals(Object JavaDoc o) {
201     IntCharSet set = (IntCharSet) o;
202     if ( intervalls.size() != set.intervalls.size() ) return false;
203
204     for (int i = 0; i < intervalls.size(); i++) {
205       if ( !intervalls.elementAt(i).equals( set.intervalls.elementAt(i)) )
206         return false;
207     }
208
209     return true;
210   }
211
212   private char min(char a, char b) {
213     return a <= b ? a : b;
214   }
215
216   private char max(char a, char b) {
217     return a >= b ? a : b;
218   }
219
220   /* intersection */
221   public IntCharSet and(IntCharSet set) {
222     if (DEBUG) {
223       Out.dump("intersection");
224       Out.dump("this : "+this);
225       Out.dump("other : "+set);
226     }
227
228     IntCharSet result = new IntCharSet();
229
230     int i = 0; // index in this.intervalls
231
int j = 0; // index in set.intervalls
232

233     int size = intervalls.size();
234     int setSize = set.intervalls.size();
235
236     while (i < size && j < setSize) {
237       Interval x = (Interval) this.intervalls.elementAt(i);
238       Interval y = (Interval) set.intervalls.elementAt(j);
239
240       if (x.end < y.start) {
241         i++;
242         continue;
243       }
244
245       if (y.end < x.start) {
246         j++;
247         continue;
248       }
249
250       result.intervalls.addElement(
251         new Interval(
252           max(x.start, y.start),
253           min(x.end, y.end)
254           )
255         );
256
257       if (x.end >= y.end) j++;
258       if (y.end >= x.end) i++;
259     }
260
261     if (DEBUG) {
262       Out.dump("result: "+result);
263     }
264
265     return result;
266   }
267   
268   /* complement */
269   /* prec: this.contains(set), set != null */
270   public void sub(IntCharSet set) {
271     if (DEBUG) {
272       Out.dump("complement");
273       Out.dump("this : "+this);
274       Out.dump("other : "+set);
275     }
276
277     int i = 0; // index in this.intervalls
278
int j = 0; // index in set.intervalls
279

280     int setSize = set.intervalls.size();
281
282     while (i < intervalls.size() && j < setSize) {
283       Interval x = (Interval) this.intervalls.elementAt(i);
284       Interval y = (Interval) set.intervalls.elementAt(j);
285
286       if (DEBUG) {
287         Out.dump("this : "+this);
288         Out.dump("this ["+i+"] : "+x);
289         Out.dump("other ["+j+"] : "+y);
290       }
291
292       if (x.end < y.start) {
293         i++;
294         continue;
295       }
296
297       if (y.end < x.start) {
298         j++;
299         continue;
300       }
301
302       // x.end >= y.start && y.end >= x.start ->
303
// x.end <= y.end && x.start >= y.start (prec)
304

305       if ( x.start == y.start && x.end == y.end ) {
306         intervalls.removeElementAt(i);
307         j++;
308         continue;
309       }
310
311       // x.end <= y.end && x.start >= y.start &&
312
// (x.end < y.end || x.start > y.start) ->
313
// x.start < x.end
314

315       if ( x.start == y.start ) {
316         x.start = (char) (y.end+1);
317         j++;
318         continue;
319       }
320
321       if ( x.end == y.end ) {
322         x.end = (char) (y.start-1);
323         i++;
324         j++;
325         continue;
326       }
327
328       intervalls.insertElementAt(new Interval(x.start, (char) (y.start-1)), i);
329       x.start = (char) (y.end+1);
330
331       i++;
332       j++;
333     }
334
335     if (DEBUG) {
336       Out.dump("result: "+this);
337     }
338   }
339
340   public boolean containsElements() {
341     return intervalls.size() > 0;
342   }
343
344   public int numIntervalls() {
345     return intervalls.size();
346   }
347
348   // beware: depends on caller protocol, single user only
349
public Interval getNext() {
350     if (pos == intervalls.size()) pos = 0;
351     return (Interval) intervalls.elementAt(pos++);
352   }
353
354   /**
355    * Create a caseless version of this charset.
356    * <p>
357    * The caseless version contains all characters of this char set,
358    * and additionally all lower/upper/title case variants of the
359    * characters in this set.
360    *
361    * @return a caseless copy of this set
362    */

363   public IntCharSet getCaseless() {
364     IntCharSet n = copy();
365         
366     int size = intervalls.size();
367     for (int i=0; i < size; i++) {
368       Interval elem = (Interval) intervalls.elementAt(i);
369       for (char c = elem.start; c <= elem.end; c++) {
370         n.add(Character.toLowerCase(c));
371         n.add(Character.toUpperCase(c));
372         n.add(Character.toTitleCase(c));
373       }
374     }
375     
376     return n;
377   }
378
379
380   /**
381    * Make a string representation of this char set.
382    *
383    * @return a string representing this char set.
384    */

385   public String JavaDoc toString() {
386     StringBuffer JavaDoc result = new StringBuffer JavaDoc("{ ");
387
388     for (int i = 0; i < intervalls.size(); i++)
389       result.append( intervalls.elementAt(i) );
390
391     result.append(" }");
392
393     return result.toString();
394   }
395   
396   
397   /**
398    * Return a (deep) copy of this char set
399    *
400    * @return the copy
401    */

402   public IntCharSet copy() {
403     IntCharSet result = new IntCharSet();
404     int size = intervalls.size();
405     for (int i=0; i < size; i++) {
406       Interval iv = ((Interval) intervalls.elementAt(i)).copy();
407       result.intervalls.addElement(iv);
408     }
409     return result;
410   }
411 }
412
Popular Tags