KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > JFlex > CharClasses


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * JFlex 1.4.1 *
 * Copyright (C) 1998-2004 Gerwin Klein <lsf@jflex.de> *
 * All rights reserved. *
 * *
 * This program is free software; you can redistribute it and/or modify *
 * it under the terms of the GNU General Public License. See the file *
 * COPYRIGHT for more information. *
 * *
 * This program is distributed in the hope that it will be useful, *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
 * GNU General Public License for more details. *
 * *
 * You should have received a copy of the GNU General Public License along *
 * with this program; if not, write to the Free Software Foundation, Inc., *
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
 * *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

20
21 package JFlex;
22
23 import java.util.*;
24
25
26 /**
27  *
28  * @author Gerwin Klein
29  * @version JFlex 1.4.1, $Revision: 2.6 $, $Date: 2004/11/06 23:03:32 $
30  */

31 public class CharClasses {
32
33   /** debug flag (for char classes only) */
34   private static final boolean DEBUG = false;
35
36   /** the largest character that can be used in char classes */
37   public static final char maxChar = '\uFFFF';
38
39   /** the char classes */
40   private Vector /* of IntCharSet */ classes;
41
42   /** the largest character actually used in a specification */
43   private char maxCharUsed;
44
45   /**
46    * Constructs a new CharClass object that provides space for
47    * classes of characters from 0 to maxCharCode.
48    *
49    * Initially all characters are in class 0.
50    *
51    * @param maxCharCode the last character code to be
52    * considered. (127 for 7bit Lexers,
53    * 255 for 8bit Lexers and 0xFFFF
54    * for Unicode Lexers).
55    */

56   public CharClasses(int maxCharCode) {
57     if (maxCharCode < 0 || maxCharCode > 0xFFFF)
58       throw new IllegalArgumentException JavaDoc();
59
60     maxCharUsed = (char) maxCharCode;
61
62     classes = new Vector();
63     classes.addElement(new IntCharSet(new Interval((char) 0, maxChar)));
64   }
65
66
67   /**
68    * Returns the greatest Unicode value of the current input character set.
69    */

70   public char getMaxCharCode() {
71     return maxCharUsed;
72   }
73   
74
75   /**
76    * Sets the larges Unicode value of the current input character set.
77    *
78    * @param charCode the largest character code, used for the scanner
79    * (i.e. %7bit, %8bit, %16bit etc.)
80    */

81   public void setMaxCharCode(int charCode) {
82     if (charCode < 0 || charCode > 0xFFFF)
83       throw new IllegalArgumentException JavaDoc();
84
85     maxCharUsed = (char) charCode;
86   }
87   
88
89   /**
90    * Returns the current number of character classes.
91    */

92   public int getNumClasses() {
93     return classes.size();
94   }
95
96
97
98   /**
99    * Updates the current partition, so that the specified set of characters
100    * gets a new character class.
101    *
102    * Characters that are elements of <code>set</code> are not in the same
103    * equivalence class with characters that are not elements of <code>set</code>.
104    *
105    * @param set the set of characters to distinguish from the rest
106    * @param caseless if true upper/lower/title case are considered equivalent
107    */

108   public void makeClass(IntCharSet set, boolean caseless) {
109     if (caseless) set = set.getCaseless();
110     
111     if ( DEBUG ) {
112       Out.dump("makeClass("+set+")");
113       dump();
114     }
115
116     int oldSize = classes.size();
117     for (int i = 0; i < oldSize; i++) {
118       IntCharSet x = (IntCharSet) classes.elementAt(i);
119
120       if (x.equals(set)) return;
121
122       IntCharSet and = x.and(set);
123
124       if ( and.containsElements() ) {
125         if ( x.equals(and) ) {
126           set.sub(and);
127           continue;
128         }
129         else if ( set.equals(and) ) {
130           x.sub(and);
131           classes.addElement(and);
132           if (DEBUG) {
133             Out.dump("makeClass(..) finished");
134             dump();
135           }
136           return;
137         }
138
139         set.sub(and);
140         x.sub(and);
141         classes.addElement(and);
142       }
143     }
144     
145     if (DEBUG) {
146       Out.dump("makeClass(..) finished");
147       dump();
148     }
149   }
150   
151
152   /**
153    * Returns the code of the character class the specified character belongs to.
154    */

155   public int getClassCode(char letter) {
156     int i = -1;
157     while (true) {
158       IntCharSet x = (IntCharSet) classes.elementAt(++i);
159       if ( x.contains(letter) ) return i;
160     }
161   }
162
163   /**
164    * Dump charclasses to the dump output stream
165    */

166   public void dump() {
167     Out.dump(toString());
168   }
169
170   
171   /**
172    * Return a string representation of one char class
173    *
174    * @param theClass the index of the class to
175    */

176   public String JavaDoc toString(int theClass) {
177     return classes.elementAt(theClass).toString();
178   }
179
180
181   /**
182    * Return a string representation of the char classes
183    * stored in this class.
184    *
185    * Enumerates the classes by index.
186    */

187   public String JavaDoc toString() {
188     StringBuffer JavaDoc result = new StringBuffer JavaDoc("CharClasses:");
189
190     result.append(Out.NL);
191
192     for (int i = 0; i < classes.size(); i++)
193       result.append("class "+i+":"+Out.NL+classes.elementAt(i)+Out.NL);
194     
195     return result.toString();
196   }
197
198   
199   /**
200    * Creates a new character class for the single character <code>singleChar</code>.
201    *
202    * @param caseless if true upper/lower/title case are considered equivalent
203    */

204   public void makeClass(char singleChar, boolean caseless) {
205     makeClass(new IntCharSet(singleChar), caseless);
206   }
207
208
209   /**
210    * Creates a new character class for each character of the specified String.
211    *
212    * @param caseless if true upper/lower/title case are considered equivalent
213    */

214   public void makeClass(String JavaDoc str, boolean caseless) {
215     for (int i = 0; i < str.length(); i++) makeClass(str.charAt(i), caseless);
216   }
217
218
219   /**
220    * Updates the current partition, so that the specified set of characters
221    * gets a new character class.
222    *
223    * Characters that are elements of the set <code>v</code> are not in the same
224    * equivalence class with characters that are not elements of the set <code>v</code>.
225    *
226    * @param v a Vector of Interval objects.
227    * This Vector represents a set of characters. The set of characters is
228    * the union of all intervalls in the Vector.
229    *
230    * @param caseless if true upper/lower/title case are considered equivalent
231    */

232   public void makeClass(Vector /* Interval */ v, boolean caseless) {
233     makeClass(new IntCharSet(v), caseless);
234   }
235   
236
237   /**
238    * Updates the current partition, so that the set of all characters not contained in the specified
239    * set of characters gets a new character class.
240    *
241    * Characters that are elements of the set <code>v</code> are not in the same
242    * equivalence class with characters that are not elements of the set <code>v</code>.
243    *
244    * This method is equivalent to <code>makeClass(v)</code>
245    *
246    * @param v a Vector of Interval objects.
247    * This Vector represents a set of characters. The set of characters is
248    * the union of all intervalls in the Vector.
249    *
250    * @param caseless if true upper/lower/title case are considered equivalent
251    */

252   public void makeClassNot(Vector v, boolean caseless) {
253     makeClass(new IntCharSet(v), caseless);
254   }
255
256
257   /**
258    * Returns an array that contains the character class codes of all characters
259    * in the specified set of input characters.
260    */

261   private int [] getClassCodes(IntCharSet set, boolean negate) {
262
263     if (DEBUG) {
264       Out.dump("getting class codes for "+set);
265       if (negate)
266         Out.dump("[negated]");
267     }
268
269     int size = classes.size();
270
271     // [fixme: optimize]
272
int temp [] = new int [size];
273     int length = 0;
274
275     for (int i = 0; i < size; i++) {
276       IntCharSet x = (IntCharSet) classes.elementAt(i);
277       if ( negate ) {
278         if ( !set.and(x).containsElements() ) {
279           temp[length++] = i;
280           if (DEBUG) Out.dump("code "+i);
281         }
282       }
283       else {
284         if ( set.and(x).containsElements() ) {
285           temp[length++] = i;
286           if (DEBUG) Out.dump("code "+i);
287         }
288       }
289     }
290
291     int result [] = new int [length];
292     System.arraycopy(temp, 0, result, 0, length);
293     
294     return result;
295   }
296
297
298   /**
299    * Returns an array that contains the character class codes of all characters
300    * in the specified set of input characters.
301    *
302    * @param intervallVec a Vector of Intervalls, the set of characters to get
303    * the class codes for
304    *
305    * @return an array with the class codes for intervallVec
306    */

307   public int [] getClassCodes(Vector /* Interval */ intervallVec) {
308     return getClassCodes(new IntCharSet(intervallVec), false);
309   }
310
311
312   /**
313    * Returns an array that contains the character class codes of all characters
314    * that are <strong>not</strong> in the specified set of input characters.
315    *
316    * @param intervallVec a Vector of Intervalls, the complement of the
317    * set of characters to get the class codes for
318    *
319    * @return an array with the class codes for the complement of intervallVec
320    */

321   public int [] getNotClassCodes(Vector /* Interval */ intervallVec) {
322     return getClassCodes(new IntCharSet(intervallVec), true);
323   }
324
325
326   /**
327    * Check consistency of the stored classes [debug].
328    *
329    * all classes must be disjoint, checks if all characters
330    * have a class assigned.
331    */

332   public void check() {
333     for (int i = 0; i < classes.size(); i++)
334       for (int j = i+1; j < classes.size(); j++) {
335         IntCharSet x = (IntCharSet) classes.elementAt(i);
336         IntCharSet y = (IntCharSet) classes.elementAt(j);
337         if ( x.and(y).containsElements() ) {
338           System.out.println("Error: non disjoint char classes "+i+" and "+j);
339           System.out.println("class "+i+": "+x);
340           System.out.println("class "+j+": "+y);
341         }
342       }
343
344     // check if each character has a classcode
345
// (= if getClassCode terminates)
346
for (char c = 0; c < maxChar; c++) {
347       getClassCode(c);
348       if (c % 100 == 0) System.out.print(".");
349     }
350     
351     getClassCode(maxChar);
352   }
353
354
355   /**
356    * Returns an array of all CharClassIntervalls in this
357    * char class collection.
358    *
359    * The array is ordered by char code, i.e.
360    * <code>result[i+1].start = result[i].end+1</code>
361    *
362    * Each CharClassInterval contains the number of the
363    * char class it belongs to.
364    */

365   public CharClassInterval [] getIntervalls() {
366     int i, c;
367     int size = classes.size();
368     int numIntervalls = 0;
369
370     for (i = 0; i < size; i++)
371       numIntervalls+= ((IntCharSet) classes.elementAt(i)).numIntervalls();
372
373     CharClassInterval [] result = new CharClassInterval[numIntervalls];
374     
375     i = 0;
376     c = 0;
377     while (i < numIntervalls) {
378       int code = getClassCode((char) c);
379       IntCharSet set = (IntCharSet) classes.elementAt(code);
380       Interval iv = set.getNext();
381       
382       result[i++] = new CharClassInterval(iv.start, iv.end, code);
383       c = iv.end+1;
384     }
385
386     return result;
387   }
388 }
389
Popular Tags