KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > javolution > text > CharSet


/*
 * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
 * Copyright (C) 2006 - Javolution (http://javolution.org/)
 * All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this software is
 * freely granted, provided that this notice is preserved.
 */

9 package javolution.text;
10
11 import javolution.lang.Immutable;
12 import javolution.lang.MathLib;
13
14 /**
15  * <p> This class represents a set of characters.</p>
16  * <p> Instances of this class are typically used for parsing purpose
17  * (faster than regular expressions for simple patterns). For example:[code]
18  * // Integration with Text.
19  * Text number;
20  * int exponentIndex = num.indexOfAny(CharSet.valueOf('e', 'E'));
21  *
22  * // Integration with TextFormat.
23  * public List<Integer> parse(CharSequence csq, TextFormat.Cursor cursor) {
24  * FastTable<Integer> numbers = FastTable.newInstance();
25  * while (cursor.skip(CharSet.WHITESPACES, csq)) {
26  * numbers.add(TypeFormat.parseInt(csq, cursor));
27  * }
28  * return numbers;
29  * }
30  * [/code]
31  *
32  * @author <a HREF="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
33  * @version 3.7, January 1, 2006
34  */

35 public final class CharSet implements Immutable {
36
37     /**
38      * Represents an empty character set.
39      */

40     public static final CharSet EMPTY = new CharSet(new long[0]);
41
42     /**
43      * Represents white spaces characters according to Java
44      * (see {@link Character#isWhitespace(char)}).
45      */

46     public static final CharSet WHITESPACES = CharSet.valueOf(new char[] { 0x9,
47             0xA, 0xB, 0xC, 0xD, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x1680, 0x180E,
48             0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2008,
49             0x2009, 0x200A, 0x200B, 0x2028, 0x2029, 0x205F, 0x3000 });
50
51     /**
52      * Represents spaces characters according to Java
53      * (see {@link Character#isSpaceChar(char)}).
54      */

55     public static final CharSet SPACES = CharSet.valueOf(new char[] { 0x20,
56             0xA0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
57             0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x2028,
58             0x2029, 0x202F, 0x205F, 0x3000 });
59
60     /**
61      * Represents ISO control characters according to Java
62      * (see {@link Character#isISOControl(char)}).
63      */

64     public static final CharSet ISO_CONTROLS = CharSet.valueOf(new char[] {
65             0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC,
66             0xD, 0xE, 0xF, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
67             0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x7F, 0x80, 0x81,
68             0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C,
69             0x8D, 0x8E, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
70             0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F });
71
72     /**
73      * Holds the containment mapping.
74      */

75     private final long[] _mapping;
76
77     /**
78      * Creates a character set with the specified mapping.
79      *
80      * @param mapping the character set mapping.
81      */

82     private CharSet(long[] mapping) {
83         _mapping = mapping;
84     }
85
86     /**
87      * Returns the character set holding the specified characters.
88      *
89      * @param chars the characters contained by this character set.
90      * @return the corresponding character set.
91      */

92     public static CharSet valueOf(char[]/*...*/chars) {
93         int maxChar = 0;
94         for (int i = chars.length; --i >= 0;) {
95             if (chars[i] > maxChar) {
96                 maxChar = chars[i];
97             }
98         }
99         CharSet charSet = new CharSet(new long[(maxChar >> 6) + 1]);
100         for (int i = chars.length; --i >= 0;) {
101             char c = chars[i];
102             charSet._mapping[c >> 6] |= 1L << (c & 63);
103         }
104         return charSet;
105     }
106
107     /**
108      * Returns the character set holding the characters in the specified
109      * range.
110      *
111      * @param first the first character.
112      * @param last the last character.
113      * @return the corresponding character set.
114      * @throws IllegalArgumentException if <code>first > last</code>
115      */

116     public static CharSet rangeOf(char first, char last) {
117         if (first > last)
118             throw new IllegalArgumentException JavaDoc(
119                     "first should be less or equal to last");
120         CharSet charSet = new CharSet(new long[(last >> 6) + 1]);
121         for (char c = first; c <= last; c++) {
122             charSet._mapping[c >> 6] |= 1L << (c & 63);
123         }
124         return charSet;
125
126     }
127
128     /**
129      * Indicates if the specified character is contained by this character set.
130      *
131      * @param c the character to test.
132      * @return <code>true</code> if this character set contains the specified
133      * character; <code>false</code> otherwise.
134      */

135     public boolean contains(char c) {
136         final int i = c >> 6;
137         return i < _mapping.length ? (_mapping[i] & (1L << (c & 63))) != 0
138                 : false;
139     }
140
141     /**
142      * Returns the character set containing the characters from this
143      * character set plus the characters from the character set specified.
144      *
145      * @param that the set containing the characters to be added.
146      * @return <code>this + that</code>
147      */

148     public CharSet plus(CharSet that) {
149         if (that._mapping.length > this._mapping.length)
150             return that.plus(this);
151         CharSet result = this.copy();
152         for (int i = that._mapping.length; --i >= 0;) {
153             result._mapping[i] |= that._mapping[i];
154         }
155         return result;
156     }
157
158     /**
159      * Returns the character set containing the characters from this
160      * character minus the characters from the character set specified.
161      *
162      * @param that the set containing the character to be removed.
163      * @return <code>this - that</code>
164      */

165     public CharSet minus(CharSet that) {
166         CharSet result = this.copy();
167         for (int i = MathLib.min(this._mapping.length, that._mapping.length); --i >= 0;) {
168             result._mapping[i] &= ~that._mapping[i];
169         }
170         return result;
171     }
172
173     /**
174      * Returns the textual representation of this character set.
175      *
176      * @return the textual representation.
177      */

178     public String JavaDoc toString() {
179         TextBuilder tb = TextBuilder.newInstance();
180         tb.append('{');
181         int length = _mapping.length << 6;
182         for (int i = 0; i < length; i++) {
183             if (this.contains((char) i)) {
184                 if (tb.length() > 1) {
185                     tb.append(',');
186                     tb.append(' ');
187                 }
188                 tb.append('\'');
189                 tb.append((char) i);
190                 tb.append('\'');
191             }
192         }
193         tb.append('}');
194         return tb.toString();
195     }
196
197     /**
198      * Returns a copy of this character set.
199      *
200      * @return an independant copy.
201      */

202     private CharSet copy() {
203         CharSet charSet = new CharSet(new long[this._mapping.length]);
204         for (int i = _mapping.length; --i >= 0;) {
205             charSet._mapping[i] = _mapping[i];
206         }
207         return charSet;
208     }
209 }
Popular Tags