KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > lang > CharSet


1 /*
2  * Copyright 2002-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.commons.lang;
17
18 import java.io.Serializable JavaDoc;
19 import java.util.HashMap JavaDoc;
20 import java.util.HashSet JavaDoc;
21 import java.util.Iterator JavaDoc;
22 import java.util.Map JavaDoc;
23 import java.util.Set JavaDoc;
24
25 /**
26  * <p>A set of characters.</p>
27  *
28  * <p>Instances are immutable, but instances of subclasses may not be.</p>
29  *
30  * @author Henri Yandell
31  * @author Stephen Colebourne
32  * @author Phil Steitz
33  * @author Pete Gieser
34  * @author Gary Gregory
35  * @since 1.0
36  * @version $Id: CharSet.java 161243 2005-04-14 04:30:28Z ggregory $
37  */

38 public class CharSet implements Serializable JavaDoc {
39
40     /** Serialization lock, Lang version 2.0. */
41     private static final long serialVersionUID = 5947847346149275958L;
42
43     /**
44      * A CharSet defining no characters.
45      * @since 2.0
46      */

47     public static final CharSet EMPTY = new CharSet((String JavaDoc) null);
48
49     /**
50      * A CharSet defining ASCII alphabetic characters "a-zA-Z".
51      * @since 2.0
52      */

53     public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
54
55     /**
56      * A CharSet defining ASCII alphabetic characters "a-z".
57      * @since 2.0
58      */

59     public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
60
61     /**
62      * A CharSet defining ASCII alphabetic characters "A-Z".
63      * @since 2.0
64      */

65     public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
66
67     /**
68      * A CharSet defining ASCII alphabetic characters "0-9".
69      * @since 2.0
70      */

71     public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
72
73     /**
74      * A Map of the common cases used in the factory.
75      * Subclasses can add more common patterns if desired.
76      * @since 2.0
77      */

78     protected static final Map JavaDoc COMMON = new HashMap JavaDoc();
79     
80     static {
81         COMMON.put(null, EMPTY);
82         COMMON.put("", EMPTY);
83         COMMON.put("a-zA-Z", ASCII_ALPHA);
84         COMMON.put("A-Za-z", ASCII_ALPHA);
85         COMMON.put("a-z", ASCII_ALPHA_LOWER);
86         COMMON.put("A-Z", ASCII_ALPHA_UPPER);
87         COMMON.put("0-9", ASCII_NUMERIC);
88     }
89
90     /** The set of CharRange objects. */
91     private Set JavaDoc set = new HashSet JavaDoc();
92
93     //-----------------------------------------------------------------------
94
/**
95      * <p>Factory method to create a new CharSet using a special syntax.</p>
96      *
97      * <ul>
98      * <li><code>null</code> or empty string ("")
99      * - set containing no characters</li>
100      * <li>Single character, such as "a"
101      * - set containing just that character</li>
102      * <li>Multi character, such as "a-e"
103      * - set containing characters from one character to the other</li>
104      * <li>Negated, such as "^a" or "^a-e"
105      * - set containing all characters except those defined</li>
106      * <li>Combinations, such as "abe-g"
107      * - set containing all the characters from the individual sets</li>
108      * </ul>
109      *
110      * <p>The matching order is:</p>
111      * <ol>
112      * <li>Negated multi character range, such as "^a-e"
113      * <li>Ordinary multi character range, such as "a-e"
114      * <li>Negated single character, such as "^a"
115      * <li>Ordinary single character, such as "a"
116      * </ol>
117      * <p>Matching works left to right. Once a match is found the
118      * search starts again from the next character.</p>
119      *
120      * <p>If the same range is defined twice using the same syntax, only
121      * one range will be kept.
122      * Thus, "a-ca-c" creates only one range of "a-c".</p>
123      *
124      * <p>If the start and end of a range are in the wrong order,
125      * they are reversed. Thus "a-e" is the same as "e-a".
126      * As a result, "a-ee-a" would create only one range,
127      * as the "a-e" and "e-a" are the same.</p>
128      *
129      * <p>The set of characters represented is the union of the specified ranges.</p>
130      *
131      * <p>All CharSet objects returned by this method will be immutable.</p>
132      *
133      * @param setStr the String describing the set, may be null
134      * @return a CharSet instance
135      * @since 2.0
136      */

137     public static CharSet getInstance(String JavaDoc setStr) {
138         Object JavaDoc set = COMMON.get(setStr);
139         if (set != null) {
140             return (CharSet) set;
141         }
142         return new CharSet(setStr);
143     }
144
145     //-----------------------------------------------------------------------
146
/**
147      * <p>Constructs a new CharSet using the set syntax.</p>
148      *
149      * @param setStr the String describing the set, may be null
150      * @since 2.0
151      */

152     protected CharSet(String JavaDoc setStr) {
153         super();
154         add(setStr);
155     }
156
157     /**
158      * <p>Constructs a new CharSet using the set syntax.
159      * Each string is merged in with the set.</p>
160      *
161      * @param set Strings to merge into the initial set
162      * @throws NullPointerException if set is <code>null</code>
163      */

164     protected CharSet(String JavaDoc[] set) {
165         super();
166         int sz = set.length;
167         for (int i = 0; i < sz; i++) {
168             add(set[i]);
169         }
170     }
171
172     //-----------------------------------------------------------------------
173
/**
174      * <p>Add a set definition string to the <code>CharSet</code>.</p>
175      *
176      * @param str set definition string
177      */

178     protected void add(String JavaDoc str) {
179         if (str == null) {
180             return;
181         }
182
183         int len = str.length();
184         int pos = 0;
185         while (pos < len) {
186             int remainder = (len - pos);
187             if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
188                 // negated range
189
set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true));
190                 pos += 4;
191             } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
192                 // range
193
set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2)));
194                 pos += 3;
195             } else if (remainder >= 2 && str.charAt(pos) == '^') {
196                 // negated char
197
set.add(new CharRange(str.charAt(pos + 1), true));
198                 pos += 2;
199             } else {
200                 // char
201
set.add(new CharRange(str.charAt(pos)));
202                 pos += 1;
203             }
204         }
205     }
206
207     //-----------------------------------------------------------------------
208
/**
209      * <p>Gets the internal set as an array of CharRange objects.</p>
210      *
211      * @return an array of immutable CharRange objects
212      * @since 2.0
213      */

214     public CharRange[] getCharRanges() {
215         return (CharRange[]) set.toArray(new CharRange[set.size()]);
216     }
217
218     //-----------------------------------------------------------------------
219
/**
220      * <p>Does the <code>CharSet</code> contain the specified
221      * character <code>ch</code>.</p>
222      *
223      * @param ch the character to check for
224      * @return <code>true</code> if the set contains the characters
225      */

226     public boolean contains(char ch) {
227         for (Iterator JavaDoc it = set.iterator(); it.hasNext();) {
228             CharRange range = (CharRange) it.next();
229             if (range.contains(ch)) {
230                 return true;
231             }
232         }
233         return false;
234     }
235
236     // Basics
237
//-----------------------------------------------------------------------
238
/**
239      * <p>Compares two CharSet objects, returning true if they represent
240      * exactly the same set of characters defined in the same way.</p>
241      *
242      * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i>
243      * equal according to this method.</p>
244      *
245      * @param obj the object to compare to
246      * @return true if equal
247      * @since 2.0
248      */

249     public boolean equals(Object JavaDoc obj) {
250         if (obj == this) {
251             return true;
252         }
253         if (obj instanceof CharSet == false) {
254             return false;
255         }
256         CharSet other = (CharSet) obj;
257         return set.equals(other.set);
258     }
259
260     /**
261      * <p>Gets a hashCode compatible with the equals method.</p>
262      *
263      * @return a suitable hashCode
264      * @since 2.0
265      */

266     public int hashCode() {
267         return 89 + set.hashCode();
268     }
269
270     /**
271      * <p>Gets a string representation of the set.</p>
272      *
273      * @return string representation of the set
274      */

275     public String JavaDoc toString() {
276         return set.toString();
277     }
278
279 }
280
Popular Tags