KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > utils > regex > REUtil


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.utils.regex;
59
60
61 import java.text.CharacterIterator JavaDoc;
62
63 public final class REUtil {
64     private REUtil() {
65     }
66
67     static final int composeFromSurrogates(int high, int low) {
68         return 0x10000 + ((high-0xd800)<<10) + low-0xdc00;
69     }
70
71     static final boolean isLowSurrogate(int ch) {
72         return (ch & 0xfc00) == 0xdc00;
73     }
74
75     static final boolean isHighSurrogate(int ch) {
76         return (ch & 0xfc00) == 0xd800;
77     }
78
79     static final String JavaDoc decomposeToSurrogates(int ch) {
80         char[] chs = new char[2];
81         ch -= 0x10000;
82         chs[0] = (char)((ch>>10)+0xd800);
83         chs[1] = (char)((ch&0x3ff)+0xdc00);
84         return new String JavaDoc(chs);
85     }
86
87     static final String JavaDoc substring(CharacterIterator JavaDoc iterator, int begin, int end) {
88         char[] src = new char[end-begin];
89         for (int i = 0; i < src.length; i ++)
90             src[i] = iterator.setIndex(i+begin);
91         return new String JavaDoc(src);
92     }
93
94     // ================================================================
95

96     static final int getOptionValue(int ch) {
97         int ret = 0;
98         switch (ch) {
99           case 'i':
100             ret = RegularExpression.IGNORE_CASE;
101             break;
102           case 'm':
103             ret = RegularExpression.MULTIPLE_LINES;
104             break;
105           case 's':
106             ret = RegularExpression.SINGLE_LINE;
107             break;
108           case 'x':
109             ret = RegularExpression.EXTENDED_COMMENT;
110             break;
111           case 'u':
112             ret = RegularExpression.USE_UNICODE_CATEGORY;
113             break;
114           case 'w':
115             ret = RegularExpression.UNICODE_WORD_BOUNDARY;
116             break;
117           case 'F':
118             ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
119             break;
120           case 'H':
121             ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
122             break;
123           case 'X':
124             ret = RegularExpression.XMLSCHEMA_MODE;
125             break;
126           case ',':
127             ret = RegularExpression.SPECIAL_COMMA;
128             break;
129           default:
130         }
131         return ret;
132     }
133
134     static final int parseOptions(String JavaDoc opts) throws ParseException {
135         if (opts == null) return 0;
136         int options = 0;
137         for (int i = 0; i < opts.length(); i ++) {
138             int v = getOptionValue(opts.charAt(i));
139             if (v == 0)
140                 throw new ParseException("Unknown Option: "+opts.substring(i), -1);
141             options |= v;
142         }
143         return options;
144     }
145
146     static final String JavaDoc createOptionString(int options) {
147         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(9);
148         if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
149             sb.append((char)'F');
150         if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
151             sb.append((char)'H');
152         if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
153             sb.append((char)'X');
154         if ((options & RegularExpression.IGNORE_CASE) != 0)
155             sb.append((char)'i');
156         if ((options & RegularExpression.MULTIPLE_LINES) != 0)
157             sb.append((char)'m');
158         if ((options & RegularExpression.SINGLE_LINE) != 0)
159             sb.append((char)'s');
160         if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
161             sb.append((char)'u');
162         if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
163             sb.append((char)'w');
164         if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
165             sb.append((char)'x');
166         if ((options & RegularExpression.SPECIAL_COMMA) != 0)
167             sb.append((char)',');
168         return sb.toString().intern();
169     }
170
171     // ================================================================
172

173     static String JavaDoc stripExtendedComment(String JavaDoc regex) {
174         int len = regex.length();
175         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc(len);
176         int offset = 0;
177         while (offset < len) {
178             int ch = regex.charAt(offset++);
179                                                 // Skips a white space.
180
if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ')
181                 continue;
182
183             if (ch == '#') { // Skips chracters between '#' and a line end.
184
while (offset < len) {
185                     ch = regex.charAt(offset++);
186                     if (ch == '\r' || ch == '\n')
187                         break;
188                 }
189                 continue;
190             }
191
192             int next; // Strips an escaped white space.
193
if (ch == '\\' && offset < len) {
194                 if ((next = regex.charAt(offset)) == '#'
195                     || next == '\t' || next == '\n' || next == '\f'
196                     || next == '\r' || next == ' ') {
197                     buffer.append((char)next);
198                     offset ++;
199                 } else { // Other escaped character.
200
buffer.append((char)'\\');
201                     buffer.append((char)next);
202                     offset ++;
203                 }
204             } else // As is.
205
buffer.append((char)ch);
206         }
207         return buffer.toString();
208     }
209
210     // ================================================================
211

212     /**
213      * Sample entry.
214      * <div>Usage: <KBD>org.enhydra.apache.xerces.utils.regex.REUtil &lt;regex&gt; &lt;string&gt;</KBD></div>
215      */

216     public static void main(String JavaDoc[] argv) {
217         String JavaDoc pattern = null;
218         try {
219             String JavaDoc options = "";
220             String JavaDoc target = null;
221             if( argv.length == 0 ) {
222                 System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" );
223                 System.exit( 0 );
224             }
225             for (int i = 0; i < argv.length; i ++) {
226                 if (argv[i].length() == 0 || argv[i].charAt(0) != '-') {
227                     if (pattern == null)
228                         pattern = argv[i];
229                     else if (target == null)
230                         target = argv[i];
231                     else
232                         System.err.println("Unnecessary: "+argv[i]);
233                 } else if (argv[i].equals("-i")) {
234                     options += "i";
235                 } else if (argv[i].equals("-m")) {
236                     options += "m";
237                 } else if (argv[i].equals("-s")) {
238                     options += "s";
239                 } else if (argv[i].equals("-u")) {
240                     options += "u";
241                 } else if (argv[i].equals("-w")) {
242                     options += "w";
243                 } else if (argv[i].equals("-X")) {
244                     options += "X";
245                 } else {
246                     System.err.println("Unknown option: "+argv[i]);
247                 }
248             }
249             RegularExpression reg = new RegularExpression(pattern, options);
250             System.out.println("RegularExpression: "+reg);
251             Match match = new Match();
252             reg.matches(target, match);
253             for (int i = 0; i < match.getNumberOfGroups(); i ++) {
254                 if (i == 0 ) System.out.print("Matched range for the whole pattern: ");
255                 else System.out.print("["+i+"]: ");
256                 if (match.getBeginning(i) < 0)
257                     System.out.println("-1");
258                 else {
259                     System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", ");
260                     System.out.println("\""+match.getCapturedText(i)+"\"");
261                 }
262             }
263         } catch (ParseException pe) {
264             if (pattern == null) {
265                 pe.printStackTrace();
266             } else {
267                 System.err.println("org.enhydra.apache.xerces.utils.regex.ParseException: "+pe.getMessage());
268                 String JavaDoc indent = " ";
269                 System.err.println(indent+pattern);
270                 int loc = pe.getLocation();
271                 if (loc >= 0) {
272                     System.err.print(indent);
273                     for (int i = 0; i < loc; i ++) System.err.print("-");
274                     System.err.println("^");
275                 }
276             }
277         } catch (Exception JavaDoc e) {
278             e.printStackTrace();
279         }
280     }
281
282     static final int CACHESIZE = 20;
283     static RegularExpression[] regexCache = new RegularExpression[CACHESIZE];
284     /**
285      * Creates a RegularExpression instance.
286      * This method caches created instances.
287      *
288      * @see org.enhydra.apache.xerces.utils.regex.RegularExpression#RegularExpression(java.lang.String, java.lang.String)
289      */

290     public static RegularExpression createRegex(String JavaDoc pattern, String JavaDoc options)
291         throws ParseException {
292         RegularExpression re = null;
293         int intOptions = REUtil.parseOptions(options);
294         synchronized (REUtil.regexCache) {
295             int i;
296             for (i = 0; i < REUtil.CACHESIZE; i ++) {
297                 re = REUtil.regexCache[i];
298                 if (re == null) {
299                     i = -1;
300                     break;
301                 }
302                 if (re.equals(pattern, intOptions))
303                     break;
304             }
305             if (re != null) {
306                 if (i != 0) {
307                     System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i);
308                     REUtil.regexCache[0] = re;
309                 }
310             } else {
311                 re = new RegularExpression(pattern, options);
312                 System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1);
313                 REUtil.regexCache[0] = re;
314             }
315         }
316         return re;
317     }
318
319     /**
320      *
321      * @see org.enhydra.apache.xerces.utils.regex.RegularExpression#matches(java.lang.String)
322      */

323     public static boolean matches(String JavaDoc regex, String JavaDoc target) throws ParseException {
324         return REUtil.createRegex(regex, null).matches(target);
325     }
326
327     /**
328      *
329      * @see org.enhydra.apache.xerces.utils.regex.RegularExpression#matches(java.lang.String)
330      */

331     public static boolean matches(String JavaDoc regex, String JavaDoc options, String JavaDoc target) throws ParseException {
332         return REUtil.createRegex(regex, options).matches(target);
333     }
334
335     // ================================================================
336

337     /**
338      *
339      */

340     public static String JavaDoc quoteMeta(String JavaDoc literal) {
341         int len = literal.length();
342         StringBuffer JavaDoc buffer = null;
343         for (int i = 0; i < len; i ++) {
344             int ch = literal.charAt(i);
345             if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
346                 if (buffer == null) {
347                     buffer = new StringBuffer JavaDoc(i+(len-i)*2);
348                     if (i > 0) buffer.append(literal.substring(0, i));
349                 }
350                 buffer.append((char)'\\');
351             } else if (buffer != null)
352                 buffer.append((char)ch);
353         }
354         return buffer != null ? buffer.toString() : literal;
355     }
356
357     // ================================================================
358

359     static void dumpString(String JavaDoc v) {
360         for (int i = 0; i < v.length(); i ++) {
361             System.out.print(Integer.toHexString(v.charAt(i)));
362             System.out.print(" ");
363         }
364         System.out.println();
365     }
366 }
367
Popular Tags