KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xerces > internal > impl > xpath > regex > REUtil


/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 * if any, must include the following acknowledgment:
 * "This product includes software developed by the
 * Apache Software Foundation (http://www.apache.org/)."
 * Alternately, this acknowledgment may appear in the software itself,
 * if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 * not be used to endorse or promote products derived from this
 * software without prior written permission. For written
 * permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 * nor may "Apache" appear in their name, without prior written
 * permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


package com.sun.org.apache.xerces.internal.impl.xpath.regex;

import java.text.CharacterIterator;

62 /**
63  * @version $Id: REUtil.java,v 1.7 2002/11/20 00:49:47 twl Exp $
64  */

65
66 public final class REUtil {
67     private REUtil() {
68     }
69
70     static final int composeFromSurrogates(int high, int low) {
71         return 0x10000 + ((high-0xd800)<<10) + low-0xdc00;
72     }
73
74     static final boolean isLowSurrogate(int ch) {
75         return (ch & 0xfc00) == 0xdc00;
76     }
77
78     static final boolean isHighSurrogate(int ch) {
79         return (ch & 0xfc00) == 0xd800;
80     }
81
82     static final String JavaDoc decomposeToSurrogates(int ch) {
83         char[] chs = new char[2];
84         ch -= 0x10000;
85         chs[0] = (char)((ch>>10)+0xd800);
86         chs[1] = (char)((ch&0x3ff)+0xdc00);
87         return new String JavaDoc(chs);
88     }
89
90     static final String JavaDoc substring(CharacterIterator JavaDoc iterator, int begin, int end) {
91         char[] src = new char[end-begin];
92         for (int i = 0; i < src.length; i ++)
93             src[i] = iterator.setIndex(i+begin);
94         return new String JavaDoc(src);
95     }
96
97     // ================================================================
98

99     static final int getOptionValue(int ch) {
100         int ret = 0;
101         switch (ch) {
102           case 'i':
103             ret = RegularExpression.IGNORE_CASE;
104             break;
105           case 'm':
106             ret = RegularExpression.MULTIPLE_LINES;
107             break;
108           case 's':
109             ret = RegularExpression.SINGLE_LINE;
110             break;
111           case 'x':
112             ret = RegularExpression.EXTENDED_COMMENT;
113             break;
114           case 'u':
115             ret = RegularExpression.USE_UNICODE_CATEGORY;
116             break;
117           case 'w':
118             ret = RegularExpression.UNICODE_WORD_BOUNDARY;
119             break;
120           case 'F':
121             ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
122             break;
123           case 'H':
124             ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
125             break;
126           case 'X':
127             ret = RegularExpression.XMLSCHEMA_MODE;
128             break;
129           case ',':
130             ret = RegularExpression.SPECIAL_COMMA;
131             break;
132           default:
133         }
134         return ret;
135     }
136
137     static final int parseOptions(String JavaDoc opts) throws ParseException {
138         if (opts == null) return 0;
139         int options = 0;
140         for (int i = 0; i < opts.length(); i ++) {
141             int v = getOptionValue(opts.charAt(i));
142             if (v == 0)
143                 throw new ParseException("Unknown Option: "+opts.substring(i), -1);
144             options |= v;
145         }
146         return options;
147     }
148
149     static final String JavaDoc createOptionString(int options) {
150         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(9);
151         if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
152             sb.append((char)'F');
153         if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
154             sb.append((char)'H');
155         if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
156             sb.append((char)'X');
157         if ((options & RegularExpression.IGNORE_CASE) != 0)
158             sb.append((char)'i');
159         if ((options & RegularExpression.MULTIPLE_LINES) != 0)
160             sb.append((char)'m');
161         if ((options & RegularExpression.SINGLE_LINE) != 0)
162             sb.append((char)'s');
163         if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
164             sb.append((char)'u');
165         if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
166             sb.append((char)'w');
167         if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
168             sb.append((char)'x');
169         if ((options & RegularExpression.SPECIAL_COMMA) != 0)
170             sb.append((char)',');
171         return sb.toString().intern();
172     }
173
174     // ================================================================
175

176     static String JavaDoc stripExtendedComment(String JavaDoc regex) {
177         int len = regex.length();
178         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc(len);
179         int offset = 0;
180         while (offset < len) {
181             int ch = regex.charAt(offset++);
182                                                 // Skips a white space.
183
if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ')
184                 continue;
185
186             if (ch == '#') { // Skips chracters between '#' and a line end.
187
while (offset < len) {
188                     ch = regex.charAt(offset++);
189                     if (ch == '\r' || ch == '\n')
190                         break;
191                 }
192                 continue;
193             }
194
195             int next; // Strips an escaped white space.
196
if (ch == '\\' && offset < len) {
197                 if ((next = regex.charAt(offset)) == '#'
198                     || next == '\t' || next == '\n' || next == '\f'
199                     || next == '\r' || next == ' ') {
200                     buffer.append((char)next);
201                     offset ++;
202                 } else { // Other escaped character.
203
buffer.append((char)'\\');
204                     buffer.append((char)next);
205                     offset ++;
206                 }
207             } else // As is.
208
buffer.append((char)ch);
209         }
210         return buffer.toString();
211     }
212
213     // ================================================================
214

215     /**
216      * Sample entry.
217      * <div>Usage: <KBD>com.sun.org.apache.xerces.internal.utils.regex.REUtil &lt;regex&gt; &lt;string&gt;</KBD></div>
218      */

219     public static void main(String JavaDoc[] argv) {
220         String JavaDoc pattern = null;
221         try {
222             String JavaDoc options = "";
223             String JavaDoc target = null;
224             if( argv.length == 0 ) {
225                 System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" );
226                 System.exit( 0 );
227             }
228             for (int i = 0; i < argv.length; i ++) {
229                 if (argv[i].length() == 0 || argv[i].charAt(0) != '-') {
230                     if (pattern == null)
231                         pattern = argv[i];
232                     else if (target == null)
233                         target = argv[i];
234                     else
235                         System.err.println("Unnecessary: "+argv[i]);
236                 } else if (argv[i].equals("-i")) {
237                     options += "i";
238                 } else if (argv[i].equals("-m")) {
239                     options += "m";
240                 } else if (argv[i].equals("-s")) {
241                     options += "s";
242                 } else if (argv[i].equals("-u")) {
243                     options += "u";
244                 } else if (argv[i].equals("-w")) {
245                     options += "w";
246                 } else if (argv[i].equals("-X")) {
247                     options += "X";
248                 } else {
249                     System.err.println("Unknown option: "+argv[i]);
250                 }
251             }
252             RegularExpression reg = new RegularExpression(pattern, options);
253             System.out.println("RegularExpression: "+reg);
254             Match match = new Match();
255             reg.matches(target, match);
256             for (int i = 0; i < match.getNumberOfGroups(); i ++) {
257                 if (i == 0 ) System.out.print("Matched range for the whole pattern: ");
258                 else System.out.print("["+i+"]: ");
259                 if (match.getBeginning(i) < 0)
260                     System.out.println("-1");
261                 else {
262                     System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", ");
263                     System.out.println("\""+match.getCapturedText(i)+"\"");
264                 }
265             }
266         } catch (ParseException pe) {
267             if (pattern == null) {
268                 pe.printStackTrace();
269             } else {
270                 System.err.println("com.sun.org.apache.xerces.internal.utils.regex.ParseException: "+pe.getMessage());
271                 String JavaDoc indent = " ";
272                 System.err.println(indent+pattern);
273                 int loc = pe.getLocation();
274                 if (loc >= 0) {
275                     System.err.print(indent);
276                     for (int i = 0; i < loc; i ++) System.err.print("-");
277                     System.err.println("^");
278                 }
279             }
280         } catch (Exception JavaDoc e) {
281             e.printStackTrace();
282         }
283     }
284
285     static final int CACHESIZE = 20;
286     static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];
287     /**
288      * Creates a RegularExpression instance.
289      * This method caches created instances.
290      *
291      * @see RegularExpression#RegularExpression(java.lang.String, java.lang.String)
292      */

293     public static RegularExpression createRegex(String JavaDoc pattern, String JavaDoc options)
294         throws ParseException {
295         RegularExpression re = null;
296         int intOptions = REUtil.parseOptions(options);
297         synchronized (REUtil.regexCache) {
298             int i;
299             for (i = 0; i < REUtil.CACHESIZE; i ++) {
300                 RegularExpression cached = REUtil.regexCache[i];
301                 if (cached == null) {
302                     i = -1;
303                     break;
304                 }
305                 if (cached.equals(pattern, intOptions)) {
306                     re = cached;
307                     break;
308                 }
309             }
310             if (re != null) {
311                 if (i != 0) {
312                     System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i);
313                     REUtil.regexCache[0] = re;
314                 }
315             } else {
316                 re = new RegularExpression(pattern, options);
317                 System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1);
318                 REUtil.regexCache[0] = re;
319             }
320         }
321         return re;
322     }
323
324     /**
325      *
326      * @see RegularExpression#matches(java.lang.String)
327      */

328     public static boolean matches(String JavaDoc regex, String JavaDoc target) throws ParseException {
329         return REUtil.createRegex(regex, null).matches(target);
330     }
331
332     /**
333      *
334      * @see RegularExpression#matches(java.lang.String)
335      */

336     public static boolean matches(String JavaDoc regex, String JavaDoc options, String JavaDoc target) throws ParseException {
337         return REUtil.createRegex(regex, options).matches(target);
338     }
339
340     // ================================================================
341

342     /**
343      *
344      */

345     public static String JavaDoc quoteMeta(String JavaDoc literal) {
346         int len = literal.length();
347         StringBuffer JavaDoc buffer = null;
348         for (int i = 0; i < len; i ++) {
349             int ch = literal.charAt(i);
350             if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
351                 if (buffer == null) {
352                     buffer = new StringBuffer JavaDoc(i+(len-i)*2);
353                     if (i > 0) buffer.append(literal.substring(0, i));
354                 }
355                 buffer.append((char)'\\');
356                 buffer.append((char)ch);
357             } else if (buffer != null)
358                 buffer.append((char)ch);
359         }
360         return buffer != null ? buffer.toString() : literal;
361     }
362
363     // ================================================================
364

365     static void dumpString(String JavaDoc v) {
366         for (int i = 0; i < v.length(); i ++) {
367             System.out.print(Integer.toHexString(v.charAt(i)));
368             System.out.print(" ");
369         }
370         System.out.println();
371     }
372 }
373
Popular Tags