KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > impl > xpath > regex > REUtil


/*
 * Copyright 1999-2002,2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.xerces.impl.xpath.regex;

import java.text.CharacterIterator;

21 /**
22  * @xerces.internal
23  *
24  * @version $Id: REUtil.java,v 1.9 2004/10/04 22:07:40 mrglavas Exp $
25  */

26 public final class REUtil {
27     private REUtil() {
28     }
29
30     static final int composeFromSurrogates(int high, int low) {
31         return 0x10000 + ((high-0xd800)<<10) + low-0xdc00;
32     }
33
34     static final boolean isLowSurrogate(int ch) {
35         return (ch & 0xfc00) == 0xdc00;
36     }
37
38     static final boolean isHighSurrogate(int ch) {
39         return (ch & 0xfc00) == 0xd800;
40     }
41
42     static final String JavaDoc decomposeToSurrogates(int ch) {
43         char[] chs = new char[2];
44         ch -= 0x10000;
45         chs[0] = (char)((ch>>10)+0xd800);
46         chs[1] = (char)((ch&0x3ff)+0xdc00);
47         return new String JavaDoc(chs);
48     }
49
50     static final String JavaDoc substring(CharacterIterator JavaDoc iterator, int begin, int end) {
51         char[] src = new char[end-begin];
52         for (int i = 0; i < src.length; i ++)
53             src[i] = iterator.setIndex(i+begin);
54         return new String JavaDoc(src);
55     }
56
57     // ================================================================
58

59     static final int getOptionValue(int ch) {
60         int ret = 0;
61         switch (ch) {
62           case 'i':
63             ret = RegularExpression.IGNORE_CASE;
64             break;
65           case 'm':
66             ret = RegularExpression.MULTIPLE_LINES;
67             break;
68           case 's':
69             ret = RegularExpression.SINGLE_LINE;
70             break;
71           case 'x':
72             ret = RegularExpression.EXTENDED_COMMENT;
73             break;
74           case 'u':
75             ret = RegularExpression.USE_UNICODE_CATEGORY;
76             break;
77           case 'w':
78             ret = RegularExpression.UNICODE_WORD_BOUNDARY;
79             break;
80           case 'F':
81             ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
82             break;
83           case 'H':
84             ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
85             break;
86           case 'X':
87             ret = RegularExpression.XMLSCHEMA_MODE;
88             break;
89           case ',':
90             ret = RegularExpression.SPECIAL_COMMA;
91             break;
92           default:
93         }
94         return ret;
95     }
96
97     static final int parseOptions(String JavaDoc opts) throws ParseException {
98         if (opts == null) return 0;
99         int options = 0;
100         for (int i = 0; i < opts.length(); i ++) {
101             int v = getOptionValue(opts.charAt(i));
102             if (v == 0)
103                 throw new ParseException("Unknown Option: "+opts.substring(i), -1);
104             options |= v;
105         }
106         return options;
107     }
108
109     static final String JavaDoc createOptionString(int options) {
110         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(9);
111         if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
112             sb.append((char)'F');
113         if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
114             sb.append((char)'H');
115         if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
116             sb.append((char)'X');
117         if ((options & RegularExpression.IGNORE_CASE) != 0)
118             sb.append((char)'i');
119         if ((options & RegularExpression.MULTIPLE_LINES) != 0)
120             sb.append((char)'m');
121         if ((options & RegularExpression.SINGLE_LINE) != 0)
122             sb.append((char)'s');
123         if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
124             sb.append((char)'u');
125         if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
126             sb.append((char)'w');
127         if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
128             sb.append((char)'x');
129         if ((options & RegularExpression.SPECIAL_COMMA) != 0)
130             sb.append((char)',');
131         return sb.toString().intern();
132     }
133
134     // ================================================================
135

136     static String JavaDoc stripExtendedComment(String JavaDoc regex) {
137         int len = regex.length();
138         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc(len);
139         int offset = 0;
140         while (offset < len) {
141             int ch = regex.charAt(offset++);
142                                                 // Skips a white space.
143
if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ')
144                 continue;
145
146             if (ch == '#') { // Skips chracters between '#' and a line end.
147
while (offset < len) {
148                     ch = regex.charAt(offset++);
149                     if (ch == '\r' || ch == '\n')
150                         break;
151                 }
152                 continue;
153             }
154
155             int next; // Strips an escaped white space.
156
if (ch == '\\' && offset < len) {
157                 if ((next = regex.charAt(offset)) == '#'
158                     || next == '\t' || next == '\n' || next == '\f'
159                     || next == '\r' || next == ' ') {
160                     buffer.append((char)next);
161                     offset ++;
162                 } else { // Other escaped character.
163
buffer.append((char)'\\');
164                     buffer.append((char)next);
165                     offset ++;
166                 }
167             } else // As is.
168
buffer.append((char)ch);
169         }
170         return buffer.toString();
171     }
172
173     // ================================================================
174

175     /**
176      * Sample entry.
177      * <div>Usage: <KBD>org.apache.xerces.utils.regex.REUtil &lt;regex&gt; &lt;string&gt;</KBD></div>
178      */

179     public static void main(String JavaDoc[] argv) {
180         String JavaDoc pattern = null;
181         try {
182             String JavaDoc options = "";
183             String JavaDoc target = null;
184             if( argv.length == 0 ) {
185                 System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" );
186                 System.exit( 0 );
187             }
188             for (int i = 0; i < argv.length; i ++) {
189                 if (argv[i].length() == 0 || argv[i].charAt(0) != '-') {
190                     if (pattern == null)
191                         pattern = argv[i];
192                     else if (target == null)
193                         target = argv[i];
194                     else
195                         System.err.println("Unnecessary: "+argv[i]);
196                 } else if (argv[i].equals("-i")) {
197                     options += "i";
198                 } else if (argv[i].equals("-m")) {
199                     options += "m";
200                 } else if (argv[i].equals("-s")) {
201                     options += "s";
202                 } else if (argv[i].equals("-u")) {
203                     options += "u";
204                 } else if (argv[i].equals("-w")) {
205                     options += "w";
206                 } else if (argv[i].equals("-X")) {
207                     options += "X";
208                 } else {
209                     System.err.println("Unknown option: "+argv[i]);
210                 }
211             }
212             RegularExpression reg = new RegularExpression(pattern, options);
213             System.out.println("RegularExpression: "+reg);
214             Match match = new Match();
215             reg.matches(target, match);
216             for (int i = 0; i < match.getNumberOfGroups(); i ++) {
217                 if (i == 0 ) System.out.print("Matched range for the whole pattern: ");
218                 else System.out.print("["+i+"]: ");
219                 if (match.getBeginning(i) < 0)
220                     System.out.println("-1");
221                 else {
222                     System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", ");
223                     System.out.println("\""+match.getCapturedText(i)+"\"");
224                 }
225             }
226         } catch (ParseException pe) {
227             if (pattern == null) {
228                 pe.printStackTrace();
229             } else {
230                 System.err.println("org.apache.xerces.utils.regex.ParseException: "+pe.getMessage());
231                 String JavaDoc indent = " ";
232                 System.err.println(indent+pattern);
233                 int loc = pe.getLocation();
234                 if (loc >= 0) {
235                     System.err.print(indent);
236                     for (int i = 0; i < loc; i ++) System.err.print("-");
237                     System.err.println("^");
238                 }
239             }
240         } catch (Exception JavaDoc e) {
241             e.printStackTrace();
242         }
243     }
244
245     static final int CACHESIZE = 20;
246     static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];
247     /**
248      * Creates a RegularExpression instance.
249      * This method caches created instances.
250      *
251      * @see RegularExpression#RegularExpression(java.lang.String, java.lang.String)
252      */

253     public static RegularExpression createRegex(String JavaDoc pattern, String JavaDoc options)
254         throws ParseException {
255         RegularExpression re = null;
256         int intOptions = REUtil.parseOptions(options);
257         synchronized (REUtil.regexCache) {
258             int i;
259             for (i = 0; i < REUtil.CACHESIZE; i ++) {
260                 RegularExpression cached = REUtil.regexCache[i];
261                 if (cached == null) {
262                     i = -1;
263                     break;
264                 }
265                 if (cached.equals(pattern, intOptions)) {
266                     re = cached;
267                     break;
268                 }
269             }
270             if (re != null) {
271                 if (i != 0) {
272                     System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i);
273                     REUtil.regexCache[0] = re;
274                 }
275             } else {
276                 re = new RegularExpression(pattern, options);
277                 System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1);
278                 REUtil.regexCache[0] = re;
279             }
280         }
281         return re;
282     }
283
284     /**
285      *
286      * @see RegularExpression#matches(java.lang.String)
287      */

288     public static boolean matches(String JavaDoc regex, String JavaDoc target) throws ParseException {
289         return REUtil.createRegex(regex, null).matches(target);
290     }
291
292     /**
293      *
294      * @see RegularExpression#matches(java.lang.String)
295      */

296     public static boolean matches(String JavaDoc regex, String JavaDoc options, String JavaDoc target) throws ParseException {
297         return REUtil.createRegex(regex, options).matches(target);
298     }
299
300     // ================================================================
301

302     /**
303      *
304      */

305     public static String JavaDoc quoteMeta(String JavaDoc literal) {
306         int len = literal.length();
307         StringBuffer JavaDoc buffer = null;
308         for (int i = 0; i < len; i ++) {
309             int ch = literal.charAt(i);
310             if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
311                 if (buffer == null) {
312                     buffer = new StringBuffer JavaDoc(i+(len-i)*2);
313                     if (i > 0) buffer.append(literal.substring(0, i));
314                 }
315                 buffer.append((char)'\\');
316                 buffer.append((char)ch);
317             } else if (buffer != null)
318                 buffer.append((char)ch);
319         }
320         return buffer != null ? buffer.toString() : literal;
321     }
322
323     // ================================================================
324

325     static void dumpString(String JavaDoc v) {
326         for (int i = 0; i < v.length(); i ++) {
327             System.out.print(Integer.toHexString(v.charAt(i)));
328             System.out.print(" ");
329         }
330         System.out.println();
331     }
332 }
333
Popular Tags