KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > oro > text > GlobCompiler


1 package org.apache.oro.text;
2
3 /* ====================================================================
4  * The Apache Software License, Version 1.1
5  *
6  * Copyright (c) 2000 The Apache Software Foundation. All rights
7  * reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in
18  * the documentation and/or other materials provided with the
19  * distribution.
20  *
21  * 3. The end-user documentation included with the redistribution,
22  * if any, must include the following acknowledgment:
23  * "This product includes software developed by the
24  * Apache Software Foundation (http://www.apache.org/)."
25  * Alternately, this acknowledgment may appear in the software itself,
26  * if and wherever such third-party acknowledgments normally appear.
27  *
28  * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
29  * must not be used to endorse or promote products derived from this
30  * software without prior written permission. For written
31  * permission, please contact apache@apache.org.
32  *
33  * 5. Products derived from this software may not be called "Apache"
34  * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
35  * name, without prior written permission of the Apache Software Foundation.
36  *
37  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  * ====================================================================
50  *
51  * This software consists of voluntary contributions made by many
52  * individuals on behalf of the Apache Software Foundation. For more
53  * information on the Apache Software Foundation, please see
54  * <http://www.apache.org/>.
55  *
56  * Portions of this software are based upon software originally written
57  * by Daniel F. Savarese. We appreciate his contributions.
58  */

59
60 import org.apache.oro.text.regex.*;
61
62 /**
63  * The GlobCompiler class will compile a glob expression into a Perl5Pattern
64  * that may be used to match patterns in conjunction with Perl5Matcher.
65  * Rather than create extra GlobMatcher and GlobPattern classes tailored
66  * to the task of matching glob expressions, we have simply reused the
67  * Perl5 regular expression classes from org.apache.oro.text.regex by
68  * making GlobCompiler translate a glob expression into a Perl5 expression
69  * that is compiled by a Perl5Compiler instance internal to the GlobCompiler.
70  * <p>
71  * Because there are various similar glob expression syntaxes, GlobCompiler
72  * tries to provide a small amount of customization by providing the
73  * {@link #STAR_CANNOT_MATCH_NULL_MASK}
74  * and {@link #QUESTION_MATCHES_ZERO_OR_ONE_MASK} compilation options.
75  * <p>
76  * The GlobCompiler expression syntax is based on Unix shell glob expressions
77  * but should be usable to simulate Win32 wildcards. The following syntax
78  * is supported:
79  * <ul>
80  * <li> <b>*</b> - Matches zero or more instances of any character. If the
81  * STAR_CANNOT_MATCH_NULL_MASK option is used, <b>*</b> matches
82  * one or more instances of any character.
83  * <li> <b>?</b> - Matches one instance of any character. If the
84  * QUESTION_MATCHES_ZERO_OR_ONE_MASK option is used, <b>?</b>
85  * matches zero or one instances of any character.
86  * <li> <b>[...]</b> - Matches any of characters enclosed by the brackets.
87  * <b> * </b> and <b>?</b> lose their special meanings within a
88  * character class. Additionally, if the first character following
89  * the opening bracket is a <b>!</b> or a <b>^</b>, then any
90  * character not in the character class is matched. A <b>-</b>
91  * between two characters can be used to denote a range. A
92  * <b>-</b> at the beginning or end of the character class matches
93  * itself rather than referring to a range. A <b>]</b> immediately
94  * following the opening <b>[</b> matches itself rather than
95  * indicating the end of the character class, otherwise it must be
96  * escaped with a backslash to refer to itself.
97  * <li> <b>\</b> - A backslash matches itself in most situations. But
98  * when a special character such as a <b>*</b> follows it, a
99  * backslash <em> escapes </em> the character, indicating that
100  * the special character should be interpreted as a normal character
101  * instead of its special meaning.
102  * <li> All other characters match themselves.
103  * </ul>
104  * <p>
105  * Please remember that when you construct a Java string in Java code,
106  * the backslash character is itself a special Java character, and it must
107  * be double backslashed to represent a single backslash in a regular
108  * expression.
109
110  @author <a HREF="dfs@savarese.org">Daniel F. Savarese</a>
111  @version $Id: GlobCompiler.java,v 1.1.1.1 2000/07/23 23:08:49 jon Exp $
112
113  * @see org.apache.oro.text.regex.PatternCompiler
114  * @see org.apache.oro.text.regex.Perl5Matcher
115  */

116 public final class GlobCompiler implements PatternCompiler {
117   /**
118    * The default mask for the {@link #compile compile} methods.
119    * It is equal to 0. The default behavior is for a glob expression to
120    * be case sensitive unless it is compiled with the CASE_INSENSITIVE_MASK
121    * option.
122    */

123   public static final int DEFAULT_MASK = 0;
124
125   /**
126    * A mask passed as an option to the {@link #compile compile} methods
127    * to indicate a compiled glob expression should be case insensitive.
128    */

129   public static final int CASE_INSENSITIVE_MASK = 0x0001;
130
131   /**
132    * A mask passed as an option to the {@link #compile compile} methods
133    * to indicate that a * should not be allowed to match the null string.
134    * The normal behavior of the * metacharacter is that it may match any
135    * 0 or more characters. This mask causes it to match 1 or more
136    * characters of anything.
137    */

138   public static final int STAR_CANNOT_MATCH_NULL_MASK = 0x0002;
139
140   /**
141    * A mask passed as an option to the {@link #compile compile} methods
142    * to indicate that a ? should not be allowed to match the null string.
143    * The normal behavior of the ? metacharacter is that it may match any 1
144    * character. This mask causes it to match 0 or 1 characters.
145    */

146   public static final int QUESTION_MATCHES_ZERO_OR_ONE_MASK = 0x0004;
147
148   private Perl5Compiler __perl5Compiler;
149
150   private static boolean __isPerl5MetaCharacter(char ch) {
151     return (ch == '*' || ch == '?' || ch == '+' || ch == '[' || ch == ']' ||
152         ch == '(' || ch == ')' || ch == '|' || ch == '^' || ch == '$' ||
153         ch == '.' || ch == '{' || ch == '}' || ch == '\\');
154   }
155
156   private static boolean __isGlobMetaCharacter(char ch) {
157     return (ch == '*' || ch == '?' || ch == '[' || ch == ']');
158   }
159
160   /**
161    * This static method is the basic engine of the Glob PatternCompiler
162    * implementation. It takes a glob expression in the form of a character
163    * array and converts it into a String representation of a Perl5 pattern.
164    * The method is made public so that programmers may use it for their
165    * own purposes. However, the GlobCompiler compile methods work by
166    * converting the glob pattern to a Perl5 pattern using this method, and
167    * then invoking the compile() method of an internally stored Perl5Compiler
168    * instance.
169    * <p>
170    * @param pattern A character array representation of a Glob pattern.
171    * @return A String representation of a Perl5 pattern equivalent to the
172    * Glob pattern.
173    */

174   public static String JavaDoc globToPerl5(char[] pattern, int options) {
175     boolean inCharSet, starCannotMatchNull = false, questionMatchesZero;
176     int ch;
177     StringBuffer JavaDoc buffer;
178
179     buffer = new StringBuffer JavaDoc(2*pattern.length);
180     inCharSet = false;
181
182     questionMatchesZero = ((options & QUESTION_MATCHES_ZERO_OR_ONE_MASK) != 0);
183     starCannotMatchNull = ((options & STAR_CANNOT_MATCH_NULL_MASK) != 0);
184
185     for(ch=0; ch < pattern.length; ch++) {
186       switch(pattern[ch]) {
187       case '*':
188     if(inCharSet)
189       buffer.append('*');
190     else {
191       if(starCannotMatchNull)
192           buffer.append(".+");
193       else
194         buffer.append(".*");
195     }
196     break;
197       case '?':
198     if(inCharSet)
199       buffer.append('?');
200     else {
201       if(questionMatchesZero)
202         buffer.append(".?");
203       else
204         buffer.append('.');
205     }
206     break;
207       case '[':
208     inCharSet = true;
209     buffer.append(pattern[ch]);
210
211     if(ch + 1 < pattern.length) {
212       switch(pattern[ch + 1]) {
213       case '!':
214       case '^':
215         buffer.append('^');
216         ++ch;
217         continue;
218       case ']':
219         buffer.append(']');
220         ++ch;
221         continue;
222       }
223     }
224     break;
225       case ']':
226     inCharSet = false;
227     buffer.append(pattern[ch]);
228     break;
229       case '\\':
230     buffer.append('\\');
231     if(ch == pattern.length - 1) {
232       buffer.append('\\');
233     } else if(__isGlobMetaCharacter(pattern[ch + 1]))
234       buffer.append(pattern[++ch]);
235     else
236       buffer.append('\\');
237     break;
238       default:
239     if(!inCharSet && __isPerl5MetaCharacter(pattern[ch]))
240       buffer.append('\\');
241     buffer.append(pattern[ch]);
242     break;
243       }
244     }
245
246     return buffer.toString();
247   }
248
249
250   /**
251    * The default GlobCompiler constructor. It initializes an internal
252    * Perl5Compiler instance to compile translated glob expressions.
253    */

254   public GlobCompiler() {
255     __perl5Compiler = new Perl5Compiler();
256   }
257
258
259   /**
260    * Compiles a Glob expression into a Perl5Pattern instance that
261    * can be used by a Perl5Matcher object to perform pattern matching.
262    * <p>
263    * @param pattern A Glob expression to compile.
264    * @param options A set of flags giving the compiler instructions on
265    * how to treat the glob expression. The flags
266    * are a logical OR of any number of the 3 <b>MASK</b>
267    * constants. For example:
268    * <pre>
269    * regex =
270    * compiler.compile(pattern, GlobCompiler.
271    * CASE_INSENSITIVE_MASK |
272    * GlobCompiler.STAR_CANNOT_MATCH_NULL_MASK);
273    * </pre>
274    * This says to compile the pattern so that *
275    * cannot match the null string and to perform
276    * matches in a case insensitive manner.
277    * @return A Pattern instance constituting the compiled expression.
278    * This instance will always be a Perl5Pattern and can be reliably
279    * casted to a Perl5Pattern.
280    * @exception MalformedPatternException If the compiled expression
281    * is not a valid Glob expression.
282    */

283   public Pattern compile(char[] pattern, int options)
284        throws MalformedPatternException
285   {
286     int perlOptions = 0;
287     if((options & CASE_INSENSITIVE_MASK) != 0)
288       perlOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK;
289     return __perl5Compiler.compile(globToPerl5(pattern, options), perlOptions);
290   }
291
292   /**
293    * Same as calling <b>compile(pattern, GlobCompiler.DEFAULT_MASK);</b>
294    * <p>
295    * @param pattern A regular expression to compile.
296    * @return A Pattern instance constituting the compiled regular expression.
297    * This instance will always be a Perl5Pattern and can be reliably
298    * casted to a Perl5Pattern.
299    * @exception MalformedPatternException If the compiled expression
300    * is not a valid Glob expression.
301    */

302   public Pattern compile(char[] pattern) throws MalformedPatternException {
303     return compile(pattern, DEFAULT_MASK);
304   }
305
306   /**
307    * Same as calling <b>compile(pattern, GlobCompiler.DEFAULT_MASK);</b>
308    * <p>
309    * @param pattern A regular expression to compile.
310    * @return A Pattern instance constituting the compiled regular expression.
311    * This instance will always be a Perl5Pattern and can be reliably
312    * casted to a Perl5Pattern.
313    * @exception MalformedPatternException If the compiled expression
314    * is not a valid Glob expression.
315    */

316   public Pattern compile(String JavaDoc pattern) throws MalformedPatternException {
317     return compile(pattern.toCharArray(), DEFAULT_MASK);
318   }
319
320   /**
321    * Compiles a Glob expression into a Perl5Pattern instance that
322    * can be used by a Perl5Matcher object to perform pattern matching.
323    * <p>
324    * @param pattern A Glob expression to compile.
325    * @param options A set of flags giving the compiler instructions on
326    * how to treat the glob expression. The flags
327    * are a logical OR of any number of the 3 <b>MASK</b>
328    * constants. For example:
329    * <pre>
330    * regex =
331    * compiler.compile("*.*", GlobCompiler.
332    * CASE_INSENSITIVE_MASK |
333    * GlobCompiler.STAR_CANNOT_MATCH_NULL_MASK);
334    * </pre>
335    * This says to compile the pattern so that *
336    * cannot match the null string and to perform
337    * matches in a case insensitive manner.
338    * @return A Pattern instance constituting the compiled expression.
339    * This instance will always be a Perl5Pattern and can be reliably
340    * casted to a Perl5Pattern.
341    * @exception MalformedPatternException If the compiled expression
342    * is not a valid Glob expression.
343    */

344   public Pattern compile(String JavaDoc pattern, int options)
345        throws MalformedPatternException
346   {
347     return compile(pattern.toCharArray(), options);
348   }
349
350 }
351
352
Popular Tags