KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > matching > helpers > WildcardHelper


1 /*
2  * Copyright 1999-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.cocoon.matching.helpers;
17
18 import java.util.HashMap JavaDoc;
19
/**
 * Utility class that performs wildcard-pattern matching and isolates the text
 * matched by each wildcard.
 * <br>
 * A pattern is first compiled with {@link #compilePattern(String)} into an
 * <code>int []</code> and then applied to input strings with
 * {@link #match(HashMap, String, int[])}.
 *
 * @author <a HREF="mailto:pier@apache.org">Pierpaolo Fumagalli</a>
 * (Apache Software Foundation)
 * @author <a HREF="mailto:Giacomo.Pati@pwr.ch">Giacomo Pati</a>
 * @author <a HREF="mailto:stefano@apache.org">Stefano Mazzocchi</a>
 * @author <a HREF="mailto:bloritsch@apache.org">Berin Loritsch</a>
 * @version CVS $Id: WildcardHelper.java 292159 2005-09-28 10:33:42Z sylvain $
 */
public class WildcardHelper {

    /** The int representing '*' in the pattern <code>int []</code>. */
    protected static final int MATCH_FILE = -1;
    /** The int representing '**' in the pattern <code>int []</code>. */
    protected static final int MATCH_PATH = -2;
    /** The int representing begin in the pattern <code>int []</code>. */
    protected static final int MATCH_BEGIN = -4;
    /** The int representing end in pattern <code>int []</code>. */
    protected static final int MATCH_THEEND = -5;
    /** The int value that terminates the pattern <code>int []</code>. */
    protected static final int MATCH_END = -3;

    /**
     * Translate the given <code>String</code> into an <code>int []</code>
     * representing the pattern matchable by this class.
     * <br>
     * Here is how the conversion algorithm works:
     * <ul>
     * <li>The array always starts with MATCH_BEGIN.</li>
     * <li>The '*' character is converted to MATCH_FILE, meaning that zero
     * or more characters (excluding the path separator '/') are to
     * be matched.</li>
     * <li>The '**' sequence is converted to MATCH_PATH, meaning that zero
     * or more characters (including the path separator '/') are to
     * be matched.</li>
     * <li>The '\' character is used as an escape sequence ('\*' is
     * translated in '*', not in MATCH_FILE). If an exact '\' character
     * is to be matched the source string must contain a '\\'
     * sequence. A trailing unpaired '\' is dropped.</li>
     * </ul>
     * When more than two '*' characters, not separated by another character,
     * are found their value is considered as '**' (MATCH_PATH).
     * <br>
     * The meaningful part of the array is always terminated by MATCH_THEEND.
     * The array is allocated with <code>data.length() + 2</code> slots, so
     * unused slots (left at zero) may follow the terminator whenever escapes
     * or '**' sequences shortened the pattern; do not rely on the array
     * length.
     * <br>
     * All MATCH* values are less than zero, while normal characters are equal
     * or greater.
     *
     * @param data The string to translate.
     * @return The encoded string as an int array, terminated by the
     * MATCH_THEEND value (don't consider the array length).
     * @exception NullPointerException If data is null.
     */
    public static int[] compilePattern(String data)
    throws NullPointerException {
        // One slot per input character plus MATCH_BEGIN and MATCH_THEEND
        int[] expr = new int[data.length() + 2];
        char[] buff = data.toCharArray();

        int y = 0;
        boolean slash = false;

        // Must start from beginning
        expr[y++] = MATCH_BEGIN;

        for (int x = 0; x < buff.length; x++) {
            if (slash) {
                // The previous char was '\': copy this char literally
                expr[y++] = buff[x];
                slash = false;
            } else if (buff[x] == '\\') {
                slash = true;
            } else if (buff[x] == '*') {
                // A '*' directly after another wildcard widens it to '**'.
                // The comparison is explicit so that a leading '*' does not
                // overwrite the MATCH_BEGIN marker.
                if (expr[y - 1] == MATCH_FILE || expr[y - 1] == MATCH_PATH) {
                    expr[y - 1] = MATCH_PATH;
                } else {
                    expr[y++] = MATCH_FILE;
                }
            } else {
                expr[y++] = buff[x];
            }
        }

        // Must match end at the end
        expr[y] = MATCH_THEEND;
        return expr;
    }

    /**
     * Match a pattern against a string and isolate the wildcard replacements
     * into a <code>HashMap</code>.
     * <br>
     * When <code>map</code> is not null, key <code>"0"</code> receives the
     * complete input and keys <code>"1"</code>, <code>"2"</code>, ... receive
     * the text matched by each wildcard in pattern order. Entries are stored
     * as the match progresses, so the map may hold partial results when the
     * method returns <code>false</code>.
     * <br>
     * A wildcard followed by the final literal of a MATCH_THEEND terminated
     * pattern is anchored to the last occurrence of that literal, so that
     * <code>*.xml</code> matches <code>a.xml.xml</code>. Wildcards in the
     * middle of a pattern are resolved without backtracking: '*' stops at the
     * first occurrence of the following literal and '**' at the last one.
     *
     * @param map The map receiving the matched parts, or null if they are
     * not needed.
     * @param data The string to match.
     * @param expr The compiled pattern, as returned by
     * {@link #compilePattern(String)}.
     * @return true if data matches the pattern.
     * @exception NullPointerException If data or expr is null.
     */
    public static boolean match(HashMap map, String data, int[] expr)
    throws NullPointerException {
        if (data == null) {
            throw new NullPointerException("No data provided");
        }
        if (expr == null) {
            throw new NullPointerException("No pattern expression provided");
        }

        char[] buff = data.toCharArray();

        // Index of the current MATCH_* token in the expression
        int charpos = 0;
        // Start of the literal run that precedes that token
        int exprpos = 0;
        // Read position in the input
        int buffpos = 0;
        // Number of wildcard values stored so far
        int mcount = 0;

        if (map != null) {
            // We want the complete data be in {0}
            map.put(Integer.toString(mcount), data);
        }

        // First check for MATCH_BEGIN
        boolean matchBegin = false;
        if (expr[charpos] == MATCH_BEGIN) {
            matchBegin = true;
            exprpos = ++charpos;
        }

        // Search the first MATCH_* token (MATCH_BEGIN already skipped)
        while (expr[charpos] >= 0) {
            charpos++;
        }
        int exprchr = expr[charpos];

        while (true) {
            // Check that the literal run before the current token is present
            // in the input. An anchored pattern requires it at the very start.
            if (matchBegin) {
                if (!matchArray(expr, exprpos, charpos, buff, buffpos)) {
                    return false;
                }
                matchBegin = false;
            } else if (indexOfArray(expr, exprpos, charpos, buff, buffpos) < 0) {
                return false;
            }

            // Step over the literal run
            buffpos += (charpos - exprpos);

            if (exprchr == MATCH_END) {
                // Don't care about rest of input buffer
                return true;
            }
            if (exprchr == MATCH_THEEND) {
                // Check that we reach buffer's end
                return buffpos == buff.length;
            }

            // Search the next MATCH_* token
            exprpos = ++charpos;
            while (expr[charpos] >= 0) {
                charpos++;
            }
            int prevchr = exprchr;
            exprchr = expr[charpos];

            // prevchr is the wildcard to resolve; find where the literal that
            // follows it starts. '*' normally takes the first occurrence, but
            // when that literal closes the pattern it has to sit at the end of
            // the input, so the last occurrence is the only one that can work.
            boolean firstOccurrence =
                    (prevchr == MATCH_FILE) && (exprchr != MATCH_THEEND);
            int offset = firstOccurrence
                    ? indexOfArray(expr, exprpos, charpos, buff, buffpos)
                    : lastIndexOfArray(expr, exprpos, charpos, buff, buffpos);
            if (offset < 0) {
                return false;
            }

            // Only '**' is allowed to span path separators
            if (prevchr != MATCH_PATH) {
                for (int i = buffpos; i < offset; i++) {
                    if (buff[i] == '/') {
                        return false;
                    }
                }
            }

            if (map != null) {
                map.put(Integer.toString(++mcount),
                        new String(buff, buffpos, offset - buffpos));
            }
            buffpos = offset;
        }
    }

    /**
     * Get the offset of a part of an int array within a char array.
     * <br>
     * This method returns the index in d of the first occurrence, at or after
     * dpos, of that part of array specified by r, starting at rpos and
     * terminating at rend.
     * <br>
     * A zero length part (<code>rend == rpos</code>) yields
     * <code>d.length</code>, not dpos: {@link #match} relies on this to let a
     * wildcard that is not followed by any literal consume the rest of the
     * input.
     *
     * @param r The array containing the data that need to be matched in d.
     * @param rpos The index of the first character in r to look for.
     * @param rend The index of the last character in r to look for plus 1.
     * @param d The array of char that should contain a part of r.
     * @param dpos The starting offset in d for the matching.
     * @return The offset in d of the part of r matched in d or -1 if that was
     * not found.
     * @exception IllegalArgumentException If rend is less than rpos.
     */
    protected static int indexOfArray(int[] r, int rpos, int rend,
            char[] d, int dpos) {
        if (rend < rpos) {
            throw new IllegalArgumentException("rend < rpos");
        }
        if (rend == rpos) {
            return d.length;
        }

        int len = rend - rpos;
        // Try every start position that leaves enough room for the part
        for (int start = dpos; start + len <= d.length; start++) {
            if (matchArray(r, rpos, rend, d, start)) {
                return start;
            }
        }
        return -1;
    }

    /**
     * Get the offset of the last occurrence of an int array within a char
     * array.
     * <br>
     * This method returns the index in d of the last occurrence, at or after
     * dpos, of that part of array specified by r, starting at rpos and
     * terminating at rend.
     * <br>
     * A zero length part (<code>rend == rpos</code>) yields
     * <code>d.length</code>, not dpos: {@link #match} relies on this to let a
     * wildcard that is not followed by any literal consume the rest of the
     * input.
     *
     * @param r The array containing the data that need to be matched in d.
     * @param rpos The index of the first character in r to look for.
     * @param rend The index of the last character in r to look for plus 1.
     * @param d The array of char that should contain a part of r.
     * @param dpos The starting offset in d for the matching.
     * @return The offset in d of the last part of r matched in d or -1 if
     * that was not found.
     * @exception IllegalArgumentException If rend is less than rpos.
     */
    protected static int lastIndexOfArray(int[] r, int rpos, int rend,
            char[] d, int dpos) {
        if (rend < rpos) {
            throw new IllegalArgumentException("rend < rpos");
        }
        if (rend == rpos) {
            return d.length;
        }

        int len = rend - rpos;
        // Walk backwards from the last start position that can hold the part
        for (int start = d.length - len; start >= dpos; start--) {
            if (matchArray(r, rpos, rend, d, start)) {
                return start;
            }
        }
        return -1;
    }

    /**
     * Matches elements of array r from rpos to rend with array d, starting
     * from dpos.
     * <br>
     * This method returns true if elements of array r from rpos (inclusive)
     * to rend (exclusive) equal elements of array d from dpos to
     * dpos+(rend-rpos).
     *
     * @param r The array containing the data that need to be matched in d.
     * @param rpos The index of the first character in r to look for.
     * @param rend The index of the last character in r to look for plus 1.
     * @param d The array of char that should start from a part of r.
     * @param dpos The starting offset in d for the matching.
     * @return true if array d, read from dpos, starts with that portion of
     * array r; false if it differs or d is too short.
     */
    protected static boolean matchArray(int[] r, int rpos, int rend,
            char[] d, int dpos) {
        // Not enough characters left in d to hold the whole part
        if (d.length - dpos < rend - rpos) {
            return false;
        }
        for (int i = rpos; i < rend; i++) {
            if (r[i] != d[dpos++]) {
                return false;
            }
        }
        return true;
    }
}
386
Popular Tags