KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > ui > internal > misc > StringMatcher


1 /*******************************************************************************
2  * Copyright (c) 2000, 2006 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.ui.internal.misc;
12
13 import java.util.Vector JavaDoc;
14
15 /**
16  * A string pattern matcher, suppporting "*" and "?" wildcards.
17  */

18 public class StringMatcher {
19     protected String JavaDoc fPattern;
20
21     protected int fLength; // pattern length
22

23     protected boolean fIgnoreWildCards;
24
25     protected boolean fIgnoreCase;
26
27     protected boolean fHasLeadingStar;
28
29     protected boolean fHasTrailingStar;
30
31     protected String JavaDoc fSegments[]; //the given pattern is split into * separated segments
32

33     /* boundary value beyond which we don't need to search in the text */
34     protected int fBound = 0;
35
36     protected static final char fSingleWildCard = '\u0000';
37
38     public static class Position {
39         int start; //inclusive
40

41         int end; //exclusive
42

43         public Position(int start, int end) {
44             this.start = start;
45             this.end = end;
46         }
47
48         public int getStart() {
49             return start;
50         }
51
52         public int getEnd() {
53             return end;
54         }
55     }
56
57     /**
58      * StringMatcher constructor takes in a String object that is a simple
59      * pattern which may contain '*' for 0 and many characters and
60      * '?' for exactly one character.
61      *
62      * Literal '*' and '?' characters must be escaped in the pattern
63      * e.g., "\*" means literal "*", etc.
64      *
65      * Escaping any other character (including the escape character itself),
66      * just results in that character in the pattern.
67      * e.g., "\a" means "a" and "\\" means "\"
68      *
69      * If invoking the StringMatcher with string literals in Java, don't forget
70      * escape characters are represented by "\\".
71      *
72      * @param pattern the pattern to match text against
73      * @param ignoreCase if true, case is ignored
74      * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
75      * (everything is taken literally).
76      */

77     public StringMatcher(String JavaDoc pattern, boolean ignoreCase,
78             boolean ignoreWildCards) {
79         if (pattern == null) {
80             throw new IllegalArgumentException JavaDoc();
81         }
82         fIgnoreCase = ignoreCase;
83         fIgnoreWildCards = ignoreWildCards;
84         fPattern = pattern;
85         fLength = pattern.length();
86
87         if (fIgnoreWildCards) {
88             parseNoWildCards();
89         } else {
90             parseWildCards();
91         }
92     }
93
94     /**
95      * Find the first occurrence of the pattern between <code>start</code)(inclusive)
96      * and <code>end</code>(exclusive).
97      * @param text the String object to search in
98      * @param start the starting index of the search range, inclusive
99      * @param end the ending index of the search range, exclusive
100      * @return an <code>StringMatcher.Position</code> object that keeps the starting
101      * (inclusive) and ending positions (exclusive) of the first occurrence of the
102      * pattern in the specified range of the text; return null if not found or subtext
103      * is empty (start==end). A pair of zeros is returned if pattern is empty string
104      * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
105      * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
106      */

107     public StringMatcher.Position find(String JavaDoc text, int start, int end) {
108         if (text == null) {
109             throw new IllegalArgumentException JavaDoc();
110         }
111
112         int tlen = text.length();
113         if (start < 0) {
114             start = 0;
115         }
116         if (end > tlen) {
117             end = tlen;
118         }
119         if (end < 0 || start >= end) {
120             return null;
121         }
122         if (fLength == 0) {
123             return new Position(start, start);
124         }
125         if (fIgnoreWildCards) {
126             int x = posIn(text, start, end);
127             if (x < 0) {
128                 return null;
129             }
130             return new Position(x, x + fLength);
131         }
132
133         int segCount = fSegments.length;
134         if (segCount == 0) {
135             return new Position(start, end);
136         }
137
138         int curPos = start;
139         int matchStart = -1;
140         int i;
141         for (i = 0; i < segCount && curPos < end; ++i) {
142             String JavaDoc current = fSegments[i];
143             int nextMatch = regExpPosIn(text, curPos, end, current);
144             if (nextMatch < 0) {
145                 return null;
146             }
147             if (i == 0) {
148                 matchStart = nextMatch;
149             }
150             curPos = nextMatch + current.length();
151         }
152         if (i < segCount) {
153             return null;
154         }
155         return new Position(matchStart, curPos);
156     }
157
158     /**
159      * match the given <code>text</code> with the pattern
160      * @return true if matched otherwise false
161      * @param text a String object
162      */

163     public boolean match(String JavaDoc text) {
164         if(text == null) {
165             return false;
166         }
167         return match(text, 0, text.length());
168     }
169
170     /**
171      * Given the starting (inclusive) and the ending (exclusive) positions in the
172      * <code>text</code>, determine if the given substring matches with aPattern
173      * @return true if the specified portion of the text matches the pattern
174      * @param text a String object that contains the substring to match
175      * @param start marks the starting position (inclusive) of the substring
176      * @param end marks the ending index (exclusive) of the substring
177      */

178     public boolean match(String JavaDoc text, int start, int end) {
179         if (null == text) {
180             throw new IllegalArgumentException JavaDoc();
181         }
182
183         if (start > end) {
184             return false;
185         }
186
187         if (fIgnoreWildCards) {
188             return (end - start == fLength)
189                     && fPattern.regionMatches(fIgnoreCase, 0, text, start,
190                             fLength);
191         }
192         int segCount = fSegments.length;
193         if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) {
194             return true;
195         }
196         if (start == end) {
197             return fLength == 0;
198         }
199         if (fLength == 0) {
200             return start == end;
201         }
202
203         int tlen = text.length();
204         if (start < 0) {
205             start = 0;
206         }
207         if (end > tlen) {
208             end = tlen;
209         }
210
211         int tCurPos = start;
212         int bound = end - fBound;
213         if (bound < 0) {
214             return false;
215         }
216         int i = 0;
217         String JavaDoc current = fSegments[i];
218         int segLength = current.length();
219
220         /* process first segment */
221         if (!fHasLeadingStar) {
222             if (!regExpRegionMatches(text, start, current, 0, segLength)) {
223                 return false;
224             } else {
225                 ++i;
226                 tCurPos = tCurPos + segLength;
227             }
228         }
229         if ((fSegments.length == 1) && (!fHasLeadingStar)
230                 && (!fHasTrailingStar)) {
231             // only one segment to match, no wildcards specified
232
return tCurPos == end;
233         }
234         /* process middle segments */
235         while (i < segCount) {
236             current = fSegments[i];
237             int currentMatch;
238             int k = current.indexOf(fSingleWildCard);
239             if (k < 0) {
240                 currentMatch = textPosIn(text, tCurPos, end, current);
241                 if (currentMatch < 0) {
242                     return false;
243                 }
244             } else {
245                 currentMatch = regExpPosIn(text, tCurPos, end, current);
246                 if (currentMatch < 0) {
247                     return false;
248                 }
249             }
250             tCurPos = currentMatch + current.length();
251             i++;
252         }
253
254         /* process final segment */
255         if (!fHasTrailingStar && tCurPos != end) {
256             int clen = current.length();
257             return regExpRegionMatches(text, end - clen, current, 0, clen);
258         }
259         return i == segCount;
260     }
261
262     /**
263      * This method parses the given pattern into segments seperated by wildcard '*' characters.
264      * Since wildcards are not being used in this case, the pattern consists of a single segment.
265      */

266     private void parseNoWildCards() {
267         fSegments = new String JavaDoc[1];
268         fSegments[0] = fPattern;
269         fBound = fLength;
270     }
271
272     /**
273      * Parses the given pattern into segments seperated by wildcard '*' characters.
274      * @param p, a String object that is a simple regular expression with '*' and/or '?'
275      */

276     private void parseWildCards() {
277         if (fPattern.startsWith("*")) { //$NON-NLS-1$
278
fHasLeadingStar = true;
279         }
280         if (fPattern.endsWith("*")) {//$NON-NLS-1$
281
/* make sure it's not an escaped wildcard */
282             if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
283                 fHasTrailingStar = true;
284             }
285         }
286
287         Vector JavaDoc temp = new Vector JavaDoc();
288
289         int pos = 0;
290         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
291         while (pos < fLength) {
292             char c = fPattern.charAt(pos++);
293             switch (c) {
294             case '\\':
295                 if (pos >= fLength) {
296                     buf.append(c);
297                 } else {
298                     char next = fPattern.charAt(pos++);
299                     /* if it's an escape sequence */
300                     if (next == '*' || next == '?' || next == '\\') {
301                         buf.append(next);
302                     } else {
303                         /* not an escape sequence, just insert literally */
304                         buf.append(c);
305                         buf.append(next);
306                     }
307                 }
308                 break;
309             case '*':
310                 if (buf.length() > 0) {
311                     /* new segment */
312                     temp.addElement(buf.toString());
313                     fBound += buf.length();
314                     buf.setLength(0);
315                 }
316                 break;
317             case '?':
318                 /* append special character representing single match wildcard */
319                 buf.append(fSingleWildCard);
320                 break;
321             default:
322                 buf.append(c);
323             }
324         }
325
326         /* add last buffer to segment list */
327         if (buf.length() > 0) {
328             temp.addElement(buf.toString());
329             fBound += buf.length();
330         }
331
332         fSegments = new String JavaDoc[temp.size()];
333         temp.copyInto(fSegments);
334     }
335
336     /**
337      * @param text a string which contains no wildcard
338      * @param start the starting index in the text for search, inclusive
339      * @param end the stopping point of search, exclusive
340      * @return the starting index in the text of the pattern , or -1 if not found
341      */

342     protected int posIn(String JavaDoc text, int start, int end) {//no wild card in pattern
343
int max = end - fLength;
344
345         if (!fIgnoreCase) {
346             int i = text.indexOf(fPattern, start);
347             if (i == -1 || i > max) {
348                 return -1;
349             }
350             return i;
351         }
352
353         for (int i = start; i <= max; ++i) {
354             if (text.regionMatches(true, i, fPattern, 0, fLength)) {
355                 return i;
356             }
357         }
358
359         return -1;
360     }
361
362     /**
363      * @param text a simple regular expression that may only contain '?'(s)
364      * @param start the starting index in the text for search, inclusive
365      * @param end the stopping point of search, exclusive
366      * @param p a simple regular expression that may contains '?'
367      * @return the starting index in the text of the pattern , or -1 if not found
368      */

369     protected int regExpPosIn(String JavaDoc text, int start, int end, String JavaDoc p) {
370         int plen = p.length();
371
372         int max = end - plen;
373         for (int i = start; i <= max; ++i) {
374             if (regExpRegionMatches(text, i, p, 0, plen)) {
375                 return i;
376             }
377         }
378         return -1;
379     }
380
381     /**
382      *
383      * @return boolean
384      * @param text a String to match
385      * @param start int that indicates the starting index of match, inclusive
386      * @param end</code> int that indicates the ending index of match, exclusive
387      * @param p String, String, a simple regular expression that may contain '?'
388      * @param ignoreCase boolean indicating wether code>p</code> is case sensitive
389      */

390     protected boolean regExpRegionMatches(String JavaDoc text, int tStart, String JavaDoc p,
391             int pStart, int plen) {
392         while (plen-- > 0) {
393             char tchar = text.charAt(tStart++);
394             char pchar = p.charAt(pStart++);
395
396             /* process wild cards */
397             if (!fIgnoreWildCards) {
398                 /* skip single wild cards */
399                 if (pchar == fSingleWildCard) {
400                     continue;
401                 }
402             }
403             if (pchar == tchar) {
404                 continue;
405             }
406             if (fIgnoreCase) {
407                 if (Character.toUpperCase(tchar) == Character
408                         .toUpperCase(pchar)) {
409                     continue;
410                 }
411                 // comparing after converting to upper case doesn't handle all cases;
412
// also compare after converting to lower case
413
if (Character.toLowerCase(tchar) == Character
414                         .toLowerCase(pchar)) {
415                     continue;
416                 }
417             }
418             return false;
419         }
420         return true;
421     }
422
423     /**
424      * @param text the string to match
425      * @param start the starting index in the text for search, inclusive
426      * @param end the stopping point of search, exclusive
427      * @param p a pattern string that has no wildcard
428      * @return the starting index in the text of the pattern , or -1 if not found
429      */

430     protected int textPosIn(String JavaDoc text, int start, int end, String JavaDoc p) {
431
432         int plen = p.length();
433         int max = end - plen;
434
435         if (!fIgnoreCase) {
436             int i = text.indexOf(p, start);
437             if (i == -1 || i > max) {
438                 return -1;
439             }
440             return i;
441         }
442
443         for (int i = start; i <= max; ++i) {
444             if (text.regionMatches(true, i, p, 0, plen)) {
445                 return i;
446             }
447         }
448
449         return -1;
450     }
451 }
452
Popular Tags