KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > ui > internal > ide > StringMatcher


1 /*******************************************************************************
2  * Copyright (c) 2000, 2006 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.ui.internal.ide;
12
13 import java.util.Vector JavaDoc;
14
15 /**
16  * A string pattern matcher suppporting '*' and '?' wildcards.
17  */

18 public class StringMatcher {
19     protected String JavaDoc fPattern;
20
21     protected int fLength; // pattern length
22

23     protected boolean fIgnoreWildCards;
24
25     protected boolean fIgnoreCase;
26
27     protected boolean fHasLeadingStar;
28
29     protected boolean fHasTrailingStar;
30
31     protected String JavaDoc fSegments[]; //the given pattern is split into * separated segments
32

33     /* boundary value beyond which we don't need to search in the text */
34     protected int fBound = 0;
35
36     protected static final char fSingleWildCard = '\u0000';
37
38     public static class Position {
39         int start; //inclusive
40

41         int end; //exclusive
42

43         public Position(int start, int end) {
44             this.start = start;
45             this.end = end;
46         }
47
48         public int getStart() {
49             return start;
50         }
51
52         public int getEnd() {
53             return end;
54         }
55     }
56
57     /**
58      * StringMatcher constructor takes in a String object that is a simple
59      * pattern which may contain &#39*&#39 for 0 and many characters and
60      * '?' for exactly one character.
61      *
62      * Literal '*' and '*' characters must be escaped in the pattern
63      * e.g. "\*" means literal "*", etc.
64      *
65      * Escaping any other character (including the escape character itself),
66      * just results in that character in the pattern.
67      * e.g. "\a" means "a" and "\\" means "\"
68      *
69      * If invoking the StringMatcher with string literals in Java, don't forget
70      * escape characters are represented by "\\".
71      *
72      * @param pattern the pattern to match text against
73      * @param ignoreCase if true, case is ignored
74      * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
75      * (everything is taken literally).
76      */

77     public StringMatcher(String JavaDoc pattern, boolean ignoreCase,
78             boolean ignoreWildCards) {
79         if (pattern == null) {
80             throw new IllegalArgumentException JavaDoc();
81         }
82         fIgnoreCase = ignoreCase;
83         fIgnoreWildCards = ignoreWildCards;
84         fPattern = pattern;
85         fLength = pattern.length();
86
87         if (fIgnoreWildCards) {
88             parseNoWildCards();
89         } else {
90             parseWildCards();
91         }
92     }
93
94     /**
95      * Find the first occurrence of the pattern between <code>start</code)(inclusive)
96      * and <code>end</code>(exclusive).
97      * @param text the String object to search in
98      * @param start the starting index of the search range, inclusive
99      * @param end the ending index of the search range, exclusive
100      * @return an <code>StringMatcher.Position</code> object that keeps the starting
101      * (inclusive) and ending positions (exclusive) of the first occurrence of the
102      * pattern in the specified range of the text; return null if not found or subtext
103      * is empty (start==end). A pair of zeros is returned if pattern is empty string
104      * Note that for pattern like &quot;*abc*&quot; with leading and trailing stars, position of &quot;abc&quot;
105      * is returned. For a pattern like&quot;*&#63;&#63;*&quot; in text &quot;abcdf&quot;, (1,3) is returned
106      */

107     public StringMatcher.Position find(String JavaDoc text, int start, int end) {
108         if (text == null) {
109             throw new IllegalArgumentException JavaDoc();
110         }
111
112         int tlen = text.length();
113         if (start < 0) {
114             start = 0;
115         }
116         if (end > tlen) {
117             end = tlen;
118         }
119         if (end < 0 || start >= end) {
120             return null;
121         }
122         if (fLength == 0) {
123             return new Position(start, start);
124         }
125         if (fIgnoreWildCards) {
126             int x = posIn(text, start, end);
127             if (x < 0) {
128                 return null;
129             }
130             return new Position(x, x + fLength);
131         }
132
133         int segCount = fSegments.length;
134         if (segCount == 0) {
135             return new Position(start, end);
136         }
137
138         int curPos = start;
139         int matchStart = -1;
140         int i;
141         for (i = 0; i < segCount && curPos < end; ++i) {
142             String JavaDoc current = fSegments[i];
143             int nextMatch = regExpPosIn(text, curPos, end, current);
144             if (nextMatch < 0) {
145                 return null;
146             }
147             if (i == 0) {
148                 matchStart = nextMatch;
149             }
150             curPos = nextMatch + current.length();
151         }
152         if (i < segCount) {
153             return null;
154         }
155         return new Position(matchStart, curPos);
156     }
157
158     /**
159      * match the given <code>text</code> with the pattern
160      * @return true if matched eitherwise false
161      * @param text a String object
162      */

163     public boolean match(String JavaDoc text) {
164         return match(text, 0, text.length());
165     }
166
167     /**
168      * Given the starting (inclusive) and the ending (exclusive) positions in the
169      * <code>text</code>, determine if the given substring matches with aPattern
170      * @return true if the specified portion of the text matches the pattern
171      * @param text a <code>String</code> object that contains the substring to match
172      * @param start marks the starting position (inclusive) of the substring
173      * @param end marks the ending index (exclusive) of the substring
174      */

175     public boolean match(String JavaDoc text, int start, int end) {
176         if (null == text) {
177             throw new IllegalArgumentException JavaDoc();
178         }
179
180         if (start > end) {
181             return false;
182         }
183
184         if (fIgnoreWildCards) {
185             return (end - start == fLength)
186                     && fPattern.regionMatches(fIgnoreCase, 0, text, start,
187                             fLength);
188         }
189         int segCount = fSegments.length;
190         if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) {
191             return true;
192         }
193         if (start == end) {
194             return fLength == 0;
195         }
196         if (fLength == 0) {
197             return start == end;
198         }
199
200         int tlen = text.length();
201         if (start < 0) {
202             start = 0;
203         }
204         if (end > tlen) {
205             end = tlen;
206         }
207
208         int tCurPos = start;
209         int bound = end - fBound;
210         if (bound < 0) {
211             return false;
212         }
213         int i = 0;
214         String JavaDoc current = fSegments[i];
215         int segLength = current.length();
216
217         /* process first segment */
218         if (!fHasLeadingStar) {
219             if (!regExpRegionMatches(text, start, current, 0, segLength)) {
220                 return false;
221             } else {
222                 ++i;
223                 tCurPos = tCurPos + segLength;
224             }
225         }
226         if ((fSegments.length == 1) && (!fHasLeadingStar)
227                 && (!fHasTrailingStar)) {
228             // only one segment to match, no wildcards specified
229
return tCurPos == end;
230         }
231         /* process middle segments */
232         while (i < segCount) {
233             current = fSegments[i];
234             int currentMatch;
235             int k = current.indexOf(fSingleWildCard);
236             if (k < 0) {
237                 currentMatch = textPosIn(text, tCurPos, end, current);
238                 if (currentMatch < 0) {
239                     return false;
240                 }
241             } else {
242                 currentMatch = regExpPosIn(text, tCurPos, end, current);
243                 if (currentMatch < 0) {
244                     return false;
245                 }
246             }
247             tCurPos = currentMatch + current.length();
248             i++;
249         }
250
251         /* process final segment */
252         if (!fHasTrailingStar && tCurPos != end) {
253             int clen = current.length();
254             return regExpRegionMatches(text, end - clen, current, 0, clen);
255         }
256         return i == segCount;
257     }
258
259     /**
260      * This method parses the given pattern into segments seperated by wildcard '*' characters.
261      * Since wildcards are not being used in this case, the pattern consists of a single segment.
262      */

263     private void parseNoWildCards() {
264         fSegments = new String JavaDoc[1];
265         fSegments[0] = fPattern;
266         fBound = fLength;
267     }
268
269     /**
270      * Parses the given pattern into segments seperated by wildcard &#39;*&#39; characters.
271      * @param p, a String object that is a simple regular expression with ?*? and/or &#39;&#63;&#39;
272      */

273     private void parseWildCards() {
274         if (fPattern.startsWith("*")) { //$NON-NLS-1$
275
fHasLeadingStar = true;
276         }
277         if (fPattern.endsWith("*")) {//$NON-NLS-1$
278
/* make sure it's not an escaped wildcard */
279             if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
280                 fHasTrailingStar = true;
281             }
282         }
283
284         Vector JavaDoc temp = new Vector JavaDoc();
285
286         int pos = 0;
287         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
288         while (pos < fLength) {
289             char c = fPattern.charAt(pos++);
290             switch (c) {
291             case '\\':
292                 if (pos >= fLength) {
293                     buf.append(c);
294                 } else {
295                     char next = fPattern.charAt(pos++);
296                     /* if it's an escape sequence */
297                     if (next == '*' || next == '?' || next == '\\') {
298                         buf.append(next);
299                     } else {
300                         /* not an escape sequence, just insert literally */
301                         buf.append(c);
302                         buf.append(next);
303                     }
304                 }
305                 break;
306             case '*':
307                 if (buf.length() > 0) {
308                     /* new segment */
309                     temp.addElement(buf.toString());
310                     fBound += buf.length();
311                     buf.setLength(0);
312                 }
313                 break;
314             case '?':
315                 /* append special character representing single match wildcard */
316                 buf.append(fSingleWildCard);
317                 break;
318             default:
319                 buf.append(c);
320             }
321         }
322
323         /* add last buffer to segment list */
324         if (buf.length() > 0) {
325             temp.addElement(buf.toString());
326             fBound += buf.length();
327         }
328
329         fSegments = new String JavaDoc[temp.size()];
330         temp.copyInto(fSegments);
331     }
332
333     /**
334      * @param text a string which contains no wildcard
335      * @param start the starting index in the text for search, inclusive
336      * @param end the stopping point of search, exclusive
337      * @return the starting index in the text of the pattern , or -1 if not found
338      */

339     protected int posIn(String JavaDoc text, int start, int end) {//no wild card in pattern
340
int max = end - fLength;
341
342         if (!fIgnoreCase) {
343             int i = text.indexOf(fPattern, start);
344             if (i == -1 || i > max) {
345                 return -1;
346             }
347             return i;
348         }
349
350         for (int i = start; i <= max; ++i) {
351             if (text.regionMatches(true, i, fPattern, 0, fLength)) {
352                 return i;
353             }
354         }
355
356         return -1;
357     }
358
359     /**
360      * @param text a simple regular expression that may only contain '&#63;'(s)
361      * @param start the starting index in the text for search, inclusive
362      * @param end the stopping point of search, exclusive
363      * @param p a simple regular expression that may contains '&#63;'
364      * @return the starting index in the text of the pattern , or -1 if not found
365      */

366     protected int regExpPosIn(String JavaDoc text, int start, int end, String JavaDoc p) {
367         int plen = p.length();
368
369         int max = end - plen;
370         for (int i = start; i <= max; ++i) {
371             if (regExpRegionMatches(text, i, p, 0, plen)) {
372                 return i;
373             }
374         }
375         return -1;
376     }
377
378     /**
379      *
380      * @return boolean
381      * @param text a String to match
382      * @param tStart int that indicates the starting index of match, inclusive
383      * @param p String, String, a simple regular expression that may contain '&#63;'
384      * @param pStart
385      * @param plen
386      */

387     protected boolean regExpRegionMatches(String JavaDoc text, int tStart, String JavaDoc p,
388             int pStart, int plen) {
389         while (plen-- > 0) {
390             char tchar = text.charAt(tStart++);
391             char pchar = p.charAt(pStart++);
392
393             /* process wild cards */
394             if (!fIgnoreWildCards) {
395                 /* skip single wild cards */
396                 if (pchar == fSingleWildCard) {
397                     continue;
398                 }
399             }
400             if (pchar == tchar) {
401                 continue;
402             }
403             if (fIgnoreCase) {
404                 if (Character.toUpperCase(tchar) == Character
405                         .toUpperCase(pchar)) {
406                     continue;
407                 }
408                 // comparing after converting to upper case doesn't handle all cases;
409
// also compare after converting to lower case
410
if (Character.toLowerCase(tchar) == Character
411                         .toLowerCase(pchar)) {
412                     continue;
413                 }
414             }
415             return false;
416         }
417         return true;
418     }
419
420     /**
421      * @param text the string to match
422      * @param start the starting index in the text for search, inclusive
423      * @param end the stopping point of search, exclusive
424      * @param p a string that has no wildcard
425      * @return the starting index in the text of the pattern , or -1 if not found
426      */

427     protected int textPosIn(String JavaDoc text, int start, int end, String JavaDoc p) {
428
429         int plen = p.length();
430         int max = end - plen;
431
432         if (!fIgnoreCase) {
433             int i = text.indexOf(p, start);
434             if (i == -1 || i > max) {
435                 return -1;
436             }
437             return i;
438         }
439
440         for (int i = start; i <= max; ++i) {
441             if (text.regionMatches(true, i, p, 0, plen)) {
442                 return i;
443             }
444         }
445
446         return -1;
447     }
448 }
449
Popular Tags