KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > ui > internal > navigator > StringMatcher


1 /*******************************************************************************
2  * Copyright (c) 2000, 2006 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11
12 package org.eclipse.ui.internal.navigator;
13
14 import java.util.Vector JavaDoc;
15
16 /**
17  * A string pattern matcher, suppporting "*" and "?" wildcards.
18  */

19 public class StringMatcher {
20     protected String JavaDoc fPattern;
21
22     protected int fLength; // pattern length
23

24     protected boolean fIgnoreWildCards;
25
26     protected boolean fIgnoreCase;
27
28     protected boolean fHasLeadingStar;
29
30     protected boolean fHasTrailingStar;
31
32     protected String JavaDoc fSegments[]; // the given pattern is split into *
33

34     // separated segments
35

36     /* boundary value beyond which we don't need to search in the text */
37     protected int fBound = 0;
38
39     protected static final char fSingleWildCard = '\u0000';
40
41     /**
42      *
43      */

44     static class Position {
45         int start; // inclusive
46

47         int end; // exclusive
48

49         Position(int start, int end) {
50             this.start = start;
51             this.end = end;
52         }
53
54         int getStart() {
55             return start;
56         }
57
58         int getEnd() {
59             return end;
60         }
61     }
62
63     /**
64      * StringMatcher constructor takes in a String object that is a simple
65      * pattern which may contain '*' for 0 and many characters and '?' for
66      * exactly one character.
67      *
68      * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
69      * means literal "*", etc.
70      *
71      * Escaping any other character (including the escape character itself),
72      * just results in that character in the pattern. e.g., "\a" means "a" and
73      * "\\" means "\"
74      *
75      * If invoking the StringMatcher with string literals in Java, don't forget
76      * escape characters are represented by "\\".
77      *
78      * @param pattern
79      * the pattern to match text against
80      * @param ignoreCase
81      * if true, case is ignored
82      * @param ignoreWildCards
83      * if true, wild cards and their escape sequences are ignored
84      * (everything is taken literally).
85      */

86     public StringMatcher(String JavaDoc pattern, boolean ignoreCase,
87             boolean ignoreWildCards) {
88         if (pattern == null) {
89             throw new IllegalArgumentException JavaDoc();
90         }
91         fIgnoreCase = ignoreCase;
92         fIgnoreWildCards = ignoreWildCards;
93         fPattern = pattern;
94         fLength = pattern.length();
95
96         if (fIgnoreWildCards) {
97             parseNoWildCards();
98         } else {
99             parseWildCards();
100         }
101     }
102
103     /**
104      * Find the first occurrence of the pattern between
105      * <code>start</code)(inclusive)
106      * and <code>end</code>(exclusive).
107      * @param text the String object to search in
108      * @param start the starting index of the search range, inclusive
109      * @param end the ending index of the search range, exclusive
110      * @return an <code>StringMatcher.Position</code> object that keeps the starting
111      * (inclusive) and ending positions (exclusive) of the first occurrence of the
112      * pattern in the specified range of the text; return null if not found or subtext
113      * is empty (start==end). A pair of zeros is returned if pattern is empty string
114      * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
115      * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
116      */

117     public StringMatcher.Position find(String JavaDoc text, int start, int end) {
118         if (text == null) {
119             throw new IllegalArgumentException JavaDoc();
120         }
121
122         int tlen = text.length();
123         if (start < 0) {
124             start = 0;
125         }
126         if (end > tlen) {
127             end = tlen;
128         }
129         if (end < 0 || start >= end) {
130             return null;
131         }
132         if (fLength == 0) {
133             return new Position(start, start);
134         }
135         if (fIgnoreWildCards) {
136             int x = posIn(text, start, end);
137             if (x < 0) {
138                 return null;
139             }
140             return new Position(x, x + fLength);
141         }
142
143         int segCount = fSegments.length;
144         if (segCount == 0) {
145             return new Position(start, end);
146         }
147
148         int curPos = start;
149         int matchStart = -1;
150         int i;
151         for (i = 0; i < segCount && curPos < end; ++i) {
152             String JavaDoc current = fSegments[i];
153             int nextMatch = regExpPosIn(text, curPos, end, current);
154             if (nextMatch < 0) {
155                 return null;
156             }
157             if (i == 0) {
158                 matchStart = nextMatch;
159             }
160             curPos = nextMatch + current.length();
161         }
162         if (i < segCount) {
163             return null;
164         }
165         return new Position(matchStart, curPos);
166     }
167
168     /**
169      * match the given <code>text</code> with the pattern
170      *
171      * @return true if matched eitherwise false
172      * @param text
173      * a String object
174      */

175     public boolean match(String JavaDoc text) {
176         if (text == null) {
177             return false;
178         }
179         return match(text, 0, text.length());
180     }
181
182     /**
183      * Given the starting (inclusive) and the ending (exclusive) positions in
184      * the <code>text</code>, determine if the given substring matches with
185      * aPattern
186      *
187      * @return true if the specified portion of the text matches the pattern
188      * @param text
189      * a String object that contains the substring to match
190      * @param start
191      * marks the starting position (inclusive) of the substring
192      * @param end
193      * marks the ending index (exclusive) of the substring
194      */

195     public boolean match(String JavaDoc text, int start, int end) {
196         if (null == text) {
197             throw new IllegalArgumentException JavaDoc();
198         }
199
200         if (start > end) {
201             return false;
202         }
203
204         if (fIgnoreWildCards) {
205             return (end - start == fLength)
206                     && fPattern.regionMatches(fIgnoreCase, 0, text, start,
207                             fLength);
208         }
209         int segCount = fSegments.length;
210         if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) {
211             // contains
212
// only
213
// '*'(s)
214
return true;
215         }
216         if (start == end) {
217             return fLength == 0;
218         }
219         if (fLength == 0) {
220             return start == end;
221         }
222
223         int tlen = text.length();
224         if (start < 0) {
225             start = 0;
226         }
227         if (end > tlen) {
228             end = tlen;
229         }
230
231         int tCurPos = start;
232         int bound = end - fBound;
233         if (bound < 0) {
234             return false;
235         }
236         int i = 0;
237         String JavaDoc current = fSegments[i];
238         int segLength = current.length();
239
240         /* process first segment */
241         if (!fHasLeadingStar) {
242             if (!regExpRegionMatches(text, start, current, 0, segLength)) {
243                 return false;
244             }
245             ++i;
246             tCurPos = tCurPos + segLength;
247
248         }
249         if ((fSegments.length == 1) && (!fHasLeadingStar)
250                 && (!fHasTrailingStar)) {
251             // only one segment to match, no wildcards specified
252
return tCurPos == end;
253         }
254         /* process middle segments */
255         while (i < segCount) {
256             current = fSegments[i];
257             int currentMatch;
258             int k = current.indexOf(fSingleWildCard);
259             if (k < 0) {
260                 currentMatch = textPosIn(text, tCurPos, end, current);
261                 if (currentMatch < 0) {
262                     return false;
263                 }
264             } else {
265                 currentMatch = regExpPosIn(text, tCurPos, end, current);
266                 if (currentMatch < 0) {
267                     return false;
268                 }
269             }
270             tCurPos = currentMatch + current.length();
271             i++;
272         }
273
274         /* process final segment */
275         if (!fHasTrailingStar && tCurPos != end) {
276             int clen = current.length();
277             return regExpRegionMatches(text, end - clen, current, 0, clen);
278         }
279         return i == segCount;
280     }
281
282     /**
283      * This method parses the given pattern into segments seperated by wildcard
284      * '*' characters. Since wildcards are not being used in this case, the
285      * pattern consists of a single segment.
286      */

287     private void parseNoWildCards() {
288         fSegments = new String JavaDoc[1];
289         fSegments[0] = fPattern;
290         fBound = fLength;
291     }
292
293     /**
294      * Parses the given pattern into segments seperated by wildcard '*'
295      * characters.
296      *
297      */

298     private void parseWildCards() {
299         if (fPattern.startsWith("*")) { //$NON-NLS-1$
300
fHasLeadingStar = true;
301         }
302         if (fPattern.endsWith("*")) {//$NON-NLS-1$
303
/* make sure it's not an escaped wildcard */
304             if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
305                 fHasTrailingStar = true;
306             }
307         }
308
309         Vector JavaDoc temp = new Vector JavaDoc();
310
311         int pos = 0;
312         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
313         while (pos < fLength) {
314             char c = fPattern.charAt(pos++);
315             switch (c) {
316             case '\\':
317                 if (pos >= fLength) {
318                     buf.append(c);
319                 } else {
320                     char next = fPattern.charAt(pos++);
321                     /* if it's an escape sequence */
322                     if (next == '*' || next == '?' || next == '\\') {
323                         buf.append(next);
324                     } else {
325                         /* not an escape sequence, just insert literally */
326                         buf.append(c);
327                         buf.append(next);
328                     }
329                 }
330                 break;
331             case '*':
332                 if (buf.length() > 0) {
333                     /* new segment */
334                     temp.addElement(buf.toString());
335                     fBound += buf.length();
336                     buf.setLength(0);
337                 }
338                 break;
339             case '?':
340                 /* append special character representing single match wildcard */
341                 buf.append(fSingleWildCard);
342                 break;
343             default:
344                 buf.append(c);
345             }
346         }
347
348         /* add last buffer to segment list */
349         if (buf.length() > 0) {
350             temp.addElement(buf.toString());
351             fBound += buf.length();
352         }
353
354         fSegments = new String JavaDoc[temp.size()];
355         temp.copyInto(fSegments);
356     }
357
358     /**
359      * @param text
360      * a string which contains no wildcard
361      * @param start
362      * the starting index in the text for search, inclusive
363      * @param end
364      * the stopping point of search, exclusive
365      * @return the starting index in the text of the pattern , or -1 if not
366      * found
367      */

368     protected int posIn(String JavaDoc text, int start, int end) {// no wild card in
369
// pattern
370
int max = end - fLength;
371
372         if (!fIgnoreCase) {
373             int i = text.indexOf(fPattern, start);
374             if (i == -1 || i > max) {
375                 return -1;
376             }
377             return i;
378         }
379
380         for (int i = start; i <= max; ++i) {
381             if (text.regionMatches(true, i, fPattern, 0, fLength)) {
382                 return i;
383             }
384         }
385
386         return -1;
387     }
388
389     /**
390      * @param text
391      * a simple regular expression that may only contain '?'(s)
392      * @param start
393      * the starting index in the text for search, inclusive
394      * @param end
395      * the stopping point of search, exclusive
396      * @param p
397      * a simple regular expression that may contains '?'
398      * @return the starting index in the text of the pattern , or -1 if not
399      * found
400      */

401     protected int regExpPosIn(String JavaDoc text, int start, int end, String JavaDoc p) {
402         int plen = p.length();
403
404         int max = end - plen;
405         for (int i = start; i <= max; ++i) {
406             if (regExpRegionMatches(text, i, p, 0, plen)) {
407                 return i;
408             }
409         }
410         return -1;
411     }
412
413     /**
414      *
415      * @return boolean
416      * @param text
417      * a String to match
418      * @param tStart
419      * indicates the starting index of match, inclusive
420      * @param p
421      * a simple regular expression that may contain '?'
422      * @param pStart
423      * @param plen
424      */

425     protected boolean regExpRegionMatches(String JavaDoc text, int tStart, String JavaDoc p,
426             int pStart, int plen) {
427         while (plen-- > 0) {
428             char tchar = text.charAt(tStart++);
429             char pchar = p.charAt(pStart++);
430
431             /* process wild cards */
432             if (!fIgnoreWildCards) {
433                 /* skip single wild cards */
434                 if (pchar == fSingleWildCard) {
435                     continue;
436                 }
437             }
438             if (pchar == tchar) {
439                 continue;
440             }
441             if (fIgnoreCase) {
442                 if (Character.toUpperCase(tchar) == Character
443                         .toUpperCase(pchar)) {
444                     continue;
445                 }
446                 // comparing after converting to upper case doesn't handle all
447
// cases;
448
// also compare after converting to lower case
449
if (Character.toLowerCase(tchar) == Character
450                         .toLowerCase(pchar)) {
451                     continue;
452                 }
453             }
454             return false;
455         }
456         return true;
457     }
458
459     /**
460      * @param text
461      * the string to match
462      * @param start
463      * the starting index in the text for search, inclusive
464      * @param end
465      * the stopping point of search, exclusive
466      * @param p
467      * a string that has no wildcard
468      * @return the starting index in the text of the pattern , or -1 if not
469      * found
470      */

471     protected int textPosIn(String JavaDoc text, int start, int end, String JavaDoc p) {
472
473         int plen = p.length();
474         int max = end - plen;
475
476         if (!fIgnoreCase) {
477             int i = text.indexOf(p, start);
478             if (i == -1 || i > max) {
479                 return -1;
480             }
481             return i;
482         }
483
484         for (int i = start; i <= max; ++i) {
485             if (text.regionMatches(true, i, p, 0, plen)) {
486                 return i;
487             }
488         }
489
490         return -1;
491     }
492 }
493
Popular Tags