KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > ui > externaltools > internal > model > StringMatcher


/*******************************************************************************
 * Copyright (c) 2000, 2004 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 *
 * Contributors:
 * IBM Corporation - initial API and implementation
 *******************************************************************************/

11 package org.eclipse.ui.externaltools.internal.model;
12  
13 import java.util.Vector JavaDoc;
14
/**
 * A string pattern matcher, supporting * and ? wildcards.
 */
public class StringMatcher {
    /** The pattern exactly as it was handed to the constructor. */
    protected String fPattern;
    /** Length of {@link #fPattern} in characters. */
    protected int fLength;
    /** If true, the pattern is taken literally (no wildcards, no escapes). */
    protected boolean fIgnoreWildCards;
    /** If true, characters are compared without regard to case. */
    protected boolean fIgnoreCase;
    /** True if the pattern starts with a '*' wildcard. */
    protected boolean fHasLeadingStar;
    /** True if the pattern ends with an unescaped '*' wildcard. */
    protected boolean fHasTrailingStar;
    /** The pattern split into '*' separated segments; '?' is stored as {@link #fSingleWildCard}. */
    protected String fSegments[];

    /*
     * Sum of the lengths of all segments, i.e. the minimum number of
     * characters a text region must contain to be able to match.
     */
    protected int fBound= 0;

    /** Internal stand-in character that represents the '?' wildcard inside a segment. */
    protected static final char fSingleWildCard= '\u0000';

    /**
     * A half-open range <code>[start, end)</code> inside a text.
     */
    public static class Position {
        int start; // inclusive
        int end; // exclusive

        public Position(int start, int end) {
            this.start= start;
            this.end= end;
        }

        /**
         * @return the start index, inclusive
         */
        public int getStart() {
            return start;
        }

        /**
         * @return the end index, exclusive
         */
        public int getEnd() {
            return end;
        }
    }

    /**
     * StringMatcher constructor takes in a String object that is a simple
     * pattern which may contain '*' for 0 and many characters and
     * '?' for exactly one character.
     *
     * Literal '*' and '?' characters must be escaped in the pattern
     * e.g., "\*" means literal "*", etc.
     *
     * An escaped escape character ("\\") means a literal "\". A backslash in
     * front of any other character, or a single backslash at the very end of
     * the pattern, is kept literally.
     *
     * If invoking the StringMatcher with string literals in Java, don't forget
     * escape characters are represented by "\\".
     *
     * @param pattern the pattern to match text against
     * @param ignoreCase if true, case is ignored
     * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
     * (everything is taken literally).
     * @throws IllegalArgumentException if <code>pattern</code> is null
     */
    public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
        if (pattern == null) {
            throw new IllegalArgumentException();
        }
        fIgnoreCase= ignoreCase;
        fIgnoreWildCards= ignoreWildCards;
        fPattern= pattern;
        fLength= pattern.length();

        if (fIgnoreWildCards) {
            parseNoWildCards();
        } else {
            parseWildCards();
        }
    }

    /**
     * Find the first occurrence of the pattern between <code>start</code> (inclusive)
     * and <code>end</code> (exclusive). The range is clipped to the bounds of the text.
     *
     * @param text the String object to search in
     * @param start the starting index of the search range, inclusive
     * @param end the ending index of the search range, exclusive
     * @return a <code>StringMatcher.Position</code> object that keeps the starting
     * (inclusive) and ending positions (exclusive) of the first occurrence of the
     * pattern in the specified range of the text; null if not found or if the
     * (clipped) range is empty. For an empty pattern the empty range
     * <code>(start, start)</code> is returned; for a pattern made only of '*' the
     * whole range <code>(start, end)</code> is returned.
     * Note that for a pattern like "*abc*" with leading and trailing stars, the position
     * of "abc" is returned. For a pattern like "*??*" in text "abcdf", (1,3) is returned.
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public StringMatcher.Position find(String text, int start, int end) {
        if (text == null) {
            throw new IllegalArgumentException();
        }

        int tlen= text.length();
        if (start < 0) {
            start= 0;
        }
        if (end > tlen) {
            end= tlen;
        }
        if (end < 0 || start >= end) {
            return null;
        }
        if (fLength == 0) {
            return new Position(start, start);
        }
        if (fIgnoreWildCards) {
            int x= posIn(text, start, end);
            if (x < 0) {
                return null;
            }
            return new Position(x, x + fLength);
        }

        int segCount= fSegments.length;
        if (segCount == 0) { // pattern contains only '*'(s)
            return new Position(start, end);
        }

        int curPos= start;
        int matchStart= -1;
        int i;
        for (i= 0; i < segCount && curPos < end; ++i) {
            String current= fSegments[i];
            int nextMatch= regExpPosIn(text, curPos, end, current);
            if (nextMatch < 0) {
                return null;
            }
            if (i == 0) {
                matchStart= nextMatch;
            }
            curPos= nextMatch + current.length();
        }
        // the range was exhausted before every segment could be placed
        if (i < segCount) {
            return null;
        }
        return new Position(matchStart, curPos);
    }

    /**
     * Match the given <code>text</code> with the pattern.
     *
     * @param text a String object
     * @return true if matched, otherwise false
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text) {
        // Report null the same way match(String, int, int) and find do,
        // instead of failing with a NullPointerException on text.length().
        if (text == null) {
            throw new IllegalArgumentException();
        }
        return match(text, 0, text.length());
    }

    /**
     * Given the starting (inclusive) and the ending (exclusive) positions in the
     * <code>text</code>, determine if the given substring matches with the pattern.
     * When wildcards are enabled, the range is clipped to the bounds of the text
     * before matching.
     *
     * @param text a String object that contains the substring to match
     * @param start marks the starting position (inclusive) of the substring
     * @param end marks the ending index (exclusive) of the substring
     * @return true if the specified portion of the text matches the pattern
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text, int start, int end) {
        if (null == text) {
            throw new IllegalArgumentException();
        }

        if (start > end) {
            return false;
        }

        if (fIgnoreWildCards) {
            return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
        }
        int segCount= fSegments.length;
        if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) { // pattern contains only '*'(s)
            return true;
        }
        if (start == end) {
            return fLength == 0;
        }
        if (fLength == 0) {
            return false; // a non-empty region never matches the empty pattern
        }

        int tlen= text.length();
        if (start < 0) {
            start= 0;
        }
        if (end > tlen) {
            end= tlen;
        }

        // The region must be able to hold every segment. Measuring it from
        // start (not from 0) keeps the segment comparisons below inside
        // [start, end): otherwise a first segment could read past 'end' and
        // report a false match, or run off the end of the text.
        if (end - start < fBound) {
            return false;
        }

        int tCurPos= start;
        int i= 0;
        String current= fSegments[i];
        int segLength= current.length();

        /* process first segment */
        if (!fHasLeadingStar) {
            if (!regExpRegionMatches(text, start, current, 0, segLength)) {
                return false;
            }
            ++i;
            tCurPos= tCurPos + segLength;
        }
        if ((fSegments.length == 1) && (!fHasLeadingStar) && (!fHasTrailingStar)) {
            // only one segment to match, no wildcards specified
            return tCurPos == end;
        }
        /* process middle segments */
        while (i < segCount) {
            current= fSegments[i];
            int currentMatch;
            int k= current.indexOf(fSingleWildCard);
            if (k < 0) {
                // no '?' in this segment: a plain substring search is enough
                currentMatch= textPosIn(text, tCurPos, end, current);
            } else {
                currentMatch= regExpPosIn(text, tCurPos, end, current);
            }
            if (currentMatch < 0) {
                return false;
            }
            tCurPos= currentMatch + current.length();
            i++;
        }

        /* process final segment */
        if (!fHasTrailingStar && tCurPos != end) {
            // without a trailing star the last segment has to sit at the very end
            int clen= current.length();
            return regExpRegionMatches(text, end - clen, current, 0, clen);
        }
        return i == segCount;
    }

    /**
     * Sets up the segments for a pattern in which wildcards are not used:
     * the whole pattern is the single segment.
     */
    private void parseNoWildCards() {
        fSegments= new String[1];
        fSegments[0]= fPattern;
        fBound= fLength;
    }

    /**
     * Parses the pattern into segments separated by wildcard '*' characters,
     * resolving escape sequences and replacing every '?' with
     * {@link #fSingleWildCard}. Also records whether the pattern starts or
     * ends with a '*' wildcard.
     */
    private void parseWildCards() {
        fHasLeadingStar= fPattern.startsWith("*"); //$NON-NLS-1$

        // Whether the most recently parsed token is an unescaped '*'. Taking
        // this from the parser state (instead of peeking at the character in
        // front of a final '*') also handles an escaped backslash followed by
        // a real star correctly.
        boolean lastTokenIsStar= false;

        Vector temp= new Vector();

        int pos= 0;
        StringBuffer buf= new StringBuffer();
        while (pos < fLength) {
            char c= fPattern.charAt(pos++);
            lastTokenIsStar= false;
            switch (c) {
                case '\\':
                    if (pos >= fLength) {
                        /* lone backslash at the end of the pattern: keep it */
                        buf.append(c);
                    } else {
                        char next= fPattern.charAt(pos++);
                        /* if it's an escape sequence */
                        if (next == '*' || next == '?' || next == '\\') {
                            buf.append(next);
                        } else {
                            /* not an escape sequence, just insert literally */
                            buf.append(c);
                            buf.append(next);
                        }
                    }
                break;
                case '*':
                    lastTokenIsStar= true;
                    if (buf.length() > 0) {
                        /* new segment */
                        temp.addElement(buf.toString());
                        fBound += buf.length();
                        buf.setLength(0);
                    }
                break;
                case '?':
                    /* append special character representing single match wildcard */
                    buf.append(fSingleWildCard);
                break;
                default:
                    buf.append(c);
            }
        }
        fHasTrailingStar= lastTokenIsStar;

        /* add last buffer to segment list */
        if (buf.length() > 0) {
            temp.addElement(buf.toString());
            fBound += buf.length();
        }

        fSegments= new String[temp.size()];
        temp.copyInto(fSegments);
    }

    /**
     * Searches the text for the whole pattern, taken literally (no wildcards).
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @return the starting index in the text of the pattern, or -1 if not found
     */
    protected int posIn(String text, int start, int end) { // no wild card in pattern
        int max= end - fLength; // last index at which the pattern still fits before end

        if (!fIgnoreCase) {
            int i= text.indexOf(fPattern, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i= start; i <= max; ++i) {
            if (text.regionMatches(true, i, fPattern, 0, fLength)) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Searches the text for a segment that may contain single-character wildcards.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a pattern segment that may contain '?' wildcards (stored as
     * {@link #fSingleWildCard})
     * @return the starting index in the text of the segment, or -1 if not found
     */
    protected int regExpPosIn(String text, int start, int end, String p) {
        int plen= p.length();

        int max= end - plen; // last index at which the segment still fits before end
        for (int i= start; i <= max; ++i) {
            if (regExpRegionMatches(text, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compares <code>plen</code> characters of the text, beginning at
     * <code>tStart</code>, with the same number of characters of the segment,
     * beginning at <code>pStart</code>. Unless wildcards are ignored, a
     * {@link #fSingleWildCard} in the segment matches any character. Case is
     * ignored if this matcher was created with <code>ignoreCase</code>.
     * The caller must make sure both ranges lie inside their strings.
     *
     * @param text a String to match
     * @param tStart index in the text at which the comparison starts, inclusive
     * @param p a pattern segment that may contain '?' wildcards (stored as
     * {@link #fSingleWildCard})
     * @param pStart index in the segment at which the comparison starts, inclusive
     * @param plen the number of characters to compare
     * @return true if the two regions match
     */
    protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
        while (plen-- > 0) {
            char tchar= text.charAt(tStart++);
            char pchar= p.charAt(pStart++);

            /* process wild cards */
            if (!fIgnoreWildCards) {
                /* skip single wild cards */
                if (pchar == fSingleWildCard) {
                    continue;
                }
            }
            if (pchar == tchar) {
                continue;
            }
            if (fIgnoreCase) {
                if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) {
                    continue;
                }
                // comparing after converting to upper case doesn't handle all cases;
                // also compare after converting to lower case
                if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }

    /**
     * Searches the text for a segment that contains no wildcards.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a string that has no wildcard
     * @return the starting index in the text of the segment, or -1 if not found
     */
    protected int textPosIn(String text, int start, int end, String p) {
        int plen= p.length();
        int max= end - plen; // last index at which p still fits before end

        if (!fIgnoreCase) {
            int i= text.indexOf(p, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i= start; i <= max; ++i) {
            if (text.regionMatches(true, i, p, 0, plen)) {
                return i;
            }
        }

        return -1;
    }
}
Popular Tags