KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > jdt > internal > debug > core > StringMatcher


/*******************************************************************************
 * Copyright (c) 2000, 2003 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 *
 * Contributors:
 * IBM Corporation - initial API and implementation
 *******************************************************************************/

11 package org.eclipse.jdt.internal.debug.core;
12
13  
14 import java.util.*;
15
/**
 * A string pattern matcher, supporting * and ? wildcards.
 * <p>
 * The pattern is parsed once, in the constructor, into the literal segments
 * that lie between '*' wildcards. A '?' inside a segment is stored as the
 * sentinel character {@link #fSingleWildCard}.
 */
public class StringMatcher {
    protected String fPattern;
    protected int fLength; // pattern length
    protected boolean fIgnoreWildCards;
    protected boolean fIgnoreCase;
    protected boolean fHasLeadingStar;
    protected boolean fHasTrailingStar;
    protected String fSegments[]; // the given pattern is split into * separated segments

    /*
     * Sum of the lengths of all segments, i.e. the minimum number of text
     * characters any match has to consume.
     */
    protected int fBound= 0;

    /* Sentinel stored in a segment wherever the pattern contained an unescaped '?'. */
    protected static final char fSingleWildCard= '\u0000';

    /**
     * A half-open range <code>[start, end)</code> of text indices.
     */
    public static class Position {
        int start; // inclusive
        int end; // exclusive

        public Position(int start, int end) {
            this.start= start;
            this.end= end;
        }

        public int getStart() {
            return start;
        }

        public int getEnd() {
            return end;
        }
    }

    /**
     * StringMatcher constructor takes in a String object that is a simple
     * pattern which may contain '*' for 0 and many characters and
     * '?' for exactly one character.
     *
     * Literal '*' and '?' characters must be escaped in the pattern
     * e.g., "\*" means literal "*", and "\\" means a literal "\".
     *
     * A backslash in front of any other character is not an escape sequence:
     * both the backslash and the character stay in the pattern, e.g. "\a"
     * matches the two characters "\a". A single backslash at the very end of
     * the pattern is likewise taken literally.
     *
     * If invoking the StringMatcher with string literals in Java, don't forget
     * escape characters are represented by "\\".
     *
     * @param pattern the pattern to match text against
     * @param ignoreCase if true, case is ignored
     * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
     * (everything is taken literally).
     * @throws IllegalArgumentException if <code>pattern</code> is null
     */
    public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
        if (pattern == null) {
            throw new IllegalArgumentException();
        }
        fIgnoreCase= ignoreCase;
        fIgnoreWildCards= ignoreWildCards;
        fPattern= pattern;
        fLength= pattern.length();

        if (fIgnoreWildCards) {
            parseNoWildCards();
        } else {
            parseWildCards();
        }
    }

    /**
     * Find the first occurrence of the pattern between <code>start</code> (inclusive)
     * and <code>end</code> (exclusive). The range is clamped to the text first.
     *
     * @param text the String object to search in
     * @param start the starting index of the search range, inclusive
     * @param end the ending index of the search range, exclusive
     * @return a <code>StringMatcher.Position</code> object that keeps the starting
     * (inclusive) and ending positions (exclusive) of the first occurrence of the
     * pattern in the specified range of the text; null if not found or if the
     * (clamped) range is empty. For an empty pattern the empty position
     * <code>(start, start)</code> is returned, and for a pattern made only of
     * '*' the whole range <code>(start, end)</code>.
     * Note that for a pattern like "*abc*" with leading and trailing stars, the position
     * of "abc" is returned. For the pattern "*??*", searching "abcdf" from index 1
     * returns (1,3).
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public StringMatcher.Position find(String text, int start, int end) {
        if (text == null) {
            throw new IllegalArgumentException();
        }

        int tlen= text.length();
        if (start < 0) {
            start= 0;
        }
        if (end > tlen) {
            end= tlen;
        }
        if (end < 0 || start >= end) {
            return null;
        }
        if (fLength == 0) {
            return new Position(start, start);
        }
        if (fIgnoreWildCards) {
            int x= posIn(text, start, end);
            if (x < 0) {
                return null;
            }
            return new Position(x, x + fLength);
        }

        int segCount= fSegments.length;
        if (segCount == 0) { // pattern contains only '*'(s)
            return new Position(start, end);
        }

        int curPos= start;
        int matchStart= -1;
        int i;
        for (i= 0; i < segCount && curPos < end; ++i) {
            String current= fSegments[i];
            int nextMatch= regExpPosIn(text, curPos, end, current);
            if (nextMatch < 0) {
                return null;
            }
            if (i == 0) {
                matchStart= nextMatch;
            }
            curPos= nextMatch + current.length();
        }
        if (i < segCount) {
            // ran out of text before every segment was located
            return null;
        }
        return new Position(matchStart, curPos);
    }

    /**
     * Match the whole of the given <code>text</code> against the pattern.
     *
     * @param text a String object
     * @return true if matched, otherwise false
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text) {
        if (text == null) {
            // same contract as match(String, int, int) and find(...)
            throw new IllegalArgumentException();
        }
        return match(text, 0, text.length());
    }

    /**
     * Given the starting (inclusive) and the ending (exclusive) positions in the
     * <code>text</code>, determine if the given substring matches the pattern.
     *
     * @param text a String object that contains the substring to match
     * @param start marks the starting position (inclusive) of the substring
     * @param end marks the ending index (exclusive) of the substring
     * @return true if the specified portion of the text matches the pattern
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text, int start, int end) {
        if (null == text) {
            throw new IllegalArgumentException();
        }

        if (start > end) {
            return false;
        }

        if (fIgnoreWildCards) {
            return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
        }
        int segCount= fSegments.length;
        if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) { // pattern contains only '*'(s)
            return true;
        }
        if (start == end) {
            return fLength == 0;
        }
        if (fLength == 0) {
            // start != end here, and an empty pattern only matches empty text
            return false;
        }

        int tlen= text.length();
        if (start < 0) {
            start= 0;
        }
        if (end > tlen) {
            end= tlen;
        }

        // Every segment character consumes exactly one text character, so a
        // range shorter than fBound can never match. Checking the range length
        // (not just end) also keeps the region comparisons below inside the text.
        if (end - start < fBound) {
            return false;
        }

        int tCurPos= start;
        int i= 0;
        String current= fSegments[i];
        int segLength= current.length();

        /* process first segment: without a leading star it is anchored at start */
        if (!fHasLeadingStar) {
            if (!regExpRegionMatches(text, start, current, 0, segLength)) {
                return false;
            }
            ++i;
            tCurPos= tCurPos + segLength;
        }
        if ((fSegments.length == 1) && (!fHasLeadingStar) && (!fHasTrailingStar)) {
            // only one segment to match, no '*' wildcards specified
            return tCurPos == end;
        }

        /* process middle segments: each is located at its leftmost position */
        while (i < segCount) {
            current= fSegments[i];
            int currentMatch;
            if (current.indexOf(fSingleWildCard) < 0) {
                // plain text segment
                currentMatch= textPosIn(text, tCurPos, end, current);
            } else {
                // segment contains '?'
                currentMatch= regExpPosIn(text, tCurPos, end, current);
            }
            if (currentMatch < 0) {
                return false;
            }
            tCurPos= currentMatch + current.length();
            i++;
        }

        /* process final segment: without a trailing star it must sit at the very end */
        if (!fHasTrailingStar && tCurPos != end) {
            int clen= current.length();
            return regExpRegionMatches(text, end - clen, current, 0, clen);
        }
        return i == segCount;
    }

    /**
     * Sets up the segments for the case where wildcards are ignored: the
     * whole pattern is taken literally as a single segment.
     */
    private void parseNoWildCards() {
        fSegments= new String[1];
        fSegments[0]= fPattern;
        fBound= fLength;
    }

    /**
     * Parses the pattern into segments separated by wildcard '*' characters,
     * resolving escape sequences and replacing each unescaped '?' by
     * {@link #fSingleWildCard}. Also records whether the pattern starts
     * and/or ends with an unescaped '*'.
     */
    private void parseWildCards() {
        // the first character can never be escaped
        fHasLeadingStar= fPattern.startsWith("*"); //$NON-NLS-1$

        // A segment holds at least one pattern character, so fLength is an
        // upper bound for the number of segments.
        String[] collected= new String[fLength];
        int count= 0;

        // True while the most recently parsed token is an unescaped '*'.
        // Looking only at the character before a final '*' is not enough:
        // in "a\\*" that backslash is itself escaped and the star is a wildcard.
        boolean lastTokenIsStar= false;

        int pos= 0;
        StringBuffer buf= new StringBuffer();
        while (pos < fLength) {
            char c= fPattern.charAt(pos++);
            lastTokenIsStar= false;
            switch (c) {
                case '\\':
                    if (pos >= fLength) {
                        // lone backslash at the end of the pattern
                        buf.append(c);
                    } else {
                        char next= fPattern.charAt(pos++);
                        /* if it's an escape sequence */
                        if (next == '*' || next == '?' || next == '\\') {
                            buf.append(next);
                        } else {
                            /* not an escape sequence, just insert literally */
                            buf.append(c);
                            buf.append(next);
                        }
                    }
                    break;
                case '*':
                    lastTokenIsStar= true;
                    if (buf.length() > 0) {
                        /* new segment */
                        collected[count++]= buf.toString();
                        fBound += buf.length();
                        buf.setLength(0);
                    }
                    break;
                case '?':
                    /* append special character representing single match wildcard */
                    buf.append(fSingleWildCard);
                    break;
                default:
                    buf.append(c);
            }
        }

        /* add last buffer to segment list */
        if (buf.length() > 0) {
            collected[count++]= buf.toString();
            fBound += buf.length();
        }

        fHasTrailingStar= lastTokenIsStar;

        fSegments= new String[count];
        System.arraycopy(collected, 0, fSegments, 0, count);
    }

    /**
     * Searches for the whole pattern, taken literally, in a range of the text.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @return the starting index in the text of the pattern, or -1 if it does
     * not occur completely inside the range
     */
    protected int posIn(String text, int start, int end) { // no wild card in pattern
        int max= end - fLength;

        if (!fIgnoreCase) {
            int i= text.indexOf(fPattern, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i= start; i <= max; ++i) {
            if (text.regionMatches(true, i, fPattern, 0, fLength)) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Searches a range of the text for a segment that may contain single
     * character wildcards.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a parsed segment, in which '?' is represented by {@link #fSingleWildCard}
     * @return the starting index in the text of the segment, or -1 if not found
     */
    protected int regExpPosIn(String text, int start, int end, String p) {
        int plen= p.length();

        int max= end - plen;
        for (int i= start; i <= max; ++i) {
            if (regExpRegionMatches(text, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compares <code>plen</code> characters of the text with a region of a
     * parsed segment, honouring single character wildcards and the case
     * setting of this matcher. No bounds checking is done here: the caller
     * has to make sure both regions lie inside their strings.
     *
     * @param text a String to match
     * @param tStart index in <code>text</code> where the comparison starts
     * @param p a parsed segment, in which '?' is represented by {@link #fSingleWildCard}
     * @param pStart index in <code>p</code> where the comparison starts
     * @param plen number of characters to compare
     * @return true if the two regions match
     */
    protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
        while (plen-- > 0) {
            char tchar= text.charAt(tStart++);
            char pchar= p.charAt(pStart++);

            /* process wild cards */
            if (!fIgnoreWildCards) {
                /* skip single wild cards */
                if (pchar == fSingleWildCard) {
                    continue;
                }
            }
            if (pchar == tchar) {
                continue;
            }
            if (fIgnoreCase) {
                if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) {
                    continue;
                }
                // comparing after converting to upper case doesn't handle all cases;
                // also compare after converting to lower case
                if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }

    /**
     * Searches a range of the text for a plain segment, honouring the case
     * setting of this matcher.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a string that has no wildcard
     * @return the starting index in the text of <code>p</code>, or -1 if it
     * does not occur completely inside the range
     */
    protected int textPosIn(String text, int start, int end, String p) {
        int plen= p.length();
        int max= end - plen;

        if (!fIgnoreCase) {
            int i= text.indexOf(p, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i= start; i <= max; ++i) {
            if (text.regionMatches(true, i, p, 0, plen)) {
                return i;
            }
        }

        return -1;
    }
}
396
Popular Tags