KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > pde > internal > core > util > StringMatcher


1 /*******************************************************************************
2  * Copyright (c) 2000, 2003 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.pde.internal.core.util;
12
13 import java.util.*;
14
/**
 * A string pattern matcher supporting the <code>*</code> and <code>?</code> wildcards.
 * <p>
 * A pattern is parsed once (see {@link #setPattern(String)}) into literal
 * segments separated by <code>*</code>; a <code>?</code> inside a segment is stored as
 * {@link #fSingleWildCard}. Matching then walks the segments over the text.
 * </p>
 */
public class StringMatcher {
    /** The pattern exactly as passed to {@link #setPattern(String)}. */
    protected String fPattern;
    /** Length of {@link #fPattern}. */
    protected int fLength;
    /** When true the pattern is taken literally: no wildcards, no escapes. */
    protected boolean fIgnoreWildCards;
    /** When true, characters are compared case-insensitively. */
    protected boolean fIgnoreCase;
    /** True when the pattern starts with an unescaped <code>*</code>. */
    protected boolean fHasLeadingStar;
    /** True when the pattern ends with an unescaped <code>*</code>. */
    protected boolean fHasTrailingStar;
    /** The pattern split into <code>*</code>-separated segments (escapes already resolved). */
    protected String fSegments[];

    /**
     * Sum of the lengths of all segments, i.e. the minimum number of text
     * characters any match must consume.
     */
    protected int fBound = 0;

    /**
     * Character stored inside a segment in place of a <code>?</code> wildcard.
     * A literal NUL character in a wildcard pattern is therefore
     * indistinguishable from <code>?</code>.
     */
    protected static final char fSingleWildCard = '\u0000';

    /** A half-open range <code>[start, end)</code> of text indices. */
    public static class Position {
        int start; // inclusive
        int end; // exclusive

        public Position(int start, int end) {
            this.start = start;
            this.end = end;
        }

        /** @return the inclusive start index */
        public int getStart() {
            return start;
        }

        /** @return the exclusive end index */
        public int getEnd() {
            return end;
        }
    }

    /**
     * Creates a matcher for a simple pattern which may contain <code>*</code>
     * for zero or more characters and <code>?</code> for exactly one character.
     * <p>
     * A literal <code>*</code>, <code>?</code> or <code>\</code> must be escaped with a
     * backslash, e.g. <code>\*</code> means a literal <code>*</code> and <code>\\</code>
     * means a literal <code>\</code>. A backslash followed by any other character
     * is not an escape sequence: both characters are kept literally, so
     * <code>\a</code> matches the two characters <code>\a</code>. A lone backslash at
     * the end of the pattern is also kept literally.
     * </p>
     * <p>
     * When writing patterns as Java string literals, remember that a single
     * backslash is written <code>"\\"</code>.
     * </p>
     *
     * @param pattern the pattern to match text against; must not be null
     * @param ignoreCase if true, case is ignored
     * @param ignoreWildCards if true, wild cards and their escape sequences
     *        are ignored (everything is taken literally)
     * @throws IllegalArgumentException if <code>pattern</code> is null
     */
    public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
        fIgnoreCase = ignoreCase;
        fIgnoreWildCards = ignoreWildCards;
        setPattern(pattern);
    }

    /**
     * Replaces the current pattern and re-parses it using the case and
     * wildcard settings given at construction time.
     *
     * @param pattern the new pattern; must not be null
     * @throws IllegalArgumentException if <code>pattern</code> is null
     */
    public void setPattern(String pattern) {
        if (pattern == null) {
            throw new IllegalArgumentException();
        }

        fPattern = pattern;
        fLength = pattern.length();
        // Reset everything derived from the previous pattern before parsing.
        fHasLeadingStar = false;
        fHasTrailingStar = false;
        fBound = 0;
        fSegments = null;

        if (fIgnoreWildCards) {
            parseNoWildCards();
        } else {
            parseWildCards();
        }
    }

    /**
     * Finds the first occurrence of the pattern between <code>start</code>
     * (inclusive) and <code>end</code> (exclusive). Both indices are clamped
     * to the bounds of <code>text</code>.
     * <p>
     * Leading and trailing stars do not contribute to the reported range:
     * for <code>*abc*</code> the position of <code>abc</code> is returned, and for
     * <code>*??*</code> in <code>"abcdf"</code> the result is (0,2). A pattern made
     * only of stars yields the whole (clamped) range. An empty pattern
     * yields the empty range <code>(start, start)</code>.
     * </p>
     *
     * @param text the string to search in; must not be null
     * @param start the starting index of the search range, inclusive
     * @param end the ending index of the search range, exclusive
     * @return the position of the first occurrence, or null if the pattern
     *         is not found or the clamped range is empty
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public StringMatcher.Position find(String text, int start, int end) {
        if (text == null) {
            throw new IllegalArgumentException();
        }

        int tlen = text.length();
        if (start < 0) {
            start = 0;
        }
        if (end > tlen) {
            end = tlen;
        }
        if (end < 0 || start >= end) {
            return null;
        }
        if (fLength == 0) {
            return new Position(start, start);
        }
        if (fIgnoreWildCards) {
            int x = posIn(text, start, end);
            if (x < 0) {
                return null;
            }
            return new Position(x, x + fLength);
        }

        int segCount = fSegments.length;
        if (segCount == 0) {
            // pattern contains only '*'(s)
            return new Position(start, end);
        }

        int curPos = start;
        int matchStart = -1;
        int i;
        for (i = 0; i < segCount && curPos < end; ++i) {
            String current = fSegments[i];
            int nextMatch = regExpPosIn(text, curPos, end, current);
            if (nextMatch < 0) {
                return null;
            }
            if (i == 0) {
                matchStart = nextMatch;
            }
            curPos = nextMatch + current.length();
        }
        // Ran out of text before every segment was placed.
        if (i < segCount) {
            return null;
        }
        return new Position(matchStart, curPos);
    }

    /**
     * Matches the whole of <code>text</code> against the pattern.
     *
     * @param text the string to match; must not be null
     * @return true if the text matches, false otherwise
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text) {
        // Fail the same way as match(String, int, int) instead of with an NPE.
        if (text == null) {
            throw new IllegalArgumentException();
        }
        return match(text, 0, text.length());
    }

    /**
     * Determines whether the substring of <code>text</code> from
     * <code>start</code> (inclusive) to <code>end</code> (exclusive) matches the
     * pattern. In wildcard mode the indices are clamped to the bounds of
     * <code>text</code>; in literal mode they are used as given.
     *
     * @param text the string containing the substring to match; must not be null
     * @param start the starting index of the substring, inclusive
     * @param end the ending index of the substring, exclusive
     * @return true if the specified portion of the text matches the pattern
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text, int start, int end) {
        if (null == text) {
            throw new IllegalArgumentException();
        }

        if (start > end) {
            return false;
        }

        if (fIgnoreWildCards) {
            return (end - start == fLength)
                && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
        }
        int segCount = fSegments.length;
        if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) {
            // pattern contains only '*'(s)
            return true;
        }
        if (start == end) {
            return fLength == 0;
        }
        if (fLength == 0) {
            return false;
        }

        int tlen = text.length();
        if (start < 0) {
            start = 0;
        }
        if (end > tlen) {
            end = tlen;
        }

        // The range must be able to hold every segment. Measuring from start
        // (not from 0) keeps all comparisons below inside [start, end) and
        // also rejects ranges that became empty or inverted by clamping.
        if (end - start < fBound) {
            return false;
        }

        int tCurPos = start;
        int i = 0;
        String current = fSegments[i];
        int segLength = current.length();

        /* process first segment: without a leading star it is anchored at start */
        if (!fHasLeadingStar) {
            if (!regExpRegionMatches(text, start, current, 0, segLength)) {
                return false;
            }
            ++i;
            tCurPos = tCurPos + segLength;
        }
        if ((fSegments.length == 1) && (!fHasLeadingStar) && (!fHasTrailingStar)) {
            // only one segment to match, no '*' specified
            return tCurPos == end;
        }

        /* process middle segments: each is placed at its first occurrence */
        while (i < segCount) {
            current = fSegments[i];
            int currentMatch;
            if (current.indexOf(fSingleWildCard) < 0) {
                // no '?' in this segment: plain text search is sufficient
                currentMatch = textPosIn(text, tCurPos, end, current);
            } else {
                currentMatch = regExpPosIn(text, tCurPos, end, current);
            }
            if (currentMatch < 0) {
                return false;
            }
            tCurPos = currentMatch + current.length();
            i++;
        }

        /* process final segment: without a trailing star it is anchored at end */
        if (!fHasTrailingStar && tCurPos != end) {
            int clen = current.length();
            return regExpRegionMatches(text, end - clen, current, 0, clen);
        }
        return i == segCount;
    }

    /**
     * Sets up the segment list for literal mode: since wildcards are not
     * being used, the pattern consists of a single segment.
     */
    private void parseNoWildCards() {
        fSegments = new String[1];
        fSegments[0] = fPattern;
        fBound = fLength;
    }

    /**
     * Parses {@link #fPattern} into segments separated by unescaped
     * <code>*</code> characters, resolving escape sequences and replacing each
     * unescaped <code>?</code> with {@link #fSingleWildCard}. Also sets
     * {@link #fHasLeadingStar}, {@link #fHasTrailingStar} and {@link #fBound}.
     */
    private void parseWildCards() {
        // The first character cannot be escaped, so a plain prefix test is enough.
        fHasLeadingStar = fPattern.startsWith("*"); //$NON-NLS-1$

        // Whether the most recently parsed token was an unescaped '*'. Decided
        // while parsing so that an escaped backslash followed by a real star
        // ("\\*") is recognised as a trailing star and an escaped star ("\*")
        // is not.
        boolean lastTokenWasStar = false;

        Vector temp = new Vector();

        int pos = 0;
        StringBuffer buf = new StringBuffer();
        while (pos < fLength) {
            char c = fPattern.charAt(pos++);
            lastTokenWasStar = (c == '*');
            switch (c) {
                case '\\':
                    if (pos >= fLength) {
                        // lone backslash at the end of the pattern
                        buf.append(c);
                    } else {
                        char next = fPattern.charAt(pos++);
                        /* if it's an escape sequence */
                        if (next == '*' || next == '?' || next == '\\') {
                            buf.append(next);
                        } else {
                            /* not an escape sequence, just insert literally */
                            buf.append(c);
                            buf.append(next);
                        }
                    }
                    break;
                case '*':
                    if (buf.length() > 0) {
                        /* new segment */
                        temp.addElement(buf.toString());
                        fBound += buf.length();
                        buf.setLength(0);
                    }
                    break;
                case '?':
                    /* append special character representing single match wildcard */
                    buf.append(fSingleWildCard);
                    break;
                default:
                    buf.append(c);
                    break;
            }
        }
        fHasTrailingStar = lastTokenWasStar;

        /* add last buffer to segment list */
        if (buf.length() > 0) {
            temp.addElement(buf.toString());
            fBound += buf.length();
        }

        fSegments = new String[temp.size()];
        temp.copyInto(fSegments);
    }

    /**
     * Searches for the whole (literal) pattern in a range of the text,
     * honouring {@link #fIgnoreCase}.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @return the starting index in the text of the pattern, or -1 if not found
     */
    protected int posIn(String text, int start, int end) { // no wild card in pattern
        // last index at which the pattern still fits before end
        int max = end - fLength;

        if (!fIgnoreCase) {
            int i = text.indexOf(fPattern, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, fPattern, 0, fLength)) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Searches a range of the text for a segment that may contain
     * single-character wildcards.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a segment that may contain {@link #fSingleWildCard}
     * @return the starting index in the text of the segment, or -1 if not found
     */
    protected int regExpPosIn(String text, int start, int end, String p) {
        int plen = p.length();

        // last index at which the segment still fits before end
        int max = end - plen;
        for (int i = start; i <= max; ++i) {
            if (regExpRegionMatches(text, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compares <code>plen</code> characters of <code>text</code>, starting at
     * <code>tStart</code>, with <code>plen</code> characters of <code>p</code>,
     * starting at <code>pStart</code>. Unless wildcards are ignored, a
     * {@link #fSingleWildCard} in <code>p</code> matches any character.
     * Comparison honours {@link #fIgnoreCase}. The caller must ensure both
     * regions lie inside their strings.
     *
     * @param text the string to match
     * @param tStart the starting index of the region in <code>text</code>, inclusive
     * @param p a segment that may contain {@link #fSingleWildCard}
     * @param pStart the starting index of the region in <code>p</code>, inclusive
     * @param plen the number of characters to compare
     * @return true if the two regions match
     */
    protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
        while (plen-- > 0) {
            char tchar = text.charAt(tStart++);
            char pchar = p.charAt(pStart++);

            /* a single wild card matches whatever the text holds here */
            if (!fIgnoreWildCards && pchar == fSingleWildCard) {
                continue;
            }
            if (pchar == tchar) {
                continue;
            }
            if (fIgnoreCase) {
                if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) {
                    continue;
                }
                // comparing after converting to upper case doesn't handle all cases;
                // also compare after converting to lower case
                if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }

    /**
     * Searches a range of the text for a segment that contains no
     * wildcards, honouring {@link #fIgnoreCase}.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a string that has no wildcard
     * @return the starting index in the text of <code>p</code>, or -1 if not found
     */
    protected int textPosIn(String text, int start, int end, String p) {
        int plen = p.length();
        // last index at which p still fits before end
        int max = end - plen;

        if (!fIgnoreCase) {
            int i = text.indexOf(p, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, p, 0, plen)) {
                return i;
            }
        }

        return -1;
    }
}
408
Popular Tags