KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > team > internal > core > StringMatcher


1 /*******************************************************************************
2  * Copyright (c) 2000, 2006 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.team.internal.core;
12
13
14 import java.util.*;
15
/**
 * A string pattern matcher supporting the <code>*</code> (any run of characters,
 * including none) and <code>?</code> (exactly one character) wildcards.
 * Note: code copied from org.eclipse.jdt.internal.core.util.StringMatcher on April 3, 2001
 * (version 0.1 - 010901H18 [rename jbl]).
 */
public class StringMatcher {
    /** The pattern text; upper-cased by the constructor when case is ignored. */
    protected String fPattern;
    /** Length of {@link #fPattern}. */
    protected int fLength;
    /** When true the pattern is taken literally: no wildcards, no escapes. */
    protected boolean fIgnoreWildCards;
    /** When true matching is case-insensitive. */
    protected boolean fIgnoreCase;
    /** True if the pattern starts with an unescaped '*'. */
    protected boolean fHasLeadingStar;
    /** True if the pattern ends with an unescaped '*'. */
    protected boolean fHasTrailingStar;
    /** The pattern split into '*'-separated segments; '?' is stored as {@link #fSingleWildCard}. */
    protected String[] fSegments;

    /** Sum of all segment lengths, i.e. the minimum length of any matching text. */
    protected int fBound = 0;

    /** Internal stand-in for the '?' wildcard inside a parsed segment. */
    protected static final char fSingleWildCard = '\u0000';

    /**
     * A half-open range <code>[start, end)</code> inside a searched text.
     */
    public static class Position {
        int start; //inclusive
        int end; //exclusive

        public Position(int start, int end) {
            this.start = start;
            this.end = end;
        }

        public int getStart() {
            return start;
        }

        public int getEnd() {
            return end;
        }
    }

    /**
     * Finds the first occurrence of the pattern between <code>start</code> (inclusive)
     * and <code>end</code> (exclusive).
     *
     * @param text the String object to search in
     * @param start the starting index of the search range, inclusive
     * @param end the ending index of the search range, exclusive
     * @return a <code>StringMatcher.Position</code> holding the starting (inclusive) and
     * ending (exclusive) positions of the first occurrence of the pattern in the given
     * range of the text; <code>null</code> if not found or if the range is empty
     * (start &gt;= end after clamping to the text). For an empty pattern the empty range
     * <code>(start, start)</code> is returned. For a pattern like "*abc*" with leading and
     * trailing stars, the position of "abc" is returned. For a pattern made only of stars
     * the whole range is returned.
     * @throws IllegalArgumentException if <code>text</code> is <code>null</code>
     */
    public StringMatcher.Position find(String text, int start, int end) {
        if (fPattern == null || text == null) {
            throw new IllegalArgumentException();
        }

        // Clamp the requested range to the text.
        int tlen = text.length();
        if (start < 0) {
            start = 0;
        }
        if (end > tlen) {
            end = tlen;
        }
        if (end < 0 || start >= end) {
            return null;
        }
        if (fLength == 0) {
            return new Position(start, start);
        }
        if (fIgnoreWildCards) {
            int x = posIn(text, start, end);
            if (x < 0) {
                return null;
            }
            return new Position(x, x + fLength);
        }

        int segCount = fSegments.length;
        if (segCount == 0) { // pattern contains only '*'(s)
            return new Position(start, end);
        }

        // Locate every segment in order; each search resumes after the previous hit.
        int curPos = start;
        int matchStart = -1;
        int i;
        for (i = 0; i < segCount && curPos < end; ++i) {
            String current = fSegments[i];
            int nextMatch = regExpPosIn(text, curPos, end, current);
            if (nextMatch < 0) {
                return null;
            }
            if (i == 0) {
                matchStart = nextMatch;
            }
            curPos = nextMatch + current.length();
        }
        // The text ran out before every segment was placed.
        if (i < segCount) {
            return null;
        }
        return new Position(matchStart, curPos);
    }

    /**
     * Creates a matcher for a simple pattern which may contain '*' for zero or more
     * characters and '?' for exactly one character.
     *
     * Literal '*' and '?' characters must be escaped in the pattern,
     * e.g. "\*" means literal "*"; "\\" means a literal "\".
     *
     * A backslash in front of any other character is not an escape sequence: both the
     * backslash and the character are kept literally, e.g. "\a" matches "\a".
     *
     * If invoking the StringMatcher with string literals in Java, don't forget
     * escape characters are represented by "\\".
     *
     * @param aPattern the pattern to match text with
     * @param ignoreCase if true, case is ignored
     * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
     * (everything is taken literally).
     * @throws IllegalArgumentException if <code>aPattern</code> is <code>null</code>
     */
    public StringMatcher(String aPattern, boolean ignoreCase, boolean ignoreWildCards) {
        if (aPattern == null) {
            throw new IllegalArgumentException();
        }
        fIgnoreCase = ignoreCase;
        fIgnoreWildCards = ignoreWildCards;

        /* convert case */
        if (fIgnoreCase) {
            // Text characters are folded with the locale-independent
            // Character.toUpperCase, so fold the pattern without locale rules too.
            fPattern = aPattern.toUpperCase(Locale.ENGLISH);
        } else {
            fPattern = aPattern;
        }
        // Measure after conversion: upper-casing may change the length.
        fLength = fPattern.length();

        if (fIgnoreWildCards) {
            parseNoWildCards();
        } else {
            parseWildCards();
        }
    }

    /**
     * Given the starting (inclusive) and the ending (exclusive) positions in the
     * <code>text</code>, determines whether that substring matches the pattern.
     *
     * @param text a String object that contains the substring to match
     * @param start marks the starting position (inclusive) of the substring
     * @param end marks the ending index (exclusive) of the substring
     * @return true if the specified portion of the text matches the pattern
     * @throws IllegalArgumentException if <code>text</code> is <code>null</code>
     */
    public boolean match(String text, int start, int end) {
        if (null == text) {
            throw new IllegalArgumentException();
        }

        if (start > end) {
            return false;
        }

        if (fIgnoreWildCards) {
            return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
        }
        int segCount = fSegments.length;
        if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) { // pattern contains only '*'(s)
            return true;
        }
        if (start == end) {
            return fLength == 0;
        }
        if (fLength == 0) {
            return start == end;
        }

        int tlen = text.length();
        if (start < 0) {
            start = 0;
        }
        if (end > tlen) {
            end = tlen;
        }

        // The range must hold at least all literal/'?' characters of the pattern.
        // Measuring from start (not from 0) keeps every segment comparison below
        // inside [start, end).
        if (end - start < fBound) {
            return false;
        }

        int tCurPos = start;
        int i = 0;
        String current = fSegments[i];
        int segLength = current.length();

        /* process first segment: without a leading star it is anchored at start */
        if (!fHasLeadingStar) {
            if (!regExpRegionMatches(text, start, current, 0, segLength)) {
                return false;
            }
            ++i;
            tCurPos = tCurPos + segLength;
        }
        if ((fSegments.length == 1) && (!fHasLeadingStar) && (!fHasTrailingStar)) {
            // only one segment to match, no wildcards specified
            return tCurPos == end;
        }

        /* process middle segments: each one is placed at its first occurrence */
        while (i < segCount) {
            current = fSegments[i];
            int currentMatch;
            if (current.indexOf(fSingleWildCard) < 0) {
                currentMatch = textPosIn(text, tCurPos, end, current);
            } else {
                currentMatch = regExpPosIn(text, tCurPos, end, current);
            }
            if (currentMatch < 0) {
                return false;
            }
            tCurPos = currentMatch + current.length();
            i++;
        }

        /* process final segment: without a trailing star it must sit at the very end */
        if (!fHasTrailingStar && tCurPos != end) {
            int clen = current.length();
            return regExpRegionMatches(text, end - clen, current, 0, clen);
        }
        return i == segCount;
    }

    /**
     * Matches the whole of the given <code>text</code> against the pattern.
     *
     * @param text a String object
     * @return true if matched, otherwise false
     * @throws IllegalArgumentException if <code>text</code> is <code>null</code>
     */
    public boolean match(String text) {
        if (text == null) {
            throw new IllegalArgumentException();
        }
        return match(text, 0, text.length());
    }

    /**
     * Sets up the segment list for the case where wildcards are not used:
     * the pattern consists of a single literal segment.
     */
    private void parseNoWildCards() {
        fSegments = new String[1];
        fSegments[0] = fPattern;
        fBound = fLength;
    }

    /**
     * Parses the pattern into segments separated by unescaped '*' characters, resolving
     * escape sequences and replacing each unescaped '?' by {@link #fSingleWildCard}.
     * Also records whether the pattern starts and ends with an unescaped '*'.
     */
    private void parseWildCards() {
        // The first character cannot be escaped, so a plain prefix test is enough.
        if (fPattern.startsWith("*")) { //$NON-NLS-1$
            fHasLeadingStar = true;
        }

        // Raw collection kept to match the file's pre-generics style.
        List temp = new ArrayList();

        int pos = 0;
        StringBuffer buf = new StringBuffer();
        // Whether the most recently parsed token was an unescaped '*'. An escaped star
        // is consumed as the second character of an escape sequence and never shows up
        // here as 'c', so "\\*" (escaped backslash + wildcard) is told apart from "\*".
        boolean lastTokenWasStar = false;
        while (pos < fLength) {
            char c = fPattern.charAt(pos++);
            lastTokenWasStar = (c == '*');
            switch (c) {
                case '\\':
                    if (pos >= fLength) {
                        // dangling backslash at the end: keep it literally
                        buf.append(c);
                    } else {
                        char next = fPattern.charAt(pos++);
                        /* if it's an escape sequence */
                        if (next == '*' || next == '?' || next == '\\') {
                            buf.append(next);
                        } else {
                            /* not an escape sequence, just insert literally */
                            buf.append(c);
                            buf.append(next);
                        }
                    }
                    break;
                case '*':
                    if (buf.length() > 0) {
                        /* new segment */
                        temp.add(buf.toString());
                        fBound += buf.length();
                        buf.setLength(0);
                    }
                    break;
                case '?':
                    /* append special character representing single match wildcard */
                    buf.append(fSingleWildCard);
                    break;
                default:
                    buf.append(c);
            }
        }
        fHasTrailingStar = lastTokenWasStar;

        /* add last buffer to segment list */
        if (buf.length() > 0) {
            temp.add(buf.toString());
            fBound += buf.length();
        }

        fSegments = (String[]) temp.toArray(new String[temp.size()]);
    }

    /**
     * Searches for the whole (wildcard-free) pattern in a range of the text.
     *
     * @param text the text to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @return the starting index in the text of the pattern, or -1 if not found
     */
    protected int posIn(String text, int start, int end) { //no wild card in pattern
        int max = end - fLength;

        if (!fIgnoreCase) {
            int i = text.indexOf(fPattern, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, fPattern, 0, fLength)) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Searches a range of the text for a segment that may contain single-character
     * wildcards.
     *
     * @param text the text to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a parsed segment that may contain '?' placeholders
     * @return the starting index in the text of the segment, or -1 if not found
     */
    protected int regExpPosIn(String text, int start, int end, String p) {
        int plen = p.length();

        int max = end - plen;
        for (int i = start; i <= max; ++i) {
            if (regExpRegionMatches(text, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compares a region of the text with a region of a parsed segment, character by
     * character. The caller must ensure both regions lie inside their strings.
     *
     * @param text the text
     * @param tStart the start index of the region in the text
     * @param p the parsed segment
     * @param pStart the start index of the region in the segment
     * @param plen the number of characters to compare
     * @return whether the region matches
     */
    protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
        while (plen-- > 0) {
            char tchar = text.charAt(tStart++);
            char pchar = p.charAt(pStart++);

            /* process wild cards */
            if (!fIgnoreWildCards) {
                /* skip single wild cards */
                if (pchar == fSingleWildCard) {
                    continue;
                }
            }
            if (pchar == tchar) {
                continue;
            }
            if (fIgnoreCase) {
                // the pattern was upper-cased by the constructor; fold the text to match
                char tc = Character.toUpperCase(tchar);
                if (tc == pchar) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }

    /**
     * Searches a range of the text for a segment that contains no wildcard.
     *
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a string that has no wildcard
     * @return the starting index in the text of the segment, or -1 if not found
     */
    protected int textPosIn(String text, int start, int end, String p) {
        int plen = p.length();
        int max = end - plen;

        if (!fIgnoreCase) {
            int i = text.indexOf(p, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, p, 0, plen)) {
                return i;
            }
        }

        return -1;
    }
}
398
Popular Tags