KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > team > internal > ccvs > core > util > StringMatcher


1 /*******************************************************************************
2  * Copyright (c) 2000, 2006 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  * Matt McCutchen (hashproduct+eclipse@gmail.com) - Bug 132260 Eclipse doesn't understand negated character classes in .cvsignore
11  *******************************************************************************/

12 package org.eclipse.team.internal.ccvs.core.util;
13
14 import java.util.Vector JavaDoc;
15 import java.util.HashMap JavaDoc;
16
17 /**
18  * A StringMatcher contains a glob and matches it against strings.
19  * StringMatcher supports * and ? wildcards and character classes, possibly
20  * negated by !, that contain single characters and/or ranges.
21  * Note: code copied from org.eclipse.jdt.internal.core.util.StringMatcher on April 3, 2001
22  * (version 0.1 - 010901H18 [rename jbl]).
23  */

24 public class StringMatcher {
25
26     protected static class CharacterClass {
27         final boolean isNegated;
28         final String JavaDoc text;
29
30         CharacterClass(boolean isNegated, String JavaDoc text) {
31             this.isNegated = isNegated;
32             this.text = text;
33         }
34
35         boolean listed(char c) {
36             for (int i = 0; i < text.length(); ) {
37                 if (i + 2 < text.length() && text.charAt(i + 1) == '-') {
38                     if (c >= text.charAt(i) && c <= text.charAt(i + 2))
39                         return true;
40                     i += 3;
41                 } else {
42                     if (c == text.charAt(i))
43                         return true;
44                     i++;
45                 }
46             }
47             return false;
48         }
49         boolean match(char c) {
50             return listed(c) ^ isNegated;
51         }
52     }
53     
54     protected String JavaDoc fPattern;
55     protected int fLength; // pattern length
56
protected boolean fIgnoreWildCards;
57     protected boolean fIgnoreCase;
58     protected boolean fHasLeadingStar;
59     protected boolean fHasTrailingStar;
60     protected String JavaDoc fSegments[]; //the given pattern is split into * separated segments
61
protected HashMap JavaDoc/*<Integer, CharacterClass>*/ fCharacterClassMaps[];
62
63     /* boundary value beyond which we don't need to search in the text */
64     protected int fBound = 0;
65     
66     /** \? in pattern becomes ? in fSegments, while ? in pattern becomes this */
67     protected static final char fSingleWildCard = '\u0000';
68     
69     public static class Position {
70         int start; //inclusive
71
int end; //exclusive
72
public Position(int start, int end) {
73             this.start = start;
74             this.end = end;
75         }
76         public int getStart() {
77             return start;
78         }
79         public int getEnd() {
80             return end;
81         }
82     }
83
84     /**
85      * Find the first occurrence of the pattern between <code>start</code> (inclusive)
86      * and <code>end</code> (exclusive).
87      * @param text the String object to search in
88      * @param start the starting index of the search range, inclusive
89      * @param end the ending index of the search range, exclusive
90      * @return a <code>StringMatcher.Position</code> object that keeps the starting
91      * (inclusive) and ending positions (exclusive) of the first occurrence of the
92      * pattern in the specified range of the text; return null if not found or subtext
93      * is empty (start==end). A pair of zeros is returned if pattern is empty string
94      * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
95      * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
96      */

97     public StringMatcher.Position find(String JavaDoc text, int start, int end) {
98         if (fPattern == null|| text == null)
99             throw new IllegalArgumentException JavaDoc();
100             
101         int tlen = text.length();
102         if (start < 0)
103             start = 0;
104         if (end > tlen)
105             end = tlen;
106         if (end < 0 ||start >= end )
107             return null;
108         if (fLength == 0)
109             return new Position(start, start);
110         if (fIgnoreWildCards) {
111             int x = posIn(text, start, end);
112             if (x < 0)
113                 return null;
114             return new Position(x, x+fLength);
115         }
116
117         int segCount = fSegments.length;
118         if (segCount == 0)//pattern contains only '*'(s)
119
return new Position (start, end);
120                     
121         int curPos = start;
122         int matchStart = -1;
123         int i;
124         for (i = 0; i < segCount && curPos < end; ++i) {
125             String JavaDoc current = fSegments[i];
126             int nextMatch = regExpPosIn(text, curPos, end, current, fCharacterClassMaps[i]);
127             if (nextMatch < 0 )
128                 return null;
129             if(i == 0)
130                 matchStart = nextMatch;
131             curPos = nextMatch + current.length();
132         }
133         if (i < segCount)
134             return null;
135         return new Position(matchStart, curPos);
136     }
137
138     /**
139      * Constructs a StringMatcher that matches strings against the glob
140      * <code>aPattern</code>.
141      *
142      * <code>aPattern</code> may contain "?"s, which match single characters,
143      * "*"s, which match zero or more characters, and character classes in
144      * "[...]". All characters other than "*", "?", and "[" match themselves,
145      * except for "\", which escapes the following character. For example,
146      * "\*" matches "*" and "\a" matches "a", while "\\" matches "\". Remember
147      * that Java string literals have an additional level of escaping, so a
148      * string literal for a glob matching a single backslash is written "\\\\".
149      *
150      * "[" begins a character class, which may contain characters and/or ranges;
151      * "]" ends the class. A character class matches any single character in
152      * it; for example, "[ac-e]" matches an "a", a "c", a "d", or an "e". A
153      * negated character class begins with "[!" and matches any single character
154      * not listed. Inside a character class, "\" loses its special meaning as
155      * an escape character. The fancier POSIX requirements for character
156      * classes are not supported: ranges use Unicode character numbers, and
157      * (for example) [:alpha:], [.ch.], and [=a=] are not recognized.
158      *
159      * @param aPattern the glob to match text with
160      * @param ignoreCase if true, case is ignored
161      * @param ignoreWildCards if true, the pattern is taken literally instead of
162      * as a glob
163      */

164     public StringMatcher(String JavaDoc aPattern, boolean ignoreCase, boolean ignoreWildCards) {
165         fIgnoreCase = ignoreCase;
166         fIgnoreWildCards = ignoreWildCards;
167         fLength = aPattern.length();
168
169         /* convert case */
170         if (fIgnoreCase) {
171             fPattern = aPattern.toUpperCase();
172         } else {
173             fPattern = aPattern;
174         }
175         
176         if (fIgnoreWildCards) {
177             parseNoWildCards();
178         } else {
179             parseWildCards();
180         }
181     }
182     /**
183      * Given the starting (inclusive) and the ending (exclusive) poisitions in the
184      * <code>text</code>, determine if the given substring matches with aPattern
185      * @return true if the specified portion of the text matches the pattern
186      * @param text a String object that contains the substring to match
187      * @param start marks the starting position (inclusive) of the substring
188      * @param end marks the ending index (exclusive) of the substring
189      */

190     public boolean match(String JavaDoc text, int start, int end) {
191         if (null == text)
192             throw new IllegalArgumentException JavaDoc();
193
194         if (start > end)
195             return false;
196
197         if (fIgnoreWildCards)
198             return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
199         int segCount= fSegments.length;
200         if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern contains only '*'(s)
201
return true;
202         if (start == end)
203             return fLength == 0;
204         if (fLength == 0)
205             return start == end;
206
207         int tlen= text.length();
208         if (start < 0)
209             start= 0;
210         if (end > tlen)
211             end= tlen;
212
213         int tCurPos= start;
214         int bound= end - fBound;
215         if ( bound < 0)
216             return false;
217         int i=0;
218         String JavaDoc current= fSegments[i];
219         HashMap JavaDoc/*<Integer, CharacterClass>*/ curCharClassMap= fCharacterClassMaps[i];
220         int segLength= current.length();
221
222         /* process first segment */
223         if (!fHasLeadingStar){
224             if(!regExpRegionMatches(text, start, current, 0, segLength, curCharClassMap)) {
225                 return false;
226             } else {
227                 ++i;
228                 tCurPos= tCurPos + segLength;
229             }
230         }
231         if ((fSegments.length == 1) && (!fHasLeadingStar) && (!fHasTrailingStar)) {
232             // only one segment to match, no wildcards specified
233
return tCurPos == end;
234         }
235         /* process middle segments */
236         while (i < segCount) {
237             current= fSegments[i];
238             curCharClassMap= fCharacterClassMaps[i];
239             int currentMatch;
240             int k= current.indexOf(fSingleWildCard);
241             if (k < 0) {
242                 currentMatch= textPosIn(text, tCurPos, end, current);
243                 if (currentMatch < 0)
244                     return false;
245             } else {
246                 currentMatch= regExpPosIn(text, tCurPos, end, current, curCharClassMap);
247                 if (currentMatch < 0)
248                     return false;
249             }
250             tCurPos= currentMatch + current.length();
251             i++;
252         }
253
254         /* process final segment */
255         if (!fHasTrailingStar && tCurPos != end) {
256             int clen= current.length();
257             return regExpRegionMatches(text, end - clen, current, 0, clen, curCharClassMap);
258         }
259         return i == segCount ;
260     }
261     /**
262      * match the given <code>text</code> with the pattern
263      * @return true if matched eitherwise false
264      * @param text the String object to match against the pattern
265      */

266     public boolean match(String JavaDoc text) {
267         return match(text, 0, text.length());
268     }
269     /**
270      * This method parses the given pattern into segments separated by wildcard '*' characters.
271      * Since wildcards are not being used in this case, the pattern consists of a single segment.
272      */

273     private void parseNoWildCards() {
274         fSegments = new String JavaDoc[1];
275         fSegments[0] = fPattern;
276         fBound = fLength;
277     }
278     /**
279      * This method parses the given pattern into segments separated by wildcard '*' characters.
280      * @param p a String object that is a simple regular expression with *  and/or ? 
281      */

282     private void parseWildCards() {
283         if(fPattern.startsWith("*"))//$NON-NLS-1$
284
fHasLeadingStar = true;
285
286         Vector JavaDoc temp = new Vector JavaDoc();
287         HashMap JavaDoc/*<Integer, CharacterClass>*/ segmentCCs = null;
288         Vector JavaDoc/*<HashMap<Integer, CharacterClass>>*/ allCCs = new Vector JavaDoc();
289
290         int pos = 0;
291         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
292         while (pos < fLength) {
293             char c = fPattern.charAt(pos++);
294             fHasTrailingStar = false;
295             switch (c) {
296                 case '\\':
297                     if (pos >= fLength) {
298                         buf.append(c);
299                     } else {
300                         c = fPattern.charAt(pos++);
301                         buf.append(c);
302                     }
303                 break;
304                 case '*':
305                     fHasTrailingStar = true;
306                     if (buf.length() > 0) {
307                         /* new segment */
308                         temp.addElement(buf.toString());
309                         allCCs.addElement(segmentCCs);
310                         fBound += buf.length();
311                         buf.setLength(0);
312                         segmentCCs = null;
313                     }
314                 break;
315                 case '[':
316                     if (segmentCCs == null)
317                         segmentCCs = new HashMap JavaDoc/*<Integer, CharacterClass>*/();
318                     if (pos >= fLength) {
319                         // Unterminated; take [ literally for lack of anything better to do
320
buf.append(c);
321                         break;
322                     }
323                     boolean negated = (fPattern.charAt(pos) == '!');
324                     int beginPos = (negated ? pos + 1 : pos);
325                     int endPos = fPattern.indexOf(']', beginPos + 1);
326                     if (endPos == -1) {
327                         // Unterminated; take [ literally for lack of anything better to do
328
buf.append(c);
329                         break;
330                     }
331                     CharacterClass cc = new CharacterClass(negated, fPattern.substring(beginPos, endPos));
332                     segmentCCs.put(new Integer JavaDoc(buf.length()), cc);
333                     pos = endPos + 1;
334                     /* fall through; fSingleWildCard can also represent a character class */
335                 case '?':
336                     /* append special character representing single match wildcard */
337                     buf.append(fSingleWildCard);
338                 break;
339                 default:
340                     buf.append(c);
341             }
342         }
343
344         /* add last buffer to segment list */
345         if (buf.length() > 0) {
346             temp.addElement(buf.toString());
347             allCCs.addElement(segmentCCs);
348             fBound += buf.length();
349         }
350             
351         fSegments = new String JavaDoc[temp.size()];
352         temp.copyInto(fSegments);
353         fCharacterClassMaps = new HashMap JavaDoc[allCCs.size()];
354         allCCs.copyInto(fCharacterClassMaps);
355     }
356     /**
357      * @param text a string which contains no wildcard
358      * @param start the starting index in the text for search, inclusive
359      * @param end the stopping point of search, exclusive
360      * @return the starting index in the text of the pattern , or -1 if not found
361      */

362     protected int posIn(String JavaDoc text, int start, int end) {//no wild card in pattern
363
return textPosIn(text, start, end, fPattern);
364     }
365     /**
366      * @param text a simple regular expression that may only contain '?'(s)
367      * @param start the starting index in the text for search, inclusive
368      * @param end the stopping point of search, exclusive
369      * @param p a simple regular expression that may contains '?'
370      * @param caseIgnored whether the pattern is not casesensitive
371      * @return the starting index in the text of the pattern , or -1 if not found
372      */

373     protected int regExpPosIn(String JavaDoc text, int start, int end, String JavaDoc p, HashMap JavaDoc/*<Integer, CharacterClass>*/ ccMap) {
374         int plen = p.length();
375         
376         int max = end - plen;
377         for (int i = start; i <= max; ++i) {
378             if (regExpRegionMatches(text, i, p, 0, plen, ccMap))
379                 return i;
380         }
381         return -1;
382     }
383     /**
384      *
385      * @return boolean
386      * @param text a String to match
387      * @param start int that indicates the starting index of match, inclusive
388      * @param end int that indicates the ending index of match, exclusive
389      * @param p String, a simple regular expression that may contain '?'
390      * @param ignoreCase boolean indicating whether <code>p</code> is case sensitive
391      * @param ccMap maps each index of p at which fSingleWildCard occurs (as an
392      * Integer) to CharacterClass data, or null for a plain old ?
393      */

394     protected boolean regExpRegionMatches(String JavaDoc text, int tStart, String JavaDoc p, int pStart, int plen, HashMap JavaDoc/*<Integer, CharacterClass>*/ ccMap) {
395         for (int ppos = 0; plen-- > 0; ppos++) {
396             char tchar = text.charAt(tStart++);
397             char pchar = p.charAt(pStart++);
398
399             /* process wild cards */
400             if (!fIgnoreWildCards) {
401                 /* skip single wild cards */
402                 if (pchar == fSingleWildCard) {
403                     if (ccMap == null)
404                         continue;
405                     CharacterClass cc = (CharacterClass) ccMap.get(new Integer JavaDoc(ppos));
406                     if (cc == null || cc.match(tchar))
407                         continue;
408                     else
409                         return false;
410                 }
411             }
412             if (pchar == tchar)
413                 continue;
414             if (fIgnoreCase) {
415                 char tc = Character.toUpperCase(tchar);
416                 if (tc == pchar)
417                     continue;
418             }
419             return false;
420         }
421         return true;
422     }
423     /**
424      * @param text the string to match
425      * @param start the starting index in the text for search, inclusive
426      * @param end the stopping point of search, exclusive
427      * @param p a string that has no wildcard
428      * @param ignoreCase boolean indicating whether p is case sensitive
429      * @return the starting index in the text of the pattern , or -1 if not found
430      */

431     protected int textPosIn(String JavaDoc text, int start, int end, String JavaDoc p) {
432         
433         int plen = p.length();
434         int max = end - plen;
435         
436         if (!fIgnoreCase) {
437             int i = text.indexOf(p, start);
438             if (i == -1 || i > max)
439                 return -1;
440             return i;
441         }
442         
443         for (int i = start; i <= max; ++i) {
444             if (text.regionMatches(true, i, p, 0, plen))
445                 return i;
446         }
447         
448         return -1;
449     }
450 }
451
Popular Tags