KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > core > internal > net > StringMatcher


1 /*******************************************************************************
2  * Copyright (c) 2007 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.core.internal.net;
12
13 import java.util.Vector JavaDoc;
14
/**
 * A string pattern matcher supporting the <code>*</code> (any run of characters,
 * including none) and <code>?</code> (exactly one character) wildcards.
 * Note: code copied from org.eclipse.jdt.internal.core.util.StringMatcher on April 3, 2001
 * (version 0.1 - 010901H18 [rename jbl]).
 */
public class StringMatcher {
    /** The pattern as used for matching (upper-cased per character when case is ignored). */
    protected String fPattern;
    /** Length of {@link #fPattern}. */
    protected int fLength;

    protected boolean fIgnoreWildCards;
    protected boolean fIgnoreCase;
    /** True when the pattern starts with a <code>*</code>. */
    protected boolean fHasLeadingStar;
    /** True when the pattern ends with an unescaped <code>*</code>. */
    protected boolean fHasTrailingStar;
    /** The pattern split into the literal/<code>?</code> segments found between <code>*</code>s. */
    protected String fSegments[];

    /* total length of all segments; a text range shorter than this can never match */
    protected int fBound = 0;

    /** Internal placeholder stored in a segment for each <code>?</code> wildcard. */
    protected static final char fSingleWildCard = '\u0000';

    /**
     * A half-open range <code>[start, end)</code> inside a text.
     */
    public static class Position {
        int start; //inclusive
        int end; //exclusive

        public Position(int start, int end) {
            this.start = start;
            this.end = end;
        }

        public int getStart() {
            return start;
        }

        public int getEnd() {
            return end;
        }
    }

    /**
     * StringMatcher constructor takes in a String object that is a simple
     * pattern which may contain '*' for 0 and many characters and
     * '?' for exactly one character.
     *
     * Literal '*' and '?' characters must be escaped in the pattern
     * e.g., "\*" means literal "*", etc. The escape character itself is
     * escaped the same way: "\\" means a literal "\".
     *
     * A backslash followed by any other character is not an escape sequence;
     * both characters are kept literally, e.g., "\a" matches the two
     * characters "\a". A single backslash at the very end of the pattern is
     * also kept literally.
     *
     * If invoking the StringMatcher with string literals in Java, don't forget
     * escape characters are represented by "\\".
     *
     * @param aPattern the pattern to match text with
     * @param ignoreCase if true, case is ignored
     * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
     * (everything is taken literally).
     * @throws IllegalArgumentException if <code>aPattern</code> is null
     */
    public StringMatcher(String aPattern, boolean ignoreCase, boolean ignoreWildCards) {
        if (aPattern == null) {
            throw new IllegalArgumentException();
        }
        fIgnoreCase = ignoreCase;
        fIgnoreWildCards = ignoreWildCards;

        /*
         * Convert case one character at a time: String.toUpperCase() depends on the
         * default locale and may change the length of the string, which would leave
         * the stored pattern and fLength out of sync.
         */
        fPattern = fIgnoreCase ? toUpperCasePerChar(aPattern) : aPattern;
        fLength = fPattern.length();

        if (fIgnoreWildCards) {
            parseNoWildCards();
        } else {
            parseWildCards();
        }
    }

    /**
     * Upper-cases every char of <code>s</code> independently with
     * {@link Character#toUpperCase(char)}; the result always has the same
     * length as the input.
     */
    private static String toUpperCasePerChar(String s) {
        char[] chars = s.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = Character.toUpperCase(chars[i]);
        }
        return new String(chars);
    }

    /**
     * Find the first occurrence of the pattern between <code>start</code>(inclusive)
     * and <code>end</code>(exclusive).
     * @param text the String object to search in
     * @param start the starting index of the search range, inclusive
     * @param end the ending index of the search range, exclusive
     * @return an <code>StringMatcher.Position</code> object that keeps the starting
     * (inclusive) and ending positions (exclusive) of the first occurrence of the
     * pattern in the specified range of the text; return null if not found or subtext
     * is empty (start==end). An empty range positioned at <code>start</code> is
     * returned if pattern is empty string.
     * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
     * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public StringMatcher.Position find(String text, int start, int end) {
        if (fPattern == null || text == null) {
            throw new IllegalArgumentException();
        }

        /* clamp the requested range to the text */
        int tlen = text.length();
        if (start < 0) {
            start = 0;
        }
        if (end > tlen) {
            end = tlen;
        }
        if (end < 0 || start >= end) {
            return null;
        }
        if (fLength == 0) {
            return new Position(start, start);
        }
        if (fIgnoreWildCards) {
            int x = posIn(text, start, end);
            if (x < 0) {
                return null;
            }
            return new Position(x, x + fLength);
        }

        int segCount = fSegments.length;
        if (segCount == 0) { //pattern contains only '*'(s)
            return new Position(start, end);
        }

        /* locate every segment in order, each one after the end of the previous one */
        int curPos = start;
        int matchStart = -1;
        int i;
        for (i = 0; i < segCount && curPos < end; ++i) {
            String current = fSegments[i];
            int nextMatch = regExpPosIn(text, curPos, end, current);
            if (nextMatch < 0) {
                return null;
            }
            if (i == 0) {
                matchStart = nextMatch;
            }
            curPos = nextMatch + current.length();
        }
        if (i < segCount) {
            /* ran out of text before all segments were located */
            return null;
        }
        return new Position(matchStart, curPos);
    }

    /**
     * Given the starting (inclusive) and the ending (exclusive) positions in the
     * <code>text</code>, determine if the given substring matches with aPattern
     * @return true if the specified portion of the text matches the pattern
     * @param text a String object that contains the substring to match
     * @param start marks the starting position (inclusive) of the substring
     * @param end marks the ending index (exclusive) of the substring
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text, int start, int end) {
        if (null == text) {
            throw new IllegalArgumentException();
        }

        if (start > end) {
            return false;
        }

        if (fIgnoreWildCards) {
            return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
        }
        int segCount = fSegments.length;
        if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) { // pattern contains only '*'(s)
            return true;
        }
        if (start == end) {
            return fLength == 0;
        }
        if (fLength == 0) {
            /* empty pattern against a non-empty range */
            return false;
        }

        int tlen = text.length();
        if (start < 0) {
            start = 0;
        }
        if (end > tlen) {
            end = tlen;
        }

        /*
         * The clamped range must be able to hold every segment. Measuring the range
         * from start (not from index 0) keeps all reads below inside [start, end);
         * it also rejects ranges that became empty or inverted through clamping.
         */
        if (end - start < fBound) {
            return false;
        }

        int tCurPos = start;
        int i = 0;
        String current = fSegments[i];
        int segLength = current.length();

        /* process first segment: without a leading star it must sit exactly at start */
        if (!fHasLeadingStar) {
            if (!regExpRegionMatches(text, start, current, 0, segLength)) {
                return false;
            }
            ++i;
            tCurPos = tCurPos + segLength;
        }
        if ((fSegments.length == 1) && (!fHasLeadingStar) && (!fHasTrailingStar)) {
            // only one segment to match, no wildcards specified
            return tCurPos == end;
        }
        /* process middle segments: take the leftmost occurrence of each one */
        while (i < segCount) {
            current = fSegments[i];
            int currentMatch;
            int k = current.indexOf(fSingleWildCard);
            if (k < 0) {
                currentMatch = textPosIn(text, tCurPos, end, current);
            } else {
                currentMatch = regExpPosIn(text, tCurPos, end, current);
            }
            if (currentMatch < 0) {
                return false;
            }
            tCurPos = currentMatch + current.length();
            i++;
        }

        /* process final segment: without a trailing star it must end exactly at end */
        if (!fHasTrailingStar && tCurPos != end) {
            int clen = current.length();
            return regExpRegionMatches(text, end - clen, current, 0, clen);
        }
        return i == segCount;
    }

    /**
     * match the given <code>text</code> with the pattern
     * @return true if matched otherwise false
     * @param text a String object
     * @throws IllegalArgumentException if <code>text</code> is null
     */
    public boolean match(String text) {
        if (text == null) {
            throw new IllegalArgumentException();
        }
        return match(text, 0, text.length());
    }

    /**
     * This method parses the given pattern into segments separated by wildcard '*' characters.
     * Since wildcards are not being used in this case, the pattern consists of a single segment.
     */
    private void parseNoWildCards() {
        fSegments = new String[1];
        fSegments[0] = fPattern;
        fBound = fLength;
    }

    /**
     * This method parses the stored pattern into segments separated by wildcard '*' characters.
     * Each '?' is stored in its segment as {@link #fSingleWildCard}; escaped '*', '?' and '\'
     * are stored as the plain character.
     */
    private void parseWildCards() {
        if (fPattern.startsWith("*")) { //$NON-NLS-1$
            fHasLeadingStar = true;
        }

        Vector temp = new Vector();

        int pos = 0;
        StringBuffer buf = new StringBuffer();
        while (pos < fLength) {
            char c = fPattern.charAt(pos++);
            switch (c) {
                case '\\':
                    if (pos >= fLength) {
                        /* lone backslash at the end of the pattern: keep it literally */
                        buf.append(c);
                    } else {
                        char next = fPattern.charAt(pos++);
                        /* if it's an escape sequence */
                        if (next == '*' || next == '?' || next == '\\') {
                            buf.append(next);
                        } else {
                            /* not an escape sequence, just insert literally */
                            buf.append(c);
                            buf.append(next);
                        }
                    }
                    break;
                case '*':
                    /*
                     * Escaped stars are consumed by the backslash case above, so a star
                     * seen here is always a real wildcard; if it is the last character
                     * the pattern has a trailing star.
                     */
                    if (pos == fLength) {
                        fHasTrailingStar = true;
                    }
                    if (buf.length() > 0) {
                        /* new segment */
                        temp.addElement(buf.toString());
                        fBound += buf.length();
                        buf.setLength(0);
                    }
                    break;
                case '?':
                    /* append special character representing single match wildcard */
                    buf.append(fSingleWildCard);
                    break;
                default:
                    buf.append(c);
            }
        }

        /* add last buffer to segment list */
        if (buf.length() > 0) {
            temp.addElement(buf.toString());
            fBound += buf.length();
        }

        fSegments = new String[temp.size()];
        temp.copyInto(fSegments);
    }

    /**
     * Searches for the whole pattern, taken literally (no wildcards).
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @return the starting index in the text of the pattern , or -1 if not found
     */
    protected int posIn(String text, int start, int end) { //no wild card in pattern
        int max = end - fLength;

        if (!fIgnoreCase) {
            int i = text.indexOf(fPattern, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, fPattern, 0, fLength)) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Searches for a segment that may contain single-character wildcards.
     * @param text the string to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a segment that may contain '?' placeholders
     * @return the starting index in the text of the pattern , or -1 if not found
     */
    protected int regExpPosIn(String text, int start, int end, String p) {
        int plen = p.length();

        int max = end - plen;
        for (int i = start; i <= max; ++i) {
            if (regExpRegionMatches(text, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compares <code>plen</code> characters of <code>text</code> starting at
     * <code>tStart</code> with <code>p</code> starting at <code>pStart</code>.
     * Unless wildcards are ignored, a {@link #fSingleWildCard} in <code>p</code>
     * matches any character. The caller must make sure both regions are in range.
     *
     * @param text the text
     * @param tStart the start
     * @param p the pattern
     * @param pStart the pattern start
     * @param plen the pattern length
     * @return whether the region matches
     */
    protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
        while (plen-- > 0) {
            char tchar = text.charAt(tStart++);
            char pchar = p.charAt(pStart++);

            /* process wild cards */
            if (!fIgnoreWildCards) {
                /* skip single wild cards */
                if (pchar == fSingleWildCard) {
                    continue;
                }
            }
            if (pchar == tchar) {
                continue;
            }
            if (fIgnoreCase) {
                if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) {
                    continue;
                }
                /*
                 * Upper-casing alone does not cover every pair of case-equivalent
                 * characters; also compare lower case, as String.regionMatches does.
                 */
                if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }

    /**
     * Searches for a segment that contains no wildcard.
     * @param text the string to match
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a string that has no wildcard
     * @return the starting index in the text of the pattern , or -1 if not found
     */
    protected int textPosIn(String text, int start, int end, String p) {
        int plen = p.length();
        int max = end - plen;

        if (!fIgnoreCase) {
            int i = text.indexOf(p, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }

        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, p, 0, plen)) {
                return i;
            }
        }

        return -1;
    }
}
396
Popular Tags