KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > rcm > util > Str


/*
 * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

30
31 package rcm.util;
32
33 import java.util.StringTokenizer JavaDoc;
34
/**
 * String utility routines.
 *
 * <p>All methods are static.
 */
public abstract class Str {

    /**
     * Find first occurrence of any of a set of characters.
     * @param subject String in which to search
     * @param chars Characters to search for
     * @return index of first occurrence in subject of a character from chars,
     * or -1 if no match.
     */
    public static int indexOfAnyChar(String subject, String chars) {
        return indexOfAnyChar(subject, chars, 0);
    }

    /**
     * Find first occurrence of any of a set of characters, starting
     * at a specified index.
     * @param subject String in which to search
     * @param chars Characters to search for
     * @param start Starting offset to search from; a negative value is
     * treated as 0 (the same convention String.indexOf uses)
     * @return index of first occurrence (at or after start) in subject of a
     * character from chars, or -1 if no match.
     */
    public static int indexOfAnyChar(String subject, String chars, int start) {
        for (int i = Math.max(start, 0); i < subject.length(); ++i) {
            if (chars.indexOf(subject.charAt(i)) != -1) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Replace all occurrences of a string.
     * @param subject String in which to search
     * @param original String to search for in subject
     * @param replacement String to substitute
     * @return subject with all occurrences of original replaced by replacement.
     * If original is empty there is nothing to search for and subject is
     * returned unchanged.
     */
    public static String replace(String subject, String original, String replacement) {
        // An empty pattern matches at every position without advancing,
        // which would otherwise make the loop below run forever.
        if (original.length() == 0) {
            return subject;
        }

        StringBuffer output = new StringBuffer(subject.length());
        int p = 0; // start of the text not yet copied to output
        int i;
        while ((i = subject.indexOf(original, p)) != -1) {
            output.append(subject.substring(p, i));
            output.append(replacement);
            p = i + original.length();
        }
        output.append(subject.substring(p));
        return output.toString();
    }

    /**
     * Escapes metacharacters in a string.
     * @param subject String in which metacharacters are to be escaped
     * @param escapeChar the escape character (e.g., \)
     * @param metachars the metacharacters that should be escaped
     * @return subject with escapeChar inserted before every character found in metachars
     */
    public static String escape(String subject, char escapeChar, String metachars) {
        // Each metacharacter is its own escape code.
        return escape(subject, metachars, escapeChar, metachars);
    }

    /**
     * Escapes characters in a string.
     * @param subject String in which metacharacters are to be escaped
     * @param chars Characters that need to be escaped (e.g. "\b\t\r\n\\")
     * @param escapeChar the escape character (e.g., '\\')
     * @param metachars escape code letters corresponding to each letter in chars (e.g. "btrn\\")
     * <B>Must have metachars.length () == chars.length().</B>
     * @return subject where every occurrence of c in chars is replaced
     * by escapeChar followed by the character corresponding to c in metachars.
     */
    public static String escape(String subject, String chars, char escapeChar, String metachars) {
        StringBuffer output = new StringBuffer(subject.length());

        int p = 0; // start of the text not yet copied to output
        int i;
        while ((i = indexOfAnyChar(subject, chars, p)) != -1) {
            output.append(subject.substring(p, i));

            char c = subject.charAt(i);              // character that needs escaping
            char metac = metachars.charAt(chars.indexOf(c)); // its corresponding metachar

            output.append(escapeChar);
            output.append(metac);

            p = i + 1;
        }
        output.append(subject.substring(p));
        return output.toString();
    }

    /**
     * Translate escape sequences (e.g. \r, \n) to characters.
     * @param subject String in which escape sequences are to be translated
     * @param escapeChar the escape character (e.g., \)
     * @param metachars letters representing escape codes (typically "btrn\\")
     * @param chars characters corresponding to metachars (typically "\b\t\r\n\\").
     * <B>Must have chars.length () == metachars.length().</B>
     * @param keepUntranslatedEscapes Controls behavior on unknown escape sequences
     * (see below).
     * @return subject where every escapeChar followed by c in metachars
     * is replaced by the character corresponding to c in chars. If an escape
     * sequence is untranslatable (because escapeChar is followed by some character c
     * not in metachars, or because escapeChar is the last character of subject),
     * then the escapeChar is kept if keepUntranslatedEscapes is true,
     * otherwise the escapeChar is deleted. (The character c is always kept.)
     */
    public static String unescape(String subject, char escapeChar, String metachars,
                                  String chars, boolean keepUntranslatedEscapes) {
        StringBuffer output = new StringBuffer(subject.length());

        int len = subject.length();
        int p = 0; // start of the text not yet copied to output
        int i;
        while ((i = subject.indexOf(escapeChar, p)) != -1) {
            output.append(subject.substring(p, i));

            if (i + 1 == len) {
                // Lone escape character at the very end: nothing follows it,
                // so it is untranslatable.  Mark everything as consumed so the
                // text before it is not appended a second time below.
                if (keepUntranslatedEscapes) {
                    output.append(escapeChar);
                }
                p = len;
                break;
            }

            char metac = subject.charAt(i + 1); // metachar to replace
            int k = metachars.indexOf(metac);
            if (k == -1) {
                // untranslatable sequence
                if (keepUntranslatedEscapes) {
                    output.append(escapeChar);
                }
                output.append(metac);
            } else {
                output.append(chars.charAt(k)); // its corresponding true char
            }

            p = i + 2; // skip over both escapeChar & metac
        }

        if (p < len) {
            output.append(subject.substring(p));
        }
        return output.toString();
    }

    // Scanner states for parseNumber: which part of the number is being read.
    private static final int INT = 0;
    private static final int FRAC = 1;
    private static final int EXP = 2;

    /**
     * Parse a number from a string. Finds the first recognizable base-10 number (integer or floating point)
     * in the string and returns it as a Number. Uses American English conventions
     * (i.e., '.' as decimal point and ',' as thousands separator).
     *
     * <p>The number ends at its last digit: a '.', 'e' or 'E' that is not
     * followed by further digits is not part of the number (so "3." and
     * "12eggs" yield the integers 3 and 12).
     *
     * @param s String to parse
     * @return first recognizable number: an Integer for a number without
     * fraction or exponent (a Long, or failing that a Double, if it does not
     * fit in an int), otherwise a Float
     * @throws NumberFormatException if no recognizable number is found
     */
    public static Number parseNumber(String s) throws NumberFormatException {
        // Locate the first digit; the number is built around it.
        int i = 0;
        while (i < s.length() && !Character.isDigit(s.charAt(i))) {
            ++i;
        }
        if (i == s.length()) {
            throw new NumberFormatException(s);
        }

        int start = i;
        int end = ++i;   // one past the last digit accepted so far
        int state = INT;

        // Pick up a decimal point and/or minus sign immediately before the digit.
        if (start > 0 && s.charAt(start - 1) == '.') {
            --start;
            state = FRAC;
        }
        if (start > 0 && s.charAt(start - 1) == '-') {
            --start;
        }

        // State in effect when 'end' was last advanced.  Only this decides the
        // result type, so a trailing '.' or 'e' with no digits after it does
        // not turn an integer into a float.
        int endState = state;

      foundEnd:
        while (i < s.length()) {
            switch (s.charAt(i)) {
              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                end = ++i;
                endState = state;
                break;
              case '.':
                if (state != INT) {
                    break foundEnd;
                }
                state = FRAC;
                ++i;
                break;
              case ',': // ignore commas
                ++i;
                break;
              case 'e':
              case 'E':
                if (state == EXP) {
                    break foundEnd; // a number has at most one exponent
                }
                state = EXP;
                ++i;
                if (i < s.length() && (s.charAt(i) == '+' || s.charAt(i) == '-')) {
                    ++i;
                }
                break;
              default:
                break foundEnd;
            }
        }

        String num = replace(s.substring(start, end), ",", "");
        return toNumber(num, endState == INT);
    }

    /**
     * Convert the text isolated by parseNumber into a Number object.
     * @param num number text with thousands separators already removed
     * @param integral true if num has neither fraction nor exponent
     * @return Integer, Long or Double for integral text (the narrowest of
     * these that can hold the value), otherwise Float
     * @throws NumberFormatException if num cannot be parsed
     */
    private static Number toNumber(String num, boolean integral) {
        if (!integral) {
            return Float.valueOf(num);
        }
        try {
            return Integer.valueOf(num);
        } catch (NumberFormatException ignored) {
            // does not fit in an int; try a wider type
        }
        try {
            return Long.valueOf(num);
        } catch (NumberFormatException ignored) {
            // does not fit in a long either; fall back to floating point
        }
        return Double.valueOf(num);
    }

    /**
     * Generate a string by concatenating n copies of another string.
     * @param s String to repeat
     * @param n number of times to repeat s
     * @return s concatenated with itself n times (the empty string if n &lt;= 0)
     */
    public static String repeat(String s, int n) {
        StringBuffer out = new StringBuffer();
        while (--n >= 0) {
            out.append(s);
        }
        return out.toString();
    }

    /**
     * Compress whitespace.
     * @param s String to compress
     * @return string with leading and trailing whitespace removed, and
     * internal runs of whitespace replaced by a single space character
     */
    public static String compressWhitespace(String s) {
        StringBuffer output = new StringBuffer(s.length());
        int wordStart = -1; // start of the current run of non-whitespace, or -1
        for (int i = 0, len = s.length(); i <= len; ++i) {
            // The end of the string terminates a word just like whitespace does.
            boolean atBreak = (i == len) || Character.isWhitespace(s.charAt(i));
            if (atBreak) {
                if (wordStart >= 0) {
                    // The separator is written before a word rather than
                    // after it, so no trailing space is ever produced.
                    if (output.length() > 0) {
                        output.append(' ');
                    }
                    output.append(s.substring(wordStart, i));
                    wordStart = -1;
                }
            } else if (wordStart < 0) {
                wordStart = i;
            }
        }
        return output.toString();
    }

    /**
     * Test if string contains only whitespace.
     * @param s String to test
     * @return true iff all characters in s satisfy Character.isWhitespace().
     * If s is empty, returns true.
     */
    public static boolean isWhitespace(String s) {
        for (int i = 0, n = s.length(); i < n; ++i) {
            if (!Character.isWhitespace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Concatenate an array of strings.
     * @param list Array of strings to concatenate
     * @param sep Separator to insert between each string
     * @return string consisting of list[0] + sep + list[1] + sep + ... + sep + list[list.length-1]
     * (the empty string if list is empty)
     */
    public static String join(String[] list, String sep) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < list.length; ++i) {
            if (i > 0) {
                result.append(sep);
            }
            result.append(list[i]);
        }
        return result.toString();
    }

    /**
     * Abbreviate a string.
     * @param s String to abbreviate
     * @param max Maximum length of returned string; must be at least 5
     * @return s with whitespace compressed (see compressWhitespace, which also
     * removes linebreaks) and, if the result is still longer than max, enough
     * characters removed from the middle (replaced by "...") to make
     * length &lt;= max
     */
    public static String abbreviate(String s, int max) {
        s = compressWhitespace(s);
        if (s.length() <= max) {
            return s; // already fits
        }
        // Reserve 3 characters for "...", then split the rest between both ends.
        int half = Math.max(max - 3, 2) / 2;
        return s.substring(0, half) + "..." + s.substring(s.length() - half);
    }

    /**
     * Abbreviate a multi-line string.
     * @param s String to abbreviate
     * @param maxLines Max number of lines in returned string; must be at least 3
     * @param message Message to replace removed lines with; should end with
     * \n, but may be multiple lines. Occurrences of %d are replaced with
     * the number of lines removed.
     * @return s, if it has at most maxLines lines; otherwise s with enough
     * whole lines removed from the middle (replaced by message) to make its
     * length in lines &lt;= maxLines
     */
    public static String abbreviateLines(String s, int maxLines, String message) {
        int nLines = countLines(s);
        if (nLines <= maxLines) {
            return s; // already fits
        }
        // Reserve one line for the message, then split the rest between both ends.
        int half = Math.max(maxLines - 1, 2) / 2;
        return s.substring(0, nthLine(s, half))
            + replace(message, "%d", String.valueOf(nLines - half * 2))
            + s.substring(nthLine(s, -half));
    }

    /**
     * Count the lines of a string.
     * @param s String to examine
     * @return one more than the number of '\n' characters in s
     */
    static int countLines(String s) {
        int n = 1;
        int i = -1;
        while ((i = s.indexOf('\n', i + 1)) != -1) {
            ++n;
        }
        return n;
    }

    /**
     * Find the offset at which a line starts.
     * @param s String to examine
     * @param n Line number. If n &gt;= 0, lines are counted from the start of
     * s, with line 0 starting at offset 0. If n &lt; 0, lines are counted
     * back from the end of s, with -1 denoting the text after the last '\n'.
     * @return offset in s of the first character of the requested line; 0 if
     * s has too few '\n' characters to reach it
     */
    static int nthLine(String s, int n) {
        if (n >= 0) {
            int i = -1;
            while (n > 0 && (i = s.indexOf('\n', i + 1)) != -1) {
                --n;
            }
            return i + 1;
        } else {
            int i = s.length();
            while (n < 0 && (i = s.lastIndexOf('\n', i - 1)) != -1) {
                ++n;
            }
            return i + 1;
        }
    }

    /**
     * Split string around a substring match and return prefix.
     * @param s String to split
     * @param pat Substring to search for in s
     * @return Prefix of s ending just before the first occurrence
     * of pat. If pat is not found in s, returns s itself.
     */
    public static String before(String s, String pat) {
        int i = s.indexOf(pat);
        return (i >= 0) ? s.substring(0, i) : s;
    }

    /**
     * Split string around a substring match and return suffix.
     * @param s String to split
     * @param pat Substring to search for in s
     * @return Suffix of s starting just after the first occurrence
     * of pat. If pat is not found in s, returns "".
     */
    public static String after(String s, String pat) {
        int i = s.indexOf(pat);
        return (i >= 0) ? s.substring(i + pat.length()) : "";
    }

    /**
     * Like String.startsWith, but case-insensitive.
     * @param s String to test
     * @param prefix Prefix to look for
     * @return true iff s is at least as long as prefix and its first
     * prefix.length() characters equal prefix, ignoring case
     */
    public static boolean startsWithIgnoreCase(String s, String prefix) {
        // regionMatches returns false (rather than throwing) when prefix is longer than s.
        return s.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Like String.endsWith, but case-insensitive.
     * @param s String to test
     * @param suffix Suffix to look for
     * @return true iff s is at least as long as suffix and its last
     * suffix.length() characters equal suffix, ignoring case
     */
    public static boolean endsWithIgnoreCase(String s, String suffix) {
        // A negative offset (suffix longer than s) makes regionMatches return false.
        int suffixLen = suffix.length();
        return s.regionMatches(true, s.length() - suffixLen, suffix, 0, suffixLen);
    }

    /**
     * Expands tabs to spaces.
     * @param s String in which to expand tabs
     * @param tabsize Distance between tab stops, in columns; must be positive
     * @return s with every tab replaced by the spaces needed to reach the
     * next multiple of tabsize. Columns are counted from 0 and restart after
     * every '\r' or '\n'. If s contains no tab, s itself is returned.
     * @throws IllegalArgumentException if s contains a tab and tabsize &lt;= 0
     */
    public static String untabify(String s, int tabsize) {
        if (s.indexOf('\t') == -1) {
            return s; // no tabs, don't bother
        }
        if (tabsize <= 0) {
            throw new IllegalArgumentException("tabsize must be positive: " + tabsize);
        }

        StringBuffer result = new StringBuffer(s.length());
        int col = 0; // current output column on the current line
        for (int i = 0, len = s.length(); i < len; ++i) {
            char c = s.charAt(i);
            if (c == '\t') {
                // pad with spaces up to the next tab stop
                int nextStop = (col / tabsize + 1) * tabsize;
                for (; col < nextStop; ++col) {
                    result.append(' ');
                }
            } else if (c == '\r' || c == '\n') {
                col = 0;
                result.append(c);
            } else {
                ++col;
                result.append(c);
            }
        }
        return result.toString();
    }

    /**
     * Reverse a string.
     * @param s String to reverse
     * @return string containing characters of s in reverse order; surrogate
     * pairs are kept intact, as specified by StringBuffer.reverse()
     */
    public static String reverse(String s) {
        return new StringBuffer(s).reverse().toString();
    }

    /**
     * Find longest common prefix of two strings.
     * @param s First string
     * @param t Second string
     * @return the longest string that both s and t start with
     */
    public static String longestCommonPrefix(String s, String t) {
        return s.substring(0, longestCommonPrefixLength(s, t));
    }

    /**
     * Find the length of the longest common prefix of two strings.
     * @param s First string
     * @param t Second string
     * @return number of leading characters that s and t have in common
     */
    public static int longestCommonPrefixLength(String s, String t) {
        int m = Math.min(s.length(), t.length());
        for (int k = 0; k < m; ++k) {
            if (s.charAt(k) != t.charAt(k)) {
                return k;
            }
        }
        return m;
    }

    /**
     * Find longest common suffix of two strings.
     * @param s First string
     * @param t Second string
     * @return the longest string that both s and t end with
     */
    public static String longestCommonSuffix(String s, String t) {
        return s.substring(s.length() - longestCommonSuffixLength(s, t));
    }

    /**
     * Find the length of the longest common suffix of two strings.
     * @param s First string
     * @param t Second string
     * @return number of trailing characters that s and t have in common
     */
    public static int longestCommonSuffixLength(String s, String t) {
        int i = s.length() - 1;
        int j = t.length() - 1;
        // Walk both strings backwards until they differ or one runs out.
        while (i >= 0 && j >= 0 && s.charAt(i) == t.charAt(j)) {
            --i;
            --j;
        }
        return s.length() - (i + 1);
    }

    /**
     * Find longest common prefix of two strings, ignoring case.
     * @param s First string
     * @param t Second string
     * @return the leading part of s that matches the start of t when both
     * are compared with Character.toLowerCase()
     */
    public static String longestCommonPrefixIgnoreCase(String s, String t) {
        return s.substring(0, longestCommonPrefixLengthIgnoreCase(s, t));
    }

    /**
     * Find the length of the longest common prefix of two strings, ignoring case.
     * @param s First string
     * @param t Second string
     * @return number of leading characters of s and t that are equal after
     * Character.toLowerCase()
     */
    public static int longestCommonPrefixLengthIgnoreCase(String s, String t) {
        int m = Math.min(s.length(), t.length());
        for (int k = 0; k < m; ++k) {
            if (Character.toLowerCase(s.charAt(k)) != Character.toLowerCase(t.charAt(k))) {
                return k;
            }
        }
        return m;
    }

    /**
     * Find longest common suffix of two strings, ignoring case.
     * @param s First string
     * @param t Second string
     * @return the trailing part of s that matches the end of t when both
     * are compared with Character.toLowerCase()
     */
    public static String longestCommonSuffixIgnoreCase(String s, String t) {
        return s.substring(s.length() - longestCommonSuffixLengthIgnoreCase(s, t));
    }

    /**
     * Find the length of the longest common suffix of two strings, ignoring case.
     * @param s First string
     * @param t Second string
     * @return number of trailing characters of s and t that are equal after
     * Character.toLowerCase()
     */
    public static int longestCommonSuffixLengthIgnoreCase(String s, String t) {
        int i = s.length() - 1;
        int j = t.length() - 1;
        // Walk both strings backwards until they differ or one runs out.
        while (i >= 0 && j >= 0
               && Character.toLowerCase(s.charAt(i)) == Character.toLowerCase(t.charAt(j))) {
            --i;
            --j;
        }
        return s.length() - (i + 1);
    }
}
549
Popular Tags