KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > Yasna > util > StringUtils


1 /**
2  * $RCSfile: StringUtils.java,v $
3  * $Revision: 1.3 $
4  * $Date: 2006/01/07 00:21:06 $
5  *
6  * Copyright (C) 2000 CoolServlets.com. All rights reserved.
7  *
8  * ===================================================================
9  * The Apache Software License, Version 1.1
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in
20  * the documentation and/or other materials provided with the
21  * distribution.
22  *
23  * 3. The end-user documentation included with the redistribution,
24  * if any, must include the following acknowledgment:
25  * "This product includes software developed by
26  * CoolServlets.com (http://www.Yasna.com)."
27  * Alternately, this acknowledgment may appear in the software itself,
28  * if and wherever such third-party acknowledgments normally appear.
29  *
30  * 4. The names "Jive" and "CoolServlets.com" must not be used to
31  * endorse or promote products derived from this software without
32  * prior written permission. For written permission, please
33  * contact webmaster@Yasna.com.
34  *
35  * 5. Products derived from this software may not be called "Jive",
36  * nor may "Jive" appear in their name, without prior written
37  * permission of CoolServlets.com.
38  *
39  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
41  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
42  * DISCLAIMED. IN NO EVENT SHALL COOLSERVLETS.COM OR
43  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
46  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
47  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
49  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  * ====================================================================
52  *
53  * This software consists of voluntary contributions made by many
54  * individuals on behalf of CoolServlets.com. For more information
55  * on CoolServlets.com, please see <http://www.Yasna.com>.
56  */

57
58 package com.Yasna.util;
59
60 import java.security.*;
61 import java.text.*;
62 import java.util.*;
63
/**
 * Utility class to perform common String manipulation algorithms.
 * <p>
 * All methods are static. Unless a method says otherwise, it keeps no state
 * between calls.
 */
public class StringUtils {

    /** Lower-case hexadecimal digits indexed by nibble value; used by {@link #toHex}. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Replaces all instances of oldString with newString in line.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString; if it
     *      is null or empty, line is returned unchanged
     * @param newString the String that will replace all instances of oldString
     *
     * @return a String with all instances of oldString replaced by newString,
     *      or null if line is null
     * @throws NullPointerException if a match is found and newString is null
     */
    public static final String replace(String line, String oldString, String newString) {
        return replaceAll(line, oldString, newString, false, null);
    }

    /**
     * Replaces all instances of oldString with newString in line, with the
     * added feature that matches of oldString in line ignore case.
     * <p>
     * Matching is done character by character on the original text, so the
     * offsets stay valid even for characters whose lower-case form has a
     * different length.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString; if it
     *      is null or empty, line is returned unchanged
     * @param newString the String that will replace all instances of oldString
     *
     * @return a String with all instances of oldString replaced by newString,
     *      or null if line is null
     * @throws NullPointerException if a match is found and newString is null
     */
    public static final String replaceIgnoreCase(String line, String oldString,
            String newString)
    {
        return replaceAll(line, oldString, newString, true, null);
    }

    /**
     * Replaces all instances of oldString with newString in line and reports
     * how many replacements were made.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString; if it
     *      is null or empty, line is returned unchanged
     * @param newString the String that will replace all instances of oldString
     * @param count receives the number of replacements in element 0 (zero when
     *      nothing was replaced); a null or empty array is ignored
     *
     * @return a String with all instances of oldString replaced by newString,
     *      or null if line is null
     * @throws NullPointerException if a match is found and newString is null
     */
    public static final String replace(String line, String oldString,
            String newString, int[] count)
    {
        return replaceAll(line, oldString, newString, false, count);
    }

    /**
     * Shared implementation of the public replace methods: substitutes every
     * occurrence of oldString, optionally ignoring case, and optionally
     * storing the number of substitutions in count[0].
     */
    private static String replaceAll(String line, String oldString,
            String newString, boolean ignoreCase, int[] count)
    {
        boolean reportCount = (count != null && count.length > 0);
        if (reportCount) {
            // Always reset, so a stale value is never mistaken for a result.
            count[0] = 0;
        }
        if (line == null) {
            return null;
        }
        if (oldString == null || oldString.length() == 0) {
            // Nothing meaningful to search for.
            return line;
        }
        int matchAt = indexOf(line, oldString, 0, ignoreCase);
        if (matchAt < 0) {
            // No occurrence at all: hand back the original instance.
            return line;
        }
        if (newString == null) {
            throw new NullPointerException("newString must not be null");
        }

        char[] source = line.toCharArray();
        int oldLength = oldString.length();
        StringBuffer buf = new StringBuffer(source.length);
        int copyFrom = 0;
        int replaced = 0;
        while (matchAt >= 0) {
            // Copy the untouched text before the match, then the replacement.
            buf.append(source, copyFrom, matchAt - copyFrom).append(newString);
            replaced++;
            copyFrom = matchAt + oldLength;
            matchAt = indexOf(line, oldString, copyFrom, ignoreCase);
        }
        buf.append(source, copyFrom, source.length - copyFrom);
        if (reportCount) {
            count[0] = replaced;
        }
        return buf.toString();
    }

    /**
     * Returns the index of the first occurrence of target in text at or after
     * fromIndex, or -1 if there is none. When ignoreCase is set, characters
     * are compared with String.regionMatches(true, ...).
     */
    private static int indexOf(String text, String target, int fromIndex,
            boolean ignoreCase)
    {
        if (!ignoreCase) {
            return text.indexOf(target, fromIndex);
        }
        int targetLength = target.length();
        int lastStart = text.length() - targetLength;
        for (int i = fromIndex; i <= lastStart; i++) {
            if (text.regionMatches(true, i, target, 0, targetLength)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * This method takes a string which may contain HTML tags (ie, &lt;b&gt;,
     * &lt;table&gt;, etc) and converts the '&lt;' and '&gt;' characters to
     * their HTML escape sequences. No other character (in particular '&amp;')
     * is changed.
     *
     * @param input the text to be converted.
     * @return the input string with the characters '&lt;' and '&gt;' replaced
     *      with their HTML escape sequences; null or empty input is returned
     *      as is.
     */
    public static final String escapeHTMLTags(String input) {
        // Null or zero length: return what was sent in.
        if (input == null || input.length() == 0) {
            return input;
        }
        int inputLength = input.length();
        StringBuffer buf = new StringBuffer(inputLength);
        for (int i = 0; i < inputLength; i++) {
            char ch = input.charAt(i);
            if (ch == '<') {
                buf.append("&lt;");
            }
            else if (ch == '>') {
                buf.append("&gt;");
            }
            else {
                buf.append(ch);
            }
        }
        return buf.toString();
    }

    /**
     * Used by the hash method. Created on first use and guarded by the
     * class-level lock that hash() holds.
     */
    private static MessageDigest digest = null;

    /**
     * Hashes a String using the MD5 algorithm and returns the result as a
     * String of hexadecimal numbers. This method is synchronized to avoid
     * excessive MessageDigest object creation. If calling this method becomes
     * a bottleneck in your code, you may wish to maintain a pool of
     * MessageDigest objects instead of using this method.
     * <p>
     * A hash is a one-way function -- that is, given an
     * input, an output is easily computed. However, given the output, the
     * input is almost impossible to compute. This is useful for passwords
     * since we can store the hash and a hacker will then have a very hard time
     * determining the original password.
     * <p>
     * In Jive, every time a user logs in, we simply
     * take their plain text password, compute the hash, and compare the
     * generated hash to the stored hash. The only negative to this system is
     * that password recovery is basically impossible. Therefore, a reset
     * password method is used instead.
     * <p>
     * NOTE(review): an unsalted MD5 digest is no longer considered adequate
     * for password storage. The algorithm and the platform-default byte
     * encoding are deliberately left unchanged here, because changing either
     * would invalidate hashes that have already been stored.
     *
     * @param data the String to compute the hash of; must not be null.
     * @return a hashed version of the passed-in String
     * @throws IllegalStateException if the MD5 algorithm is not available
     * @throws NullPointerException if data is null
     */
    public synchronized static final String hash(String data) {
        if (digest == null) {
            try {
                digest = MessageDigest.getInstance("MD5");
            }
            catch (NoSuchAlgorithmException nsae) {
                System.err.println("Failed to load the MD5 MessageDigest. " +
                "Jive will be unable to function normally.");
                // Fail with a descriptive error instead of a later
                // NullPointerException on the missing digest.
                IllegalStateException failure =
                        new IllegalStateException("MD5 MessageDigest is not available");
                failure.initCause(nsae);
                throw failure;
            }
        }
        // Now, compute hash. digest(byte[]) updates, finishes and resets.
        return toHex(digest.digest(data.getBytes()));
    }

    /**
     * Turns an array of bytes into a String representing each byte as an
     * unsigned, two-digit, lower-case hex number.
     * <p>
     * Method by Santeri Paavolainen, Helsinki Finland 1996<br>
     * (c) Santeri Paavolainen, Helsinki Finland 1996<br>
     * Distributed under LGPL.
     *
     * @param hash an array of bytes to convert to a hex-string
     * @return generated hex string
     */
    public static final String toHex(byte hash[]) {
        StringBuffer buf = new StringBuffer(hash.length * 2);
        for (int i = 0; i < hash.length; i++) {
            // Mask to treat the byte as unsigned (0..255).
            int value = hash[i] & 0xff;
            buf.append(HEX_DIGITS[value >>> 4]);
            buf.append(HEX_DIGITS[value & 0x0f]);
        }
        return buf.toString();
    }

    /**
     * Converts a line of text into an array of lower case words. Words are
     * delimited by the following characters: space , . \r \n : / \ +
     * <p>
     * In the future, this method should be changed to use
     * BreakIterator.getWordInstance(). That class offers much more
     * flexibility.
     *
     * @param text a String of text to convert into an array of words
     * @return text broken up into an array of words; an empty array if text
     *      is null or empty.
     */
    public static final String[] toLowerCaseWordArray(String text) {
        if (text == null || text.length() == 0) {
            return new String[0];
        }
        StringTokenizer tokens = new StringTokenizer(text, " ,\r\n.:/\\+");
        String[] words = new String[tokens.countTokens()];
        for (int i = 0; i < words.length; i++) {
            words[i] = tokens.nextToken().toLowerCase();
        }
        return words;
    }

    /**
     * A list of some of the most common words. For searching and indexing, we
     * often want to filter out these words since they just confuse searches.
     * The list was not created scientifically so may be incomplete :)
     */
    private static final String[] commonWords = new String[] {
        "a", "and", "as", "at", "be", "do", "i", "if", "in", "is", "it", "so",
        "the", "to"
    };

    /**
     * Lookup set built from commonWords. It is created eagerly during class
     * initialization and never modified afterwards, so it can be read from
     * any thread without locking.
     */
    private static final Set commonWordsSet = new HashSet(Arrays.asList(commonWords));

    /**
     * Returns a new String array with some of the most common English words
     * removed. The specific words removed are: a, and, as, at, be, do, i, if,
     * in, is, it, so, the, to. The comparison is case sensitive, so callers
     * normally pass lower-case words.
     *
     * @param words the words to filter; must not be null.
     * @return a new array holding the remaining words in their original order.
     */
    public static final String[] removeCommonWords(String[] words) {
        ArrayList results = new ArrayList(words.length);
        for (int i = 0; i < words.length; i++) {
            if (!commonWordsSet.contains(words[i])) {
                results.add(words[i]);
            }
        }
        return (String[]) results.toArray(new String[results.size()]);
    }

    /**
     * Pseudo-random number generator object for use with randomString().
     * The Random class is not considered to be cryptographically secure, so
     * only use these random Strings for low to medium security applications.
     */
    private static final Random randGen = new Random();

    /**
     * Array of numbers and letters of mixed case. Numbers appear in the list
     * twice so that they are picked more often than they would be otherwise.
     * We can use the array to get a random number or letter by picking a
     * random array index.
     */
    private static final char[] numbersAndLetters =
            ("0123456789abcdefghijklmnopqrstuvwxyz" +
            "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();

    /**
     * Returns a random String of numbers and letters of the specified length.
     * The method uses the Random class that is built-in to Java which is
     * suitable for low to medium grade security uses. This means that the
     * output is only pseudo random, i.e., each number is mathematically
     * generated so is not truly random.<p>
     *
     * Every character is drawn uniformly from a table of 72 entries: the ten
     * digits (listed twice), the 26 lower-case letters and the 26 upper-case
     * letters. A letter is therefore equally likely to be lower or upper
     * case.<p>
     *
     * The specified length must be at least one. If not, the method will return
     * null.
     *
     * @param length the desired length of the random String to return.
     * @return a random String of numbers and letters of the specified length,
     *      or null if length is less than one.
     */
    public static final String randomString(int length) {
        if (length < 1) {
            return null;
        }
        // Create a char buffer to put random letters and numbers in.
        char[] randBuffer = new char[length];
        for (int i = 0; i < randBuffer.length; i++) {
            // Bound by the table length so that every entry can be chosen.
            randBuffer[i] = numbersAndLetters[randGen.nextInt(numbersAndLetters.length)];
        }
        return new String(randBuffer);
    }

    /**
     * Intelligently chops a String at a word boundary (a space) that occurs
     * before the specified index. However, if there is a newline character
     * within the first <code>length</code> characters, the String will be
     * chopped there. If the whole String fits within <code>length</code>
     * characters it is returned unchanged. If no newline or space is found in
     * <code>string</code> up to the index <code>length</code>, the String
     * will be chopped at <code>length</code>.
     * <p>
     * For example, chopAtWord("This is a nice String", 10) will return
     * "This is a" which is the first word boundary less than or equal to 10
     * characters into the original String.
     *
     * @param string the String to chop.
     * @param length the index in <code>string</code> to start looking for a
     *      whitespace boundary at.
     * @return a substring of <code>string</code> whose length is less than or
     *      equal to <code>length</code>, and that is chopped at whitespace;
     *      null if <code>string</code> is null, and an empty String if
     *      <code>length</code> is not positive.
     */
    public static final String chopAtWord(String string, int length) {
        if (string == null || string.length() == 0) {
            return string;
        }
        if (length <= 0) {
            // Nothing may be kept; avoids indexing before the array start.
            return "";
        }

        char[] charArray = string.toCharArray();
        int sLength = Math.min(string.length(), length);

        // First check if there is a newline character before length; if so,
        // chop word there.
        for (int i = 0; i < sLength - 1; i++) {
            boolean windowsNewline = (charArray[i] == '\r' && charArray[i + 1] == '\n');
            boolean unixNewline = (charArray[i] == '\n');
            if (windowsNewline || unixNewline) {
                return string.substring(0, i);
            }
        }
        // Also check boundary case of Unix newline
        if (charArray[sLength - 1] == '\n') {
            return string.substring(0, sLength - 1);
        }

        // Done checking for newline, now see if the total string already
        // fits within the specified chop point.
        if (string.length() <= length) {
            return string;
        }

        // No newline, so chop at the last space before the chop point.
        for (int i = length - 1; i > 0; i--) {
            if (charArray[i] == ' ') {
                return string.substring(0, i).trim();
            }
        }

        // Did not find word boundary so return original String chopped at
        // specified length.
        return string.substring(0, length);
    }

    /**
     * Highlights words in a string. Word matching ignores case. The actual
     * highlighting method is specified with the start and end highlight tags.
     * Those might be beginning and ending HTML bold tags, or anything else.
     * <p>
     * Only distinct words are highlighted, not parts of larger words: a match
     * counts when the character before it (if any) and the character after it
     * (if any) are not letters. The text is scanned once, from left to right,
     * so the inserted tags are never searched themselves and the text around
     * a match is copied unchanged. When several words match at the same
     * position, the one that comes first in <code>words</code> is used. Null
     * or empty entries in <code>words</code> are ignored.
     *
     * @param string the String to highlight words in.
     * @param words an array of words that should be highlighted in the string.
     * @param startHighlight the tag that should be inserted to start highlighting.
     * @param endHighlight the tag that should be inserted to end highlighting.
     * @return a new String with the specified words highlighted, or null if
     *      any argument is null.
     */
    public static final String highlightWords(String string, String[] words,
        String startHighlight, String endHighlight)
    {
        if (string == null || words == null ||
                startHighlight == null || endHighlight == null)
        {
            return null;
        }

        char[] source = string.toCharArray();
        StringBuffer buf = new StringBuffer(source.length);
        int pos = 0;
        while (pos < source.length) {
            int matchLength = wordLengthAt(string, words, pos);
            if (matchLength > 0) {
                buf.append(startHighlight);
                buf.append(source, pos, matchLength);
                buf.append(endHighlight);
                pos += matchLength;
            }
            else {
                buf.append(source[pos]);
                pos++;
            }
        }
        return buf.toString();
    }

    /**
     * Returns the length of the first entry of words that occurs in text at
     * position pos as a distinct word (ignoring case), or 0 if none does.
     */
    private static int wordLengthAt(String text, String[] words, int pos) {
        // A word cannot start in the middle of a run of letters.
        if (pos > 0 && Character.isLetter(text.charAt(pos - 1))) {
            return 0;
        }
        int textLength = text.length();
        for (int w = 0; w < words.length; w++) {
            String word = words[w];
            if (word == null || word.length() == 0) {
                continue;
            }
            int end = pos + word.length();
            if (end > textLength) {
                continue;
            }
            // The match must not run straight on into more letters.
            if (end < textLength && Character.isLetter(text.charAt(end))) {
                continue;
            }
            if (text.regionMatches(true, pos, word, 0, word.length())) {
                return word.length();
            }
        }
        return 0;
    }

    /**
     * Escapes all necessary characters in the String so that it can be used
     * in an XML doc. The characters &lt;, &gt;, &amp;, the double quote and
     * the apostrophe are replaced by their predefined XML entities.
     *
     * @param string the string to escape.
     * @return the string with appropriate characters escaped; null or empty
     *      input is returned as is.
     */
    public static final String escapeForXML(String string) {
        // Null or zero length: return what was sent in.
        if (string == null || string.length() == 0) {
            return string;
        }
        char[] sArray = string.toCharArray();
        StringBuffer buf = new StringBuffer(sArray.length);
        for (int i = 0; i < sArray.length; i++) {
            char ch = sArray[i];
            switch (ch) {
                case '<':
                    buf.append("&lt;");
                    break;
                case '>':
                    // Needed so that "]]>" can never appear in content.
                    buf.append("&gt;");
                    break;
                case '&':
                    buf.append("&amp;");
                    break;
                case '"':
                    buf.append("&quot;");
                    break;
                case '\'':
                    // Needed inside single-quoted attribute values.
                    buf.append("&apos;");
                    break;
                default:
                    buf.append(ch);
                    break;
            }
        }
        return buf.toString();
    }

}
607
Popular Tags