KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > nemesis > forum > util > StringUtils


1 /*
2  * NEMESIS-FORUM.
3  * Copyright (C) 2002 David Laurent(lithium2@free.fr). All rights reserved.
4  *
5  * Copyright (c) 2000 The Apache Software Foundation. All rights reserved.
6  *
7  * Copyright (C) 2001 Yasna.com. All rights reserved.
8  *
9  * Copyright (C) 2000 CoolServlets.com. All rights reserved.
10  *
11  * NEMESIS-FORUM. is free software; you can redistribute it and/or
12  * modify it under the terms of the Apache Software License, Version 1.1,
13  * or (at your option) any later version.
14  *
15  * NEMESIS-FORUM core framework, NEMESIS-FORUM backoffice, NEMESIS-FORUM frontoffice
16  * application are parts of NEMESIS-FORUM and are distributed under
17  * same terms of licence.
18  *
19  *
20  * NEMESIS-FORUM includes software developed by the Apache Software Foundation (http://www.apache.org/)
21  * and software developed by CoolServlets.com (http://www.coolservlets.com).
22  * and software developed by Yasna.com (http://www.yasna.com).
23  *
24  */

25
26
27 package org.nemesis.forum.util;
28
29 import java.security.MessageDigest JavaDoc;
30 import java.security.NoSuchAlgorithmException JavaDoc;
31 import java.util.ArrayList JavaDoc;
32 import java.util.HashMap JavaDoc;
33 import java.util.Map JavaDoc;
34 import java.util.Random JavaDoc;
35 import java.util.StringTokenizer JavaDoc;
36
37 import org.apache.commons.logging.Log;
38 import org.apache.commons.logging.LogFactory;
39 /**
40  * Utility class to perform common String manipulation algorithms.
41  */

42 public class StringUtils {
43     static protected Log log = LogFactory.getLog(StringUtils.class);
44     /**
45      * Initialization lock for the whole class. Init's only happen once per
46      * class load so this shouldn't be a bottleneck.
47      */

48     private static Object JavaDoc initLock = new Object JavaDoc();
49
50     /**
51      * Replaces all instances of oldString with newString in line.
52      *
53      * @param line the String to search to perform replacements on
54      * @param oldString the String that should be replaced by newString
55      * @param newString the String that will replace all instances of oldString
56      *
57      * @return a String will all instances of oldString replaced by newString
58      */

59     public static final String JavaDoc replace( String JavaDoc line, String JavaDoc oldString, String JavaDoc newString )
60     {
61         if (line == null) {
62             return null;
63         }
64         int i=0;
65         if ( ( i=line.indexOf( oldString, i ) ) >= 0 ) {
66             char [] line2 = line.toCharArray();
67             char [] newString2 = newString.toCharArray();
68             int oLength = oldString.length();
69             StringBuffer JavaDoc buf = new StringBuffer JavaDoc(line2.length);
70             buf.append(line2, 0, i).append(newString2);
71             i += oLength;
72             int j = i;
73             while( ( i=line.indexOf( oldString, i ) ) > 0 ) {
74                 buf.append(line2, j, i-j).append(newString2);
75                 i += oLength;
76                 j = i;
77             }
78             buf.append(line2, j, line2.length - j);
79             return buf.toString();
80         }
81         return line;
82     }
83
84     /**
85      * Replaces all instances of oldString with newString in line with the
86      * added feature that matches of newString in oldString ignore case.
87      *
88      * @param line the String to search to perform replacements on
89      * @param oldString the String that should be replaced by newString
90      * @param newString the String that will replace all instances of oldString
91      *
92      * @return a String will all instances of oldString replaced by newString
93      */

94     public static final String JavaDoc replaceIgnoreCase(String JavaDoc line, String JavaDoc oldString,
95             String JavaDoc newString)
96     {
97         if (line == null) {
98             return null;
99         }
100         String JavaDoc lcLine = line.toLowerCase();
101         String JavaDoc lcOldString = oldString.toLowerCase();
102         int i=0;
103         if ( ( i=lcLine.indexOf( lcOldString, i ) ) >= 0 ) {
104             char [] line2 = line.toCharArray();
105             char [] newString2 = newString.toCharArray();
106             int oLength = oldString.length();
107             StringBuffer JavaDoc buf = new StringBuffer JavaDoc(line2.length);
108             buf.append(line2, 0, i).append(newString2);
109             i += oLength;
110             int j = i;
111             while( ( i=lcLine.indexOf( lcOldString, i ) ) > 0 ) {
112                 buf.append(line2, j, i-j).append(newString2);
113                 i += oLength;
114                 j = i;
115             }
116             buf.append(line2, j, line2.length - j);
117             return buf.toString();
118         }
119         return line;
120     }
121
122    /**
123     * Replaces all instances of oldString with newString in line.
124     * The count Integer is updated with number of replaces.
125     *
126     * @param line the String to search to perform replacements on
127     * @param oldString the String that should be replaced by newString
128     * @param newString the String that will replace all instances of oldString
129     *
130     * @return a String will all instances of oldString replaced by newString
131     */

132     public static final String JavaDoc replace( String JavaDoc line, String JavaDoc oldString,
133             String JavaDoc newString, int[] count)
134     {
135         if (line == null) {
136             return null;
137         }
138         int i=0;
139         if ( ( i=line.indexOf( oldString, i ) ) >= 0 ) {
140             int counter = 0;
141             counter++;
142             char [] line2 = line.toCharArray();
143             char [] newString2 = newString.toCharArray();
144             int oLength = oldString.length();
145             StringBuffer JavaDoc buf = new StringBuffer JavaDoc(line2.length);
146             buf.append(line2, 0, i).append(newString2);
147             i += oLength;
148             int j = i;
149             while( ( i=line.indexOf( oldString, i ) ) > 0 ) {
150                 counter++;
151                 buf.append(line2, j, i-j).append(newString2);
152                 i += oLength;
153                 j = i;
154             }
155             buf.append(line2, j, line2.length - j);
156             count[0] = counter;
157             return buf.toString();
158         }
159         return line;
160     }
161
162     /**
163      * This method takes a string which may contain HTML tags (ie, <b>,
164      * <table>, etc) and converts the '&lt'' and '>' characters to
165      * their HTML escape sequences.
166      *
167      * @param input the text to be converted.
168      * @return the input string with the characters '<' and '>' replaced
169      * with their HTML escape sequences.
170      */

171     public static final String JavaDoc escapeHTMLTags( String JavaDoc input ) {
172         //Check if the string is null or zero length -- if so, return
173
//what was sent in.
174
if( input == null || input.length() == 0 ) {
175             return input;
176         }
177         //Use a StringBuffer in lieu of String concatenation -- it is
178
//much more efficient this way.
179
StringBuffer JavaDoc buf = new StringBuffer JavaDoc(input.length());
180         char ch = ' ';
181         for( int i=0; i<input.length(); i++ ) {
182             ch = input.charAt(i);
183             if( ch == '<' ) {
184                 buf.append("&lt;");
185             }
186             else if( ch == '>' ) {
187                 buf.append("&gt;");
188             }
189             else {
190                 buf.append( ch );
191             }
192         }
193         return buf.toString();
194     }
195
196     /**
197      * Used by the hash method.
198      */

199     private static MessageDigest JavaDoc digest = null;
200
201     /**
202      * Hashes a String using the Md5 algorithm and returns the result as a
203      * String of hexadecimal numbers. This method is synchronized to avoid
204      * excessive MessageDigest object creation. If calling this method becomes
205      * a bottleneck in your code, you may wish to maintain a pool of
206      * MessageDigest objects instead of using this method.
207      * <p>
208      * A hash is a one-way function -- that is, given an
209      * input, an output is easily computed. However, given the output, the
210      * input is almost impossible to compute. This is useful for passwords
211      * since we can store the hash and a hacker will then have a very hard time
212      * determining the original password.
213      * <p>
214      * every time a user logs in, we simply
215      * take their plain text password, compute the hash, and compare the
216      * generated hash to the stored hash. Since it is almost impossible that
217      * two passwords will generate the same hash, we know if the user gave us
218      * the correct password or not. The only negative to this system is that
219      * password recovery is basically impossible. Therefore, a reset password
220      * method is used instead.
221      *
222      * @param data the String to compute the hash of.
223      * @return a hashed version of the passed-in String
224      */

225     public synchronized static final String JavaDoc hash(String JavaDoc data) {
226         if (digest == null) {
227             try {
228                 digest = MessageDigest.getInstance("MD5");
229             }
230             catch (NoSuchAlgorithmException JavaDoc nsae) {
231                 log.error("Failed to load the MD5 MessageDigest. " +
232                 "will be unable to function normally.",nsae);
233                 
234             }
235         }
236         //Now, compute hash.
237
digest.update(data.getBytes());
238         return toHex(digest.digest());
239     }
240
241     /**
242      * Turns an array of bytes into a String representing each byte as an
243      * unsigned hex number.
244      * <p>
245      * Method by Santeri Paavolainen, Helsinki Finland 1996<br>
246      * (c) Santeri Paavolainen, Helsinki Finland 1996<br>
247      * Distributed under LGPL.
248      *
249      * @param hash an rray of bytes to convert to a hex-string
250      * @return generated hex string
251      */

252     public static final String JavaDoc toHex (byte hash[]) {
253         StringBuffer JavaDoc buf = new StringBuffer JavaDoc(hash.length * 2);
254         int i;
255
256         for (i = 0; i < hash.length; i++) {
257             if (((int) hash[i] & 0xff) < 0x10) {
258                 buf.append("0");
259             }
260             buf.append(Long.toString((int) hash[i] & 0xff, 16));
261         }
262         return buf.toString();
263     }
264
265     /**
266      * Converts a line of text into an array of lower case words. Words are
267      * delimited by the following characters: , .\r\n:/\+
268      * <p>
269      * In the future, this method should be changed to use a
270      * BreakIterator.wordInstance(). That class offers much more fexibility.
271      *
272      * @param text a String of text to convert into an array of words
273      * @return text broken up into an array of words.
274      */

275     public static final String JavaDoc [] toLowerCaseWordArray(String JavaDoc text) {
276         if (text == null || text.length() == 0) {
277                 return new String JavaDoc[0];
278         }
279         StringTokenizer JavaDoc tokens = new StringTokenizer JavaDoc(text, " ,\r\n.:/\\+");
280         String JavaDoc [] words = new String JavaDoc[tokens.countTokens()];
281         for (int i=0; i<words.length; i++) {
282             words[i] = tokens.nextToken().toLowerCase();
283         }
284         return words;
285     }
286
287     /**
288      * A list of some of the most common words. For searching and indexing, we
289      * often want to filter out these words since they just confuse searches.
290      * The list was not created scientifically so may be incomplete :)
291      */

292     private static final String JavaDoc [] commonWords = new String JavaDoc [] {
293         "a", "and", "as", "at", "be", "do", "i", "if", "in", "is", "it", "so",
294         "the", "to"
295     };
296     private static Map JavaDoc commonWordsMap = null;
297
298     /**
299      * Returns a new String array with some of the most common English words
300      * removed. The specific words removed are: a, and, as, at, be, do, i, if,
301      * in, is, it, so, the, to
302      */

303     public static final String JavaDoc [] removeCommonWords(String JavaDoc [] words) {
304         //See if common words map has been initialized. We don't statically
305
//initialize it to save some memory. Even though this a small savings,
306
//it adds up with hundreds of classes being loaded.
307
if (commonWordsMap == null) {
308             synchronized(initLock) {
309                 if (commonWordsMap == null) {
310                     commonWordsMap = new HashMap JavaDoc();
311                     for (int i=0; i<commonWords.length; i++) {
312                         commonWordsMap.put(commonWords[i], commonWords[i]);
313                     }
314                 }
315             }
316         }
317         //Now, add all words that aren't in the common map to results
318
ArrayList JavaDoc results = new ArrayList JavaDoc(words.length);
319         for (int i=0; i<words.length; i++) {
320             if (!commonWordsMap.containsKey(words[i])) {
321                 results.add(words[i]);
322             }
323         }
324         return (String JavaDoc[])results.toArray(new String JavaDoc[results.size()]);
325     }
326
327     /**
328      * Pseudo-random number generator object for use with randomString().
329      * The Random class is not considered to be cryptographically secure, so
330      * only use these random Strings for low to medium security applications.
331      */

332     private static Random JavaDoc randGen = null;
333
334     /**
335      * Array of numbers and letters of mixed case. Numbers appear in the list
336      * twice so that there is a more equal chance that a number will be picked.
337      * We can use the array to get a random number or letter by picking a random
338      * array index.
339      */

340     private static char[] numbersAndLetters = null;
341
342     /**
343      * Returns a random String of numbers and letters of the specified length.
344      * The method uses the Random class that is built-in to Java which is
345      * suitable for low to medium grade security uses. This means that the
346      * output is only pseudo random, i.e., each number is mathematically
347      * generated so is not truly random.<p>
348      *
349      * For every character in the returned String, there is an equal chance that
350      * it will be a letter or number. If a letter, there is an equal chance
351      * that it will be lower or upper case.<p>
352      *
353      * The specified length must be at least one. If not, the method will return
354      * null.
355      *
356      * @param length the desired length of the random String to return.
357      * @return a random String of numbers and letters of the specified length.
358      */

359     public static final String JavaDoc randomString(int length) {
360         if (length < 1) {
361             return null;
362         }
363         //Init of pseudo random number generator.
364
if (randGen == null) {
365             synchronized (initLock) {
366                 if (randGen == null) {
367                     randGen = new Random JavaDoc();
368                     //Also initialize the numbersAndLetters array
369
numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
370                     "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
371                 }
372             }
373         }
374         //Create a char buffer to put random letters and numbers in.
375
char [] randBuffer = new char[length];
376         for (int i=0; i<randBuffer.length; i++) {
377             randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];
378         }
379         return new String JavaDoc(randBuffer);
380     }
381
382    /**
383     * Intelligently chops a String at a word boundary (whitespace) that occurs
384     * at the specified index in the argument or before. However, if there is a
385     * newline character before <code>length</code>, the String will be chopped
386     * there. If no newline or whitespace is found in <code>string</code> up to
387     * the index <code>length</code>, the String will chopped at <code>length</code>.
388     * <p>
389     * For example, chopAtWord("This is a nice String", 10) will return
390     * "This is a" which is the first word boundary less than or equal to 10
391     * characters into the original String.
392     *
393     * @param string the String to chop.
394     * @param length the index in <code>string</code> to start looking for a
395     * whitespace boundary at.
396     * @return a substring of <code>string</code> whose length is less than or
397     * equal to <code>length</code>, and that is chopped at whitespace.
398     */

399     public static final String JavaDoc chopAtWord(String JavaDoc string, int length) {
400         if (string == null) {
401             return string;
402         }
403
404         char [] charArray = string.toCharArray();
405         int sLength = string.length();
406         if (length < sLength) {
407             sLength = length;
408         }
409
410         //First check if there is a newline character before length; if so,
411
//chop word there.
412
for (int i=0; i<sLength-1; i++) {
413             //Windows
414
if (charArray[i] == '\r' && charArray[i+1] == '\n') {
415                 return string.substring(0, i);
416             }
417             //Unix
418
else if (charArray[i] == '\n') {
419                 return string.substring(0, i);
420             }
421         }
422         //Also check boundary case of Unix newline
423
if (charArray[sLength-1] == '\n') {
424             return string.substring(0, sLength-1);
425         }
426
427         //Done checking for newline, now see if the total string is less than
428
//the specified chop point.
429
if (string.length() < length) {
430             return string;
431         }
432
433         //No newline, so chop at the first whitespace.
434
for (int i = length-1; i > 0; i--) {
435             if (charArray[i] == ' ') {
436                 return string.substring(0, i).trim();
437             }
438         }
439
440         //Did not find word boundary so return original String chopped at
441
//specified length.
442
return string.substring(0, length);
443     }
444
445     /**
446      * Highlights words in a string. Words matching ignores case. The actual
447      * higlighting method is specified with the start and end higlight tags.
448      * Those might be beginning and ending HTML bold tags, or anything else.
449      *
450      * @param string the String to highlight words in.
451      * @param words an array of words that should be highlighted in the string.
452      * @param startHighlight the tag that should be inserted to start highlighting.
453      * @param endHighlight the tag that should be inserted to end highlighting.
454      * @return a new String with the specified words highlighted.
455      */

456     public static final String JavaDoc highlightWords(String JavaDoc string, String JavaDoc[] words,
457         String JavaDoc startHighlight, String JavaDoc endHighlight)
458     {
459         if (string == null || words == null ||
460                 startHighlight == null || endHighlight == null)
461         {
462             return null;
463         }
464
465         //Iterate through each word.
466
for (int x=0; x<words.length; x++) {
467             //we want to ignore case.
468
String JavaDoc lcString = string.toLowerCase();
469             //using a char [] is more efficient
470
char [] string2 = string.toCharArray();
471             String JavaDoc word = words[x].toLowerCase();
472
473             //perform specialized replace logic
474
int i=0;
475             if ( ( i=lcString.indexOf( word, i ) ) >= 0 ) {
476                 int oLength = word.length();
477                 StringBuffer JavaDoc buf = new StringBuffer JavaDoc(string2.length);
478
479                 //we only want to highlight distinct words and not parts of
480
//larger words. The method used below mostly solves this. There
481
//are a few cases where it doesn't, but it's close enough.
482
boolean startSpace = false;
483                 char startChar = ' ';
484                 if (i-1 > 0) {
485                     startChar = string2[i-1];
486                     if (!Character.isLetter(startChar)) {
487                         startSpace = true;
488                     }
489                 }
490                 boolean endSpace = false;
491                 char endChar = ' ';
492                 if (i+oLength<string2.length) {
493                     endChar = string2[i+oLength];
494                     if (!Character.isLetter(endChar)) {
495                         endSpace = true;
496                     }
497                 }
498                 if ((startSpace && endSpace) || (i==0 && endSpace)) {
499                     buf.append(string2, 0, i);
500                     if (startSpace && startChar==' ') { buf.append(startChar); }
501                     buf.append(startHighlight);
502                     buf.append(string2, i, oLength).append(endHighlight);
503                     if (endSpace && endChar==' ') { buf.append(endChar); }
504                 }
505                 else {
506                     buf.append(string2, 0, i);
507                     buf.append(string2, i, oLength);
508                 }
509
510                 i += oLength;
511                 int j = i;
512                 while( ( i=lcString.indexOf( word, i ) ) > 0 ) {
513                     startSpace = false;
514                     startChar = string2[i-1];
515                     if (!Character.isLetter(startChar)) {
516                         startSpace = true;
517                     }
518
519                     endSpace = false;
520                     if (i+oLength<string2.length) {
521                         endChar = string2[i+oLength];
522                         if (!Character.isLetter(endChar)) {
523                             endSpace = true;
524                         }
525                     }
526                     if ((startSpace && endSpace) || i+oLength==string2.length) {
527                         buf.append(string2, j, i-j);
528                         if (startSpace && startChar==' ') { buf.append(startChar); }
529                         buf.append(startHighlight);
530                         buf.append(string2, i, oLength).append(endHighlight);
531                         if (endSpace && endChar==' ') { buf.append(endChar); }
532                     }
533                     else {
534                         buf.append(string2, j, i-j);
535                         buf.append(string2, i, oLength);
536                     }
537                     i += oLength;
538                     j = i;
539                 }
540                 buf.append(string2, j, string2.length - j);
541                 string = buf.toString();
542             }
543         }
544         return string;
545     }
546
547     /**
548      * Escapes all necessary characters in the String so that it can be used
549      * in an XML doc.
550      *
551      * @param string the string to escape.
552      * @return the string with appropriate characters escaped.
553      */

554     public static final String JavaDoc escapeForXML(String JavaDoc string) {
555         //Check if the string is null or zero length -- if so, return
556
//what was sent in.
557
if (string == null || string.length() == 0 ) {
558             return string;
559         }
560         char [] sArray = string.toCharArray();
561         StringBuffer JavaDoc buf = new StringBuffer JavaDoc(sArray.length);
562         char ch;
563         for (int i=0; i<sArray.length; i++) {
564             ch = sArray[i];
565             if(ch == '<') {
566                 buf.append("&lt;");
567             }
568             else if (ch == '&') {
569                 buf.append("&amp;");
570             }
571             else if (ch == '"') {
572                 buf.append("&quot;");
573             }
574             else {
575                 buf.append(ch);
576             }
577         }
578         return buf.toString();
579     }
580
581 }
582
Popular Tags