StringUtils


1   /*
2    * NEMESIS-FORUM.
3    * Copyright (C) 2002  David Laurent(lithium2@free.fr). All rights reserved.
4    * 
5    * Copyright (c) 2000 The Apache Software Foundation. All rights reserved.
6    * 
7    * Copyright (C) 2001 Yasna.com. All rights reserved.
8    * 
9    * Copyright (C) 2000 CoolServlets.com. All rights reserved.
10   * 
11   * NEMESIS-FORUM. is free software; you can redistribute it and/or
12   * modify it under the terms of the Apache Software License, Version 1.1,
13   * or (at your option) any later version.
14   * 
15   * NEMESIS-FORUM core framework, NEMESIS-FORUM backoffice, NEMESIS-FORUM frontoffice
16   * application are parts of NEMESIS-FORUM and are distributed under
17   * same terms of licence.
18   * 
19   * 
20   * NEMESIS-FORUM includes software developed by the Apache Software Foundation (http://www.apache.org/)
21   * and software developed by CoolServlets.com (http://www.coolservlets.com).
22   * and software developed by Yasna.com (http://www.yasna.com).
23   * 
24   */
25  
26  
27  package org.nemesis.forum.util;
28  
29  import java.security.MessageDigest  ;
30  import java.security.NoSuchAlgorithmException  ;
31  import java.util.ArrayList  ;
32  import java.util.HashMap  ;
33  import java.util.Map  ;
34  import java.util.Random  ;
35  import java.util.StringTokenizer  ;
36  
37  import org.apache.commons.logging.Log;
38  import org.apache.commons.logging.LogFactory;
39  /**
40   * Utility class to perform common String manipulation algorithms.
41   */
42  public class StringUtils {
43      static protected Log log = LogFactory.getLog(StringUtils.class);
44      /**
45       * Initialization lock for the whole class. Init's only happen once per
46       * class load so this shouldn't be a bottleneck.
47       */
48      private static Object   initLock = new Object  ();
49  
50      /**
51       * Replaces all instances of oldString with newString in line.
52       *
53       * @param line the String to search to perform replacements on
54       * @param oldString the String that should be replaced by newString
55       * @param newString the String that will replace all instances of oldString
56       *
57       * @return a String will all instances of oldString replaced by newString
58       */
59      public static final String   replace( String   line, String   oldString, String   newString )
60      {
61          if (line == null) {
62              return null;
63          }
64          int i=0;
65          if ( ( i=line.indexOf( oldString, i ) ) >= 0 ) {
66              char [] line2 = line.toCharArray();
67              char [] newString2 = newString.toCharArray();
68              int oLength = oldString.length();
69              StringBuffer   buf = new StringBuffer  (line2.length);
70              buf.append(line2, 0, i).append(newString2);
71              i += oLength;
72              int j = i;
73              while( ( i=line.indexOf( oldString, i ) ) > 0 ) {
74                  buf.append(line2, j, i-j).append(newString2);
75                  i += oLength;
76                  j = i;
77              }
78              buf.append(line2, j, line2.length - j);
79              return buf.toString();
80          }
81          return line;
82      }
83  
84      /**
85       * Replaces all instances of oldString with newString in line with the
86       * added feature that matches of newString in oldString ignore case.
87       *
88       * @param line the String to search to perform replacements on
89       * @param oldString the String that should be replaced by newString
90       * @param newString the String that will replace all instances of oldString
91       *
92       * @return a String will all instances of oldString replaced by newString
93       */
94      public static final String   replaceIgnoreCase(String   line, String   oldString,
95              String   newString)
96      {
97          if (line == null) {
98              return null;
99          }
100         String   lcLine = line.toLowerCase();
101         String   lcOldString = oldString.toLowerCase();
102         int i=0;
103         if ( ( i=lcLine.indexOf( lcOldString, i ) ) >= 0 ) {
104             char [] line2 = line.toCharArray();
105             char [] newString2 = newString.toCharArray();
106             int oLength = oldString.length();
107             StringBuffer   buf = new StringBuffer  (line2.length);
108             buf.append(line2, 0, i).append(newString2);
109             i += oLength;
110             int j = i;
111             while( ( i=lcLine.indexOf( lcOldString, i ) ) > 0 ) {
112                 buf.append(line2, j, i-j).append(newString2);
113                 i += oLength;
114                 j = i;
115             }
116             buf.append(line2, j, line2.length - j);
117             return buf.toString();
118         }
119         return line;
120     }
121 
122    /**
123     * Replaces all instances of oldString with newString in line.
124     * The count Integer is updated with number of replaces.
125     *
126     * @param line the String to search to perform replacements on
127     * @param oldString the String that should be replaced by newString
128     * @param newString the String that will replace all instances of oldString
129     *
130     * @return a String will all instances of oldString replaced by newString
131     */
132     public static final String   replace( String   line, String   oldString,
133             String   newString, int[] count)
134     {
135         if (line == null) {
136             return null;
137         }
138         int i=0;
139         if ( ( i=line.indexOf( oldString, i ) ) >= 0 ) {
140             int counter = 0;
141             counter++;
142             char [] line2 = line.toCharArray();
143             char [] newString2 = newString.toCharArray();
144             int oLength = oldString.length();
145             StringBuffer   buf = new StringBuffer  (line2.length);
146             buf.append(line2, 0, i).append(newString2);
147             i += oLength;
148             int j = i;
149             while( ( i=line.indexOf( oldString, i ) ) > 0 ) {
150                 counter++;
151                 buf.append(line2, j, i-j).append(newString2);
152                 i += oLength;
153                 j = i;
154             }
155             buf.append(line2, j, line2.length - j);
156             count[0] = counter;
157             return buf.toString();
158         }
159         return line;
160     }
161 
162     /**
163      * This method takes a string which may contain HTML tags (ie, &lt;b&gt;,
164      * &lt;table&gt;, etc) and converts the '&lt'' and '&gt;' characters to
165      * their HTML escape sequences.
166      *
167      * @param input the text to be converted.
168      * @return the input string with the characters '&lt;' and '&gt;' replaced
169      *  with their HTML escape sequences.
170      */
171     public static final String   escapeHTMLTags( String   input ) {
172         //Check if the string is null or zero length -- if so, return
173         //what was sent in.
174         if( input == null || input.length() == 0 ) {
175             return input;
176         }
177         //Use a StringBuffer in lieu of String concatenation -- it is
178         //much more efficient this way.
179         StringBuffer   buf = new StringBuffer  (input.length());
180         char ch = ' ';
181         for( int i=0; i<input.length(); i++ ) {
182             ch = input.charAt(i);
183             if( ch == '<' ) {
184                 buf.append("&lt;");
185             }
186             else if( ch == '>' ) {
187                 buf.append("&gt;");
188             }
189             else {
190                 buf.append( ch );
191             }
192         }
193         return buf.toString();
194     }
195 
196     /**
197      * Used by the hash method.
198      */
199     private static MessageDigest   digest = null;
200 
201     /**
202      * Hashes a String using the Md5 algorithm and returns the result as a
203      * String of hexadecimal numbers. This method is synchronized to avoid
204      * excessive MessageDigest object creation. If calling this method becomes
205      * a bottleneck in your code, you may wish to maintain a pool of
206      * MessageDigest objects instead of using this method.
207      * <p>
208      * A hash is a one-way function -- that is, given an
209      * input, an output is easily computed. However, given the output, the
210      * input is almost impossible to compute. This is useful for passwords
211      * since we can store the hash and a hacker will then have a very hard time
212      * determining the original password.
213      * <p>
214      * every time a user logs in, we simply
215      * take their plain text password, compute the hash, and compare the
216      * generated hash to the stored hash. Since it is almost impossible that
217      * two passwords will generate the same hash, we know if the user gave us
218      * the correct password or not. The only negative to this system is that
219      * password recovery is basically impossible. Therefore, a reset password
220      * method is used instead.
221      *
222      * @param data the String to compute the hash of.
223      * @return a hashed version of the passed-in String
224      */
225     public synchronized static final String   hash(String   data) {
226         if (digest == null) {
227             try {
228                 digest = MessageDigest.getInstance("MD5");
229             }
230             catch (NoSuchAlgorithmException   nsae) {
231                 log.error("Failed to load the MD5 MessageDigest. " +
232                 "will be unable to function normally.",nsae);
233                 
234             }
235         }
236         //Now, compute hash.
237         digest.update(data.getBytes());
238         return toHex(digest.digest());
239     }
240 
241     /**
242      * Turns an array of bytes into a String representing each byte as an
243      * unsigned hex number.
244      * <p>
245      * Method by Santeri Paavolainen, Helsinki Finland 1996<br>
246      * (c) Santeri Paavolainen, Helsinki Finland 1996<br>
247      * Distributed under LGPL.
248      *
249      * @param hash an rray of bytes to convert to a hex-string
250      * @return generated hex string
251      */
252     public static final String   toHex (byte hash[]) {
253         StringBuffer   buf = new StringBuffer  (hash.length * 2);
254         int i;
255 
256         for (i = 0; i < hash.length; i++) {
257             if (((int) hash[i] & 0xff) < 0x10) {
258                 buf.append("0");
259             }
260             buf.append(Long.toString((int) hash[i] & 0xff, 16));
261         }
262         return buf.toString();
263     }
264 
265     /**
266      * Converts a line of text into an array of lower case words. Words are
267      * delimited by the following characters: , .\r\n:/\+
268      * <p>
269      * In the future, this method should be changed to use a
270      * BreakIterator.wordInstance(). That class offers much more fexibility.
271      *
272      * @param text a String of text to convert into an array of words
273      * @return text broken up into an array of words.
274      */
275     public static final String   [] toLowerCaseWordArray(String   text) {
276         if (text == null || text.length() == 0) {
277                 return new String  [0];
278         }
279         StringTokenizer   tokens = new StringTokenizer  (text, " ,\r\n.:/\\+");
280         String   [] words = new String  [tokens.countTokens()];
281         for (int i=0; i<words.length; i++) {
282             words[i] = tokens.nextToken().toLowerCase();
283         }
284         return words;
285     }
286 
287     /**
288      * A list of some of the most common words. For searching and indexing, we
289      * often want to filter out these words since they just confuse searches.
290      * The list was not created scientifically so may be incomplete :)
291      */
292     private static final String   [] commonWords =  new String   [] {
293         "a", "and", "as", "at", "be", "do", "i", "if", "in", "is", "it", "so",
294         "the", "to"
295     };
296     private static Map   commonWordsMap = null;
297 
298     /**
299      * Returns a new String array with some of the most common English words
300      * removed. The specific words removed are: a, and, as, at, be, do, i, if,
301      * in, is, it, so, the, to
302      */
303     public static final String   [] removeCommonWords(String   [] words) {
304         //See if common words map has been initialized. We don't statically
305         //initialize it to save some memory. Even though this a small savings,
306         //it adds up with hundreds of classes being loaded.
307         if (commonWordsMap == null) {
308             synchronized(initLock) {
309                 if (commonWordsMap == null) {
310                     commonWordsMap = new HashMap  ();
311                     for (int i=0; i<commonWords.length; i++) {
312                         commonWordsMap.put(commonWords[i], commonWords[i]);
313                     }
314                 }
315             }
316         }
317         //Now, add all words that aren't in the common map to results
318         ArrayList   results = new ArrayList  (words.length);
319         for (int i=0; i<words.length; i++) {
320             if (!commonWordsMap.containsKey(words[i])) {
321                 results.add(words[i]);
322             }
323         }
324         return (String  [])results.toArray(new String  [results.size()]);
325     }
326 
327     /**
328      * Pseudo-random number generator object for use with randomString().
329      * The Random class is not considered to be cryptographically secure, so
330      * only use these random Strings for low to medium security applications.
331      */
332     private static Random   randGen = null;
333 
334     /**
335      * Array of numbers and letters of mixed case. Numbers appear in the list
336      * twice so that there is a more equal chance that a number will be picked.
337      * We can use the array to get a random number or letter by picking a random
338      * array index.
339      */
340     private static char[] numbersAndLetters = null;
341 
342     /**
343      * Returns a random String of numbers and letters of the specified length.
344      * The method uses the Random class that is built-in to Java which is
345      * suitable for low to medium grade security uses. This means that the
346      * output is only pseudo random, i.e., each number is mathematically
347      * generated so is not truly random.<p>
348      *
349      * For every character in the returned String, there is an equal chance that
350      * it will be a letter or number. If a letter, there is an equal chance
351      * that it will be lower or upper case.<p>
352      *
353      * The specified length must be at least one. If not, the method will return
354      * null.
355      *
356      * @param length the desired length of the random String to return.
357      * @return a random String of numbers and letters of the specified length.
358      */
359     public static final String   randomString(int length) {
360         if (length < 1) {
361             return null;
362         }
363         //Init of pseudo random number generator.
364         if (randGen == null) {
365             synchronized (initLock) {
366                 if (randGen == null) {
367                     randGen = new Random  ();
368                     //Also initialize the numbersAndLetters array
369                     numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
370                     "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
371                 }
372             }
373         }
374         //Create a char buffer to put random letters and numbers in.
375         char [] randBuffer = new char[length];
376         for (int i=0; i<randBuffer.length; i++) {
377             randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];
378         }
379         return new String  (randBuffer);
380     }
381 
382    /**
383     * Intelligently chops a String at a word boundary (whitespace) that occurs
384     * at the specified index in the argument or before. However, if there is a
385     * newline character before <code>length</code>, the String will be chopped
386     * there. If no newline or whitespace is found in <code>string</code> up to
387     * the index <code>length</code>, the String will chopped at <code>length</code>.
388     * <p>
389     * For example, chopAtWord("This is a nice String", 10) will return
390     * "This is a" which is the first word boundary less than or equal to 10
391     * characters into the original String.
392     *
393     * @param string the String to chop.
394     * @param length the index in <code>string</code> to start looking for a
395     *       whitespace boundary at.
396     * @return a substring of <code>string</code> whose length is less than or
397     *       equal to <code>length</code>, and that is chopped at whitespace.
398     */
399     public static final String   chopAtWord(String   string, int length) {
400         if (string == null) {
401             return string;
402         }
403 
404         char [] charArray = string.toCharArray();
405         int sLength = string.length();
406         if (length < sLength) {
407             sLength = length;
408         }
409 
410         //First check if there is a newline character before length; if so,
411         //chop word there.
412         for (int i=0; i<sLength-1; i++) {
413             //Windows
414             if (charArray[i] == '\r' && charArray[i+1] == '\n') {
415                 return string.substring(0, i);
416             }
417             //Unix
418             else if (charArray[i] == '\n') {
419                 return string.substring(0, i);
420             }
421         }
422         //Also check boundary case of Unix newline
423         if (charArray[sLength-1] == '\n') {
424             return string.substring(0, sLength-1);
425         }
426 
427         //Done checking for newline, now see if the total string is less than
428         //the specified chop point.
429         if (string.length() < length) {
430             return string;
431         }
432 
433         //No newline, so chop at the first whitespace.
434         for (int i = length-1; i > 0; i--) {
435             if (charArray[i] == ' ') {
436                 return string.substring(0, i).trim();
437             }
438         }
439 
440         //Did not find word boundary so return original String chopped at
441         //specified length.
442         return string.substring(0, length);
443     }
444 
445     /**
446      * Highlights words in a string. Words matching ignores case. The actual
447      * higlighting method is specified with the start and end higlight tags.
448      * Those might be beginning and ending HTML bold tags, or anything else.
449      *
450      * @param string the String to highlight words in.
451      * @param words an array of words that should be highlighted in the string.
452      * @param startHighlight the tag that should be inserted to start highlighting.
453      * @param endHighlight the tag that should be inserted to end highlighting.
454      * @return a new String with the specified words highlighted.
455      */
456     public static final String   highlightWords(String   string, String  [] words,
457         String   startHighlight, String   endHighlight)
458     {
459         if (string == null || words == null ||
460                 startHighlight == null || endHighlight == null)
461         {
462             return null;
463         }
464 
465         //Iterate through each word.
466         for (int x=0; x<words.length; x++) {
467             //we want to ignore case.
468             String   lcString = string.toLowerCase();
469             //using a char [] is more efficient
470             char [] string2 = string.toCharArray();
471             String   word = words[x].toLowerCase();
472 
473             //perform specialized replace logic
474             int i=0;
475             if ( ( i=lcString.indexOf( word, i ) ) >= 0 ) {
476                 int oLength = word.length();
477                 StringBuffer   buf = new StringBuffer  (string2.length);
478 
479                 //we only want to highlight distinct words and not parts of
480                 //larger words. The method used below mostly solves this. There
481                 //are a few cases where it doesn't, but it's close enough.
482                 boolean startSpace = false;
483                 char startChar = ' ';
484                 if (i-1 > 0) {
485                     startChar = string2[i-1];
486                     if (!Character.isLetter(startChar)) {
487                         startSpace = true;
488                     }
489                 }
490                 boolean endSpace = false;
491                 char endChar = ' ';
492                 if (i+oLength<string2.length) {
493                     endChar = string2[i+oLength];
494                     if (!Character.isLetter(endChar))  {
495                         endSpace = true;
496                     }
497                 }
498                 if ((startSpace && endSpace) || (i==0 && endSpace)) {
499                     buf.append(string2, 0, i);
500                     if (startSpace && startChar==' ') { buf.append(startChar); }
501                     buf.append(startHighlight);
502                     buf.append(string2, i, oLength).append(endHighlight);
503                     if (endSpace && endChar==' ') { buf.append(endChar); }
504                 }
505                 else {
506                     buf.append(string2, 0, i);
507                     buf.append(string2, i, oLength);
508                 }
509 
510                 i += oLength;
511                 int j = i;
512                 while( ( i=lcString.indexOf( word, i ) ) > 0 ) {
513                     startSpace = false;
514                     startChar = string2[i-1];
515                     if (!Character.isLetter(startChar)) {
516                         startSpace = true;
517                     }
518 
519                     endSpace = false;
520                     if (i+oLength<string2.length) {
521                         endChar = string2[i+oLength];
522                         if (!Character.isLetter(endChar))  {
523                             endSpace = true;
524                         }
525                     }
526                     if ((startSpace && endSpace) || i+oLength==string2.length) {
527                         buf.append(string2, j, i-j);
528                         if (startSpace && startChar==' ') { buf.append(startChar); }
529                         buf.append(startHighlight);
530                         buf.append(string2, i, oLength).append(endHighlight);
531                         if (endSpace && endChar==' ') { buf.append(endChar); }
532                     }
533                     else {
534                         buf.append(string2, j, i-j);
535                         buf.append(string2, i, oLength);
536                     }
537                     i += oLength;
538                     j = i;
539                 }
540                 buf.append(string2, j, string2.length - j);
541                 string = buf.toString();
542             }
543         }
544         return string;
545     }
546 
547     /**
548      * Escapes all necessary characters in the String so that it can be used
549      * in an XML doc.
550      *
551      * @param string the string to escape.
552      * @return the string with appropriate characters escaped.
553      */
554     public static final String   escapeForXML(String   string) {
555         //Check if the string is null or zero length -- if so, return
556         //what was sent in.
557         if (string == null || string.length() == 0 ) {
558             return string;
559         }
560         char [] sArray = string.toCharArray();
561         StringBuffer   buf = new StringBuffer  (sArray.length);
562         char ch;
563         for (int i=0; i<sArray.length; i++) {
564             ch = sArray[i];
565             if(ch == '<') {
566                 buf.append("&lt;");
567             }
568             else if (ch == '&') {
569                 buf.append("&amp;");
570             }
571             else if (ch == '"') {
572                 buf.append("&quot;");
573             }
574             else {
575                 buf.append(ch);
576             }
577         }
578         return buf.toString();
579     }
580 
581 }
582
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags