StringUtils


1   /**
2    * $RCSfile: StringUtils.java,v $
3    * $Revision: 1.3 $
4    * $Date: 2006/01/07 00:21:06 $
5    *
6    * Copyright (C) 2000 CoolServlets.com. All rights reserved.
7    *
8    * ===================================================================
9    * The Apache Software License, Version 1.1
10   *
11   * Redistribution and use in source and binary forms, with or without
12   * modification, are permitted provided that the following conditions
13   * are met:
14   *
15   * 1. Redistributions of source code must retain the above copyright
16   *    notice, this list of conditions and the following disclaimer.
17   *
18   * 2. Redistributions in binary form must reproduce the above copyright
19   *    notice, this list of conditions and the following disclaimer in
20   *    the documentation and/or other materials provided with the
21   *    distribution.
22   *
23   * 3. The end-user documentation included with the redistribution,
24   *    if any, must include the following acknowledgment:
25   *       "This product includes software developed by
26   *        CoolServlets.com (http://www.Yasna.com)."
27   *    Alternately, this acknowledgment may appear in the software itself,
28   *    if and wherever such third-party acknowledgments normally appear.
29   *
30   * 4. The names "Jive" and "CoolServlets.com" must not be used to
31   *    endorse or promote products derived from this software without
32   *    prior written permission. For written permission, please
33   *    contact webmaster@Yasna.com.
34   *
35   * 5. Products derived from this software may not be called "Jive",
36   *    nor may "Jive" appear in their name, without prior written
37   *    permission of CoolServlets.com.
38   *
39   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
41   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
42   * DISCLAIMED.  IN NO EVENT SHALL COOLSERVLETS.COM OR
43   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
46   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
47   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
49   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50   * SUCH DAMAGE.
51   * ====================================================================
52   *
53   * This software consists of voluntary contributions made by many
54   * individuals on behalf of CoolServlets.com. For more information
55   * on CoolServlets.com, please see <http://www.Yasna.com>.
56   */
57  
58  package com.Yasna.util;
59  
60  import java.security.*;
61  import java.text.*;
62  import java.util.*;
63  
64  /**
65   * Utility class to perform common String manipulation algorithms.
66   */
public class StringUtils {

    /** Lower-case hexadecimal digits indexed by nibble value; used by {@link #toHex}. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Replaces all instances of oldString with newString in line.
     * <p>
     * If oldString is null or empty, or newString is null, there is nothing
     * meaningful to replace and line is returned unchanged.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString
     * @param newString the String that will replace all instances of oldString
     *
     * @return a String with all instances of oldString replaced by newString,
     *      or null if line is null
     */
    public static final String replace(String line, String oldString, String newString)
    {
        return replaceAll(line, oldString, newString, false, null);
    }

    /**
     * Replaces all instances of oldString with newString in line, matching
     * oldString without regard to case.
     * <p>
     * Matching is done directly against line (see
     * {@link String#regionMatches(boolean, int, String, int, int)}) rather
     * than against lower-cased copies, because lower-casing can change the
     * length of a String and would then yield indexes that do not line up
     * with the original text.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString
     * @param newString the String that will replace all instances of oldString
     *
     * @return a String with all instances of oldString replaced by newString,
     *      or null if line is null
     */
    public static final String replaceIgnoreCase(String line, String oldString,
            String newString)
    {
        return replaceAll(line, oldString, newString, true, null);
    }

    /**
     * Replaces all instances of oldString with newString in line and reports
     * how many replacements were made.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString
     * @param newString the String that will replace all instances of oldString
     * @param count if non-null and non-empty, <code>count[0]</code> is set to
     *      the number of replacements performed (0 when nothing matched)
     *
     * @return a String with all instances of oldString replaced by newString,
     *      or null if line is null
     */
    public static final String replace(String line, String oldString,
            String newString, int[] count)
    {
        return replaceAll(line, oldString, newString, false, count);
    }

    /**
     * Shared implementation of the public replace methods.
     *
     * @return the input line itself when nothing was replaced, otherwise a
     *      new String containing the replacements
     */
    private static String replaceAll(String line, String oldString,
            String newString, boolean ignoreCase, int[] count)
    {
        int replacements = 0;
        String result = line;
        //An empty search string has no well-defined occurrences (and would
        //never advance the scan), so it is treated like "no match".
        if (line != null && newString != null
                && oldString != null && oldString.length() > 0)
        {
            int match = indexOf(line, oldString, 0, ignoreCase);
            if (match >= 0) {
                char[] source = line.toCharArray();
                int oLength = oldString.length();
                StringBuffer buf = new StringBuffer(source.length);
                //Index of the first character not yet copied to buf.
                int copied = 0;
                while (match >= 0) {
                    buf.append(source, copied, match - copied).append(newString);
                    copied = match + oLength;
                    replacements++;
                    match = indexOf(line, oldString, copied, ignoreCase);
                }
                buf.append(source, copied, source.length - copied);
                result = buf.toString();
            }
        }
        if (count != null && count.length > 0) {
            count[0] = replacements;
        }
        return result;
    }

    /**
     * Finds the first occurrence of target in text at or after from,
     * optionally ignoring case. Returns -1 if there is none.
     */
    private static int indexOf(String text, String target, int from,
            boolean ignoreCase)
    {
        if (!ignoreCase) {
            return text.indexOf(target, from);
        }
        int tLength = target.length();
        int last = text.length() - tLength;
        for (int i = from; i <= last; i++) {
            if (text.regionMatches(true, i, target, 0, tLength)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * This method takes a string which may contain HTML tags (ie, &lt;b&gt;,
     * &lt;table&gt;, etc) and converts the '&lt;' and '&gt;' characters to
     * their HTML escape sequences. No other characters (in particular '&amp;'
     * and quotes) are touched.
     *
     * @param input the text to be converted.
     * @return the input string with the characters '&lt;' and '&gt;' replaced
     *  with their HTML escape sequences; null or empty input is returned as is.
     */
    public static final String escapeHTMLTags(String input) {
        if (input == null || input.length() == 0) {
            return input;
        }
        int total = input.length();
        StringBuffer buf = new StringBuffer(total);
        for (int i = 0; i < total; i++) {
            char ch = input.charAt(i);
            switch (ch) {
                case '<':
                    buf.append("&lt;");
                    break;
                case '>':
                    buf.append("&gt;");
                    break;
                default:
                    buf.append(ch);
            }
        }
        return buf.toString();
    }

    /**
     * Used by the hash method. Created on first use and then shared, which is
     * why hash is synchronized.
     */
    private static MessageDigest digest = null;

    /**
     * Hashes a String using the MD5 algorithm and returns the result as a
     * String of lower-case hexadecimal digits. This method is synchronized
     * because a single MessageDigest object is shared between calls. If
     * calling this method becomes a bottleneck in your code, you may wish to
     * maintain a pool of MessageDigest objects instead of using this method.
     * <p>
     * The String is converted to bytes with the platform default charset, so
     * hashes of non-ASCII data may differ between platforms. This is left
     * unchanged so previously stored hashes keep matching.
     * <p>
     * NOTE(review): MD5 without a salt is no longer considered adequate for
     * storing passwords; changing the algorithm would invalidate stored
     * hashes, so it is only flagged here.
     *
     * @param data the String to compute the hash of; must not be null.
     * @return a hashed version of the passed-in String
     * @throws IllegalStateException if the MD5 algorithm is not available
     */
    public static final synchronized String hash(String data) {
        if (digest == null) {
            try {
                digest = MessageDigest.getInstance("MD5");
            }
            catch (NoSuchAlgorithmException nsae) {
                //Fail with a clear error instead of logging and then running
                //into a NullPointerException on the missing digest.
                IllegalStateException failure = new IllegalStateException(
                        "Failed to load the MD5 MessageDigest. " +
                        "Jive will be unable to function normally.");
                failure.initCause(nsae);
                throw failure;
            }
        }
        //digest(byte[]) updates, finishes and resets the digest in one step.
        return toHex(digest.digest(data.getBytes()));
    }

    /**
     * Turns an array of bytes into a String representing each byte as an
     * unsigned, two-digit, lower-case hex number.
     * <p>
     * Method by Santeri Paavolainen, Helsinki Finland 1996<br>
     * (c) Santeri Paavolainen, Helsinki Finland 1996<br>
     * Distributed under LGPL.
     *
     * @param hash an array of bytes to convert to a hex-string
     * @return generated hex string
     */
    public static final String toHex(byte hash[]) {
        StringBuffer buf = new StringBuffer(hash.length * 2);
        for (int i = 0; i < hash.length; i++) {
            //Mask to get the unsigned value of the (signed) byte.
            int value = hash[i] & 0xff;
            buf.append(HEX_DIGITS[value >>> 4]);
            buf.append(HEX_DIGITS[value & 0x0f]);
        }
        return buf.toString();
    }

    /**
     * Converts a line of text into an array of lower case words. Words are
     * delimited by the following characters: , .\r\n:/\+
     * <p>
     * In the future, this method should be changed to use a
     * BreakIterator.wordInstance(). That class offers much more flexibility.
     *
     * @param text a String of text to convert into an array of words
     * @return text broken up into an array of words; an empty array if text
     *      is null or empty.
     */
    public static final String[] toLowerCaseWordArray(String text) {
        if (text == null || text.length() == 0) {
            return new String[0];
        }
        StringTokenizer tokens = new StringTokenizer(text, " ,\r\n.:/\\+");
        String[] words = new String[tokens.countTokens()];
        int next = 0;
        while (tokens.hasMoreTokens()) {
            words[next++] = tokens.nextToken().toLowerCase();
        }
        return words;
    }

    /**
     * A list of some of the most common words. For searching and indexing, we
     * often want to filter out these words since they just confuse searches.
     * The list was not created scientifically so may be incomplete :)
     */
    private static final String[] commonWords = new String[] {
        "a", "and", "as", "at", "be", "do", "i", "if", "in", "is", "it", "so",
        "the", "to"
    };

    /**
     * Lookup set built from commonWords. It is created during class
     * initialization and never modified afterwards, so it can be read from
     * any thread without locking.
     */
    private static final Set commonWordSet =
            new HashSet(Arrays.asList(commonWords));

    /**
     * Returns a new String array with some of the most common English words
     * removed. The specific words removed are: a, and, as, at, be, do, i, if,
     * in, is, it, so, the, to. The comparison is case sensitive, so words
     * should already be lower case.
     *
     * @param words the words to filter; may be null.
     * @return a new array with the remaining words in their original order;
     *      an empty array if words is null.
     */
    public static final String[] removeCommonWords(String[] words) {
        if (words == null) {
            return new String[0];
        }
        ArrayList results = new ArrayList(words.length);
        for (int i = 0; i < words.length; i++) {
            if (!commonWordSet.contains(words[i])) {
                results.add(words[i]);
            }
        }
        return (String[]) results.toArray(new String[results.size()]);
    }

    /**
     * Pseudo-random number generator object for use with randomString().
     * The Random class is not considered to be cryptographically secure, so
     * only use these random Strings for low to medium security applications.
     */
    private static final Random randGen = new Random();

    /**
     * Array of numbers and letters of mixed case. Numbers appear in the list
     * twice so that there is a more equal chance that a number will be picked.
     * We can use the array to get a random number or letter by picking a random
     * array index.
     */
    private static final char[] numbersAndLetters =
            ("0123456789abcdefghijklmnopqrstuvwxyz" +
             "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();

    /**
     * Returns a random String of numbers and letters of the specified length.
     * The method uses the Random class that is built-in to Java which is
     * suitable for low to medium grade security uses. This means that the
     * output is only pseudo random, i.e., each number is mathematically
     * generated so is not truly random.<p>
     *
     * Every character is drawn uniformly from the numbersAndLetters array,
     * which holds each digit twice and each letter once per case.<p>
     *
     * The specified length must be at least one. If not, the method will return
     * null.
     *
     * @param length the desired length of the random String to return.
     * @return a random String of numbers and letters of the specified length.
     */
    public static final String randomString(int length) {
        if (length < 1) {
            return null;
        }
        char[] randBuffer = new char[length];
        for (int i = 0; i < randBuffer.length; i++) {
            //Bound by the array length so that every entry (including the
            //last one, 'Z') can be selected.
            randBuffer[i] = numbersAndLetters[randGen.nextInt(numbersAndLetters.length)];
        }
        return new String(randBuffer);
    }

    /**
     * Intelligently chops a String at a word boundary (a space) that occurs
     * at the specified index in the argument or before. However, if there is a
     * newline character before <code>length</code>, the String will be chopped
     * there (a "\r" directly preceding the "\n" is dropped as well). If no
     * newline or space is found in <code>string</code> up to the index
     * <code>length</code>, the String will be chopped at <code>length</code>.
     * A String that already fits within <code>length</code> characters and
     * contains no newline is returned unchanged.
     * <p>
     * For example, chopAtWord("This is a nice String", 10) will return
     * "This is a" which is the first word boundary less than or equal to 10
     * characters into the original String.
     *
     * @param string the String to chop.
     * @param length the index in <code>string</code> to start looking for a
     *       whitespace boundary at.
     * @return a substring of <code>string</code> whose length is less than or
     *       equal to <code>length</code>; null if <code>string</code> is null,
     *       and the empty String if <code>length</code> is not positive.
     */
    public static final String chopAtWord(String string, int length) {
        if (string == null || string.length() == 0) {
            return string;
        }
        if (length <= 0) {
            return "";
        }

        int total = string.length();
        int window = (length < total) ? length : total;

        //A newline inside the first "window" characters always wins.
        int newline = string.indexOf('\n');
        if (newline >= 0 && newline < window) {
            int end = newline;
            //Windows line ending: drop the carriage return too.
            if (end > 0 && string.charAt(end - 1) == '\r') {
                end--;
            }
            return string.substring(0, end);
        }

        //No newline; if everything fits there is nothing to chop.
        if (total <= length) {
            return string;
        }

        //Look backwards for a space, starting at the chop index itself. A
        //space at index 0 is not treated as a boundary.
        for (int i = length; i > 0; i--) {
            if (string.charAt(i) == ' ') {
                return string.substring(0, i).trim();
            }
        }

        //Did not find word boundary so return original String chopped at
        //specified length.
        return string.substring(0, length);
    }

    /**
     * Highlights words in a string. Word matching ignores case. The actual
     * highlighting method is specified with the start and end highlight tags.
     * Those might be beginning and ending HTML bold tags, or anything else.
     * <p>
     * Only distinct words are highlighted: a match counts only if it is not
     * directly preceded or followed by a letter. The string is scanned once,
     * so highlight tags inserted for one word are never searched for another
     * word. When several words match at the same position the longest one is
     * used. Null or empty entries in <code>words</code> are ignored.
     *
     * @param string the String to highlight words in.
     * @param words an array of words that should be highlighted in the string.
     * @param startHighlight the tag that should be inserted to start highlighting.
     * @param endHighlight the tag that should be inserted to end highlighting.
     * @return a new String with the specified words highlighted, or null if
     *      any argument is null.
     */
    public static final String highlightWords(String string, String[] words,
        String startHighlight, String endHighlight)
    {
        if (string == null || words == null ||
                startHighlight == null || endHighlight == null)
        {
            return null;
        }

        int total = string.length();
        StringBuffer buf = new StringBuffer(total);
        int pos = 0;
        while (pos < total) {
            int matched = 0;
            //A word can only start at the beginning of the text or right
            //after a non-letter.
            if (pos == 0 || !Character.isLetter(string.charAt(pos - 1))) {
                matched = longestWordAt(string, pos, words);
            }
            if (matched > 0) {
                buf.append(startHighlight);
                buf.append(string.substring(pos, pos + matched));
                buf.append(endHighlight);
                pos += matched;
            }
            else {
                buf.append(string.charAt(pos));
                pos++;
            }
        }
        return buf.toString();
    }

    /**
     * Returns the length of the longest entry of words that occurs in string
     * at pos (ignoring case) and is not directly followed by a letter, or 0
     * if no entry matches there.
     */
    private static int longestWordAt(String string, int pos, String[] words) {
        int best = 0;
        for (int w = 0; w < words.length; w++) {
            String word = words[w];
            //Skips null and empty words, and words that cannot beat "best".
            if (word == null || word.length() <= best) {
                continue;
            }
            int end = pos + word.length();
            if (end > string.length()) {
                continue;
            }
            boolean boundaryAfter = (end == string.length())
                    || !Character.isLetter(string.charAt(end));
            if (boundaryAfter
                    && string.regionMatches(true, pos, word, 0, word.length()))
            {
                best = word.length();
            }
        }
        return best;
    }

    /**
     * Escapes characters in the String so that it can be used in an XML doc.
     * The characters '&lt;', '&gt;', '&amp;' and '"' are replaced by their
     * predefined entities. '&gt;' is included so that the sequence "]]&gt;"
     * cannot appear literally in character data. Apostrophes are not escaped,
     * so attribute values built from the result must be delimited with
     * double quotes.
     *
     * @param string the string to escape.
     * @return the string with appropriate characters escaped; null or empty
     *      input is returned as is.
     */
    public static final String escapeForXML(String string) {
        if (string == null || string.length() == 0) {
            return string;
        }
        int total = string.length();
        StringBuffer buf = new StringBuffer(total);
        for (int i = 0; i < total; i++) {
            char ch = string.charAt(i);
            switch (ch) {
                case '<':
                    buf.append("&lt;");
                    break;
                case '>':
                    buf.append("&gt;");
                    break;
                case '&':
                    buf.append("&amp;");
                    break;
                case '"':
                    buf.append("&quot;");
                    break;
                default:
                    buf.append(ch);
            }
        }
        return buf.toString();
    }

}
607
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags