KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > roller > util > Utilities


1 package org.roller.util;
2
3 import java.io.BufferedInputStream JavaDoc;
4 import java.io.BufferedOutputStream JavaDoc;
5 import java.io.File JavaDoc;
6 import java.io.FileInputStream JavaDoc;
7 import java.io.FileOutputStream JavaDoc;
8 import java.io.IOException JavaDoc;
9 import java.io.InputStream JavaDoc;
10 import java.io.OutputStream JavaDoc;
11 import java.io.UnsupportedEncodingException JavaDoc;
12 import java.net.URLEncoder JavaDoc;
13 import java.security.MessageDigest JavaDoc;
14 import java.util.Date JavaDoc;
15 import java.util.NoSuchElementException JavaDoc;
16 import java.util.StringTokenizer JavaDoc;
17 import java.util.regex.Matcher JavaDoc;
18 import java.util.regex.Pattern JavaDoc;
19
20 import org.apache.commons.lang.StringEscapeUtils;
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23
24 /**
25  * General purpose utilities.
26  *
27  * <pre>
28  * Includes TextToHTML methods donated by Erik Thauvin - "Donated to the
29  * Roller Weblogger project for publication under the terms of the Roller
30  * Software License.
31  * Copyright (C) 2002-2003 by Erik C. Thauvin (erik@thauvin.net).
32  * All rights reserved.
33  * </pre>
34  *
35  * @author David M Johnson
36  * @author Lance Lavandowska
37  * @author Matt Raible (added encryption methods)
38  */

39 public class Utilities
40 {
41     /** The <code>Log</code> instance for this class. */
42     private static Log mLogger = LogFactory.getLog(Utilities.class);
43     
44     /** Pattern for matching HTML links */
45     private static Pattern JavaDoc mLinkPattern =
46         Pattern.compile("<a HREF=.*?>", Pattern.CASE_INSENSITIVE);
47     
48     /**
49      * Utility methods for calling StringUtils since it cannot be
50      * instantiated and Utilties can.
51      */

52     public static boolean isNotEmpty(String JavaDoc str)
53     {
54         return StringUtils.isNotEmpty(str);
55     }
56     
57     //------------------------------------------------------------------------
58
/** Strip jsessionid off of a URL */
59     public static String JavaDoc stripJsessionId( String JavaDoc url )
60     {
61         // Strip off jsessionid found in referer URL
62
int startPos = url.indexOf(";jsessionid=");
63         if ( startPos != -1 )
64         {
65             int endPos = url.indexOf("?",startPos);
66             if ( endPos == -1 )
67             {
68                 url = url.substring(0,startPos);
69             }
70             else
71             {
72                 url = url.substring(0,startPos)
73                     + url.substring(endPos,url.length());
74             }
75         }
76         return url;
77     }
78
79     //------------------------------------------------------------------------
80
/**
81      * Escape, but do not replace HTML.
82      * The default behaviour is to escape ampersands.
83      */

84     public static String JavaDoc escapeHTML(String JavaDoc s)
85     {
86         return escapeHTML(s, true);
87     }
88
89     //------------------------------------------------------------------------
90
/**
91      * Escape, but do not replace HTML.
92      * @param escapseAmpersand Optionally escape
93      * ampersands (&amp;).
94      */

95     public static String JavaDoc escapeHTML(String JavaDoc s, boolean escapeAmpersand)
96     {
97         // got to do amp's first so we don't double escape
98
if (escapeAmpersand)
99         {
100             s = stringReplace(s, "&", "&amp;");
101         }
102         s = stringReplace(s, "&nbsp;", " ");
103         s = stringReplace(s, "\"", "&quot;");
104         s = stringReplace(s, "<", "&lt;");
105         s = stringReplace(s, ">", "&gt;");
106         return s;
107     }
108
109     //------------------------------------------------------------------------
110
/**
111      * Remove occurences of html, defined as any text
112      * between the characters "&lt;" and "&gt;". Replace
113      * any HTML tags with a space.
114      */

115     public static String JavaDoc removeHTML(String JavaDoc str)
116     {
117         return removeHTML(str, true);
118     }
119     
120     /**
121      * Remove occurences of html, defined as any text
122      * between the characters "&lt;" and "&gt;".
123      * Optionally replace HTML tags with a space.
124      *
125      * @param str
126      * @param addSpace
127      * @return
128      */

129     public static String JavaDoc removeHTML(String JavaDoc str, boolean addSpace)
130     {
131         if (str == null) return "";
132         StringBuffer JavaDoc ret = new StringBuffer JavaDoc(str.length());
133         int start = 0;
134         int beginTag = str.indexOf("<");
135         int endTag = 0;
136         if (beginTag == -1)
137             return str;
138
139         while (beginTag >= start)
140         {
141             if (beginTag > 0)
142             {
143                 ret.append(str.substring(start, beginTag));
144                 
145                 // replace each tag with a space (looks better)
146
if (addSpace) ret.append(" ");
147             }
148             endTag = str.indexOf(">", beginTag);
149             
150             // if endTag found move "cursor" forward
151
if (endTag > -1)
152             {
153                 start = endTag + 1;
154                 beginTag = str.indexOf("<", start);
155             }
156             // if no endTag found, get rest of str and break
157
else
158             {
159                 ret.append(str.substring(beginTag));
160                 break;
161             }
162         }
163         // append everything after the last endTag
164
if (endTag > -1 && endTag + 1 < str.length())
165         {
166             ret.append(str.substring(endTag + 1));
167         }
168         return ret.toString().trim();
169     }
170
171     //------------------------------------------------------------------------
172
/** Run both removeHTML and escapeHTML on a string.
173      * @param s String to be run through removeHTML and escapeHTML.
174      * @return String with HTML removed and HTML special characters escaped.
175      */

176     public static String JavaDoc removeAndEscapeHTML( String JavaDoc s )
177     {
178         if ( s==null ) return "";
179         else return Utilities.escapeHTML( Utilities.removeHTML(s) );
180     }
181
182     //------------------------------------------------------------------------
183
/**
184      * Autoformat.
185      */

186     public static String JavaDoc autoformat(String JavaDoc s)
187     {
188         String JavaDoc ret = StringUtils.replace(s, "\n", "<br />");
189         return ret;
190     }
191     
192     //------------------------------------------------------------------------
193
/**
194      * Format date in ISO-8601 format.
195      */

196     public static String JavaDoc formatIso8601Date(Date JavaDoc d)
197     {
198         return DateUtil.formatIso8601(d);
199     }
200
201     //------------------------------------------------------------------------
202
/**
203      * Format date in ISO-8601 format.
204      */

205     public static String JavaDoc formatIso8601Day(Date JavaDoc d)
206     {
207         return DateUtil.formatIso8601Day(d);
208     }
209
210     //------------------------------------------------------------------------
211
/**
212      * Return a date in RFC-822 format.
213      */

214     public static String JavaDoc formatRfc822Date(Date JavaDoc date)
215     {
216         return DateUtil.formatRfc822(date);
217     }
218
219     //------------------------------------------------------------------------
220
/**
221      * Return a date in RFC-822 format.
222      */

223     public static String JavaDoc format8charsDate(Date JavaDoc date)
224     {
225         return DateUtil.format8chars(date);
226     }
227
228     //------------------------------------------------------------------------
229
/**
230      * Replaces occurences of non-alphanumeric characters with an underscore.
231      */

232     public static String JavaDoc replaceNonAlphanumeric(String JavaDoc str)
233     {
234         return replaceNonAlphanumeric(str, '_');
235     }
236
237     //------------------------------------------------------------------------
238
/**
239      * Replaces occurences of non-alphanumeric characters with a
240      * supplied char.
241      */

242     public static String JavaDoc replaceNonAlphanumeric(String JavaDoc str, char subst)
243     {
244         StringBuffer JavaDoc ret = new StringBuffer JavaDoc(str.length());
245         char[] testChars = str.toCharArray();
246         for (int i = 0; i < testChars.length; i++)
247         {
248             if (Character.isLetterOrDigit(testChars[i]))
249             {
250                 ret.append(testChars[i]);
251             }
252             else
253             {
254                 ret.append( subst );
255             }
256         }
257         return ret.toString();
258     }
259
260     //------------------------------------------------------------------------
261
/**
262      * Remove occurences of non-alphanumeric characters.
263      */

264     public static String JavaDoc removeNonAlphanumeric(String JavaDoc str)
265     {
266         StringBuffer JavaDoc ret = new StringBuffer JavaDoc(str.length());
267         char[] testChars = str.toCharArray();
268         for (int i = 0; i < testChars.length; i++)
269         {
270             // MR: Allow periods in page links
271
if (Character.isLetterOrDigit(testChars[i]) ||
272                 testChars[i] == '.')
273             {
274                 ret.append(testChars[i]);
275             }
276         }
277         return ret.toString();
278     }
279
280     //------------------------------------------------------------------------
281
/**
282      * @param pathArray
283      * @return
284      */

285     public static String JavaDoc stringArrayToString(String JavaDoc[] stringArray, String JavaDoc delim)
286     {
287         String JavaDoc ret = "";
288         for (int i = 0; i < stringArray.length; i++)
289         {
290             if (ret.length() > 0)
291                 ret = ret + delim + stringArray[i];
292             else
293                 ret = stringArray[i];
294         }
295         return ret;
296     }
297     
298     //------------------------------------------------------------------------
299
/**
300      * Replace occurrences of str1 in string str with str2
301      */

302     public static String JavaDoc stringReplace(String JavaDoc str, String JavaDoc str1, String JavaDoc str2)
303     {
304         String JavaDoc ret = StringUtils.replace(str,str1,str2);
305         return ret;
306     }
307
308     //------------------------------------------------------------------------
309
/**
310      * Replace occurrences of str1 in string str with str2
311      * @param str String to operate on
312      * @param str1 String to be replaced
313      * @param str2 String to be used as replacement
314      * @param maxCount Number of times to replace, 0 for all
315      */

316     public static String JavaDoc stringReplace(
317         String JavaDoc str,
318         String JavaDoc str1,
319         String JavaDoc str2,
320         int maxCount)
321     {
322         String JavaDoc ret = StringUtils.replace(str,str1,str2,maxCount);
323         return ret;
324     }
325
326     //--------------------------------------------------------------------------
327
/** Convert string to string array. */
328     public static String JavaDoc[] stringToStringArray(String JavaDoc instr, String JavaDoc delim)
329         throws NoSuchElementException JavaDoc, NumberFormatException JavaDoc
330     {
331         StringTokenizer JavaDoc toker = new StringTokenizer JavaDoc(instr, delim);
332         String JavaDoc stringArray[] = new String JavaDoc[toker.countTokens()];
333         int i = 0;
334
335         while (toker.hasMoreTokens())
336         {
337             stringArray[i++] = toker.nextToken();
338         }
339         return stringArray;
340     }
341
342     //--------------------------------------------------------------------------
343
/** Convert string to integer array. */
344     public static int[] stringToIntArray(String JavaDoc instr, String JavaDoc delim)
345         throws NoSuchElementException JavaDoc, NumberFormatException JavaDoc
346     {
347         StringTokenizer JavaDoc toker = new StringTokenizer JavaDoc(instr, delim);
348         int intArray[] = new int[toker.countTokens()];
349         int i = 0;
350
351         while (toker.hasMoreTokens())
352         {
353             String JavaDoc sInt = toker.nextToken();
354             int nInt = Integer.parseInt(sInt);
355             intArray[i++] = new Integer JavaDoc(nInt).intValue();
356         }
357         return intArray;
358     }
359
360     //-------------------------------------------------------------------
361
/** Convert integer array to a string. */
362     public static String JavaDoc intArrayToString(int[] intArray)
363     {
364         String JavaDoc ret = "";
365         for (int i = 0; i < intArray.length; i++)
366         {
367             if (ret.length() > 0)
368                 ret = ret + "," + Integer.toString(intArray[i]);
369             else
370                 ret = Integer.toString(intArray[i]);
371         }
372         return ret;
373     }
374
375     //------------------------------------------------------------------------
376
public static void copyFile(File JavaDoc from, File JavaDoc to) throws IOException JavaDoc
377     {
378         InputStream JavaDoc in = null;
379         OutputStream JavaDoc out = null;
380
381         try
382         {
383             in = new FileInputStream JavaDoc(from);
384         }
385         catch (IOException JavaDoc ex)
386         {
387             throw new IOException JavaDoc(
388                 "Utilities.copyFile: opening input stream '"
389                     + from.getPath()
390                     + "', "
391                     + ex.getMessage());
392         }
393
394         try
395         {
396             out = new FileOutputStream JavaDoc(to);
397         }
398         catch (Exception JavaDoc ex)
399         {
400             try
401             {
402                 in.close();
403             }
404             catch (IOException JavaDoc ex1)
405             {
406             }
407             throw new IOException JavaDoc(
408                 "Utilities.copyFile: opening output stream '"
409                     + to.getPath()
410                     + "', "
411                     + ex.getMessage());
412         }
413
414         copyInputToOutput(in, out, from.length());
415     }
416
417     //------------------------------------------------------------------------
418
/**
419      * Utility method to copy an input stream to an output stream.
420      * Wraps both streams in buffers. Ensures right numbers of bytes copied.
421      */

422     public static void copyInputToOutput(
423         InputStream JavaDoc input,
424         OutputStream JavaDoc output,
425         long byteCount)
426         throws IOException JavaDoc
427     {
428         int bytes;
429         long length;
430
431         BufferedInputStream JavaDoc in = new BufferedInputStream JavaDoc(input);
432         BufferedOutputStream JavaDoc out = new BufferedOutputStream JavaDoc(output);
433
434         byte[] buffer;
435         buffer = new byte[8192];
436
437         for (length = byteCount; length > 0;)
438         {
439             bytes = (int) (length > 8192 ? 8192 : length);
440
441             try
442             {
443                 bytes = in.read(buffer, 0, bytes);
444             }
445             catch (IOException JavaDoc ex)
446             {
447                 try
448                 {
449                     in.close();
450                     out.close();
451                 }
452                 catch (IOException JavaDoc ex1)
453                 {
454                 }
455                 throw new IOException JavaDoc(
456                     "Reading input stream, " + ex.getMessage());
457             }
458
459             if (bytes < 0)
460                 break;
461
462             length -= bytes;
463
464             try
465             {
466                 out.write(buffer, 0, bytes);
467             }
468             catch (IOException JavaDoc ex)
469             {
470                 try
471                 {
472                     in.close();
473                     out.close();
474                 }
475                 catch (IOException JavaDoc ex1)
476                 {
477                 }
478                 throw new IOException JavaDoc(
479                     "Writing output stream, " + ex.getMessage());
480             }
481         }
482
483         try
484         {
485             in.close();
486             out.close();
487         }
488         catch (IOException JavaDoc ex)
489         {
490             throw new IOException JavaDoc("Closing file streams, " + ex.getMessage());
491         }
492     }
493
494     //------------------------------------------------------------------------
495
public static void copyInputToOutput(
496         InputStream JavaDoc input,
497         OutputStream JavaDoc output)
498         throws IOException JavaDoc
499     {
500         BufferedInputStream JavaDoc in = new BufferedInputStream JavaDoc(input);
501         BufferedOutputStream JavaDoc out = new BufferedOutputStream JavaDoc(output);
502         byte buffer[] = new byte[8192];
503         for (int count = 0; count != -1;)
504         {
505             count = in.read(buffer, 0, 8192);
506             if (count != -1)
507                 out.write(buffer, 0, count);
508         }
509
510         try
511         {
512             in.close();
513             out.close();
514         }
515         catch (IOException JavaDoc ex)
516         {
517             throw new IOException JavaDoc("Closing file streams, " + ex.getMessage());
518         }
519     }
520     
521     /**
522      * Encode a string using algorithm specified in web.xml and return the
523      * resulting encrypted password. If exception, the plain credentials
524      * string is returned
525      *
526      * @param password Password or other credentials to use in authenticating
527      * this username
528      * @param algorithm Algorithm used to do the digest
529      *
530      * @return encypted password based on the algorithm.
531      */

532     public static String JavaDoc encodePassword(String JavaDoc password, String JavaDoc algorithm)
533     {
534         byte[] unencodedPassword = password.getBytes();
535
536         MessageDigest JavaDoc md = null;
537
538         try
539         {
540             // first create an instance, given the provider
541
md = MessageDigest.getInstance(algorithm);
542         }
543         catch (Exception JavaDoc e)
544         {
545             mLogger.error("Exception: " + e);
546             return password;
547         }
548
549         md.reset();
550
551         // call the update method one or more times
552
// (useful when you don't know the size of your data, eg. stream)
553
md.update(unencodedPassword);
554
555         // now calculate the hash
556
byte[] encodedPassword = md.digest();
557
558         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
559
560         for (int i = 0; i < encodedPassword.length; i++)
561         {
562             if ((encodedPassword[i] & 0xff) < 0x10)
563             {
564                 buf.append("0");
565             }
566
567             buf.append(Long.toString(encodedPassword[i] & 0xff, 16));
568         }
569
570         return buf.toString();
571     }
572
573     /**
574      * Encode a string using Base64 encoding. Used when storing passwords
575      * as cookies.
576      *
577      * This is weak encoding in that anyone can use the decodeString
578      * routine to reverse the encoding.
579      *
580      * @param str
581      * @return String
582      * @throws IOException
583      */

584     public static String JavaDoc encodeString(String JavaDoc str) throws IOException JavaDoc
585     {
586         sun.misc.BASE64Encoder encoder = new sun.misc.BASE64Encoder();
587         String JavaDoc encodedStr = encoder.encodeBuffer(str.getBytes());
588
589         return (encodedStr.trim());
590     }
591
592     /**
593      * Decode a string using Base64 encoding.
594      *
595      * @param str
596      * @return String
597      * @throws IOException
598      */

599     public static String JavaDoc decodeString(String JavaDoc str) throws IOException JavaDoc
600     {
601         sun.misc.BASE64Decoder dec = new sun.misc.BASE64Decoder();
602         String JavaDoc value = new String JavaDoc(dec.decodeBuffer(str));
603
604         return (value);
605     }
606     
607     /**
608      * Strips HTML and truncates.
609      */

610     public static String JavaDoc truncate(
611             String JavaDoc str, int lower, int upper, String JavaDoc appendToEnd)
612     {
613         // strip markup from the string
614
String JavaDoc str2 = removeHTML(str, false);
615         
616         // quickly adjust the upper if it is set lower than 'lower'
617
if (upper < lower)
618         {
619             upper = lower;
620         }
621         
622         // now determine if the string fits within the upper limit
623
// if it does, go straight to return, do not pass 'go' and collect $200
624
if(str2.length() > upper)
625         {
626             // the magic location int
627
int loc;
628         
629             // first we determine where the next space appears after lower
630
loc = str2.lastIndexOf(' ', upper);
631             
632             // now we'll see if the location is greater than the lower limit
633
if(loc >= lower)
634             {
635                 // yes it was, so we'll cut it off here
636
str2 = str2.substring(0, loc);
637             }
638             else
639             {
640                 // no it wasnt, so we'll cut it off at the upper limit
641
str2 = str2.substring(0, upper);
642                 loc = upper;
643             }
644            
645             // the string was truncated, so we append the appendToEnd String
646
str2 = str2 + appendToEnd;
647         }
648         
649         return str2;
650     }
651     
652     /**
653      * This method based on code from the String taglib at Apache Jakarta:
654      * http://cvs.apache.org/viewcvs/jakarta-taglibs/string/src/org/apache/taglibs/string/util/StringW.java?rev=1.16&content-type=text/vnd.viewcvs-markup
655      * Copyright (c) 1999 The Apache Software Foundation.
656      * Author: Henri Yandell bayard@generationjava.com
657      *
658      * @param str
659      * @param lower
660      * @param upper
661      * @param appendToEnd
662      * @return
663      */

664     public static String JavaDoc truncateNicely(String JavaDoc str, int lower, int upper, String JavaDoc appendToEnd)
665     {
666         // strip markup from the string
667
String JavaDoc str2 = removeHTML(str, false);
668         boolean diff = (str2.length() < str.length());
669         
670         // quickly adjust the upper if it is set lower than 'lower'
671
if(upper < lower) {
672             upper = lower;
673         }
674         
675         // now determine if the string fits within the upper limit
676
// if it does, go straight to return, do not pass 'go' and collect $200
677
if(str2.length() > upper) {
678             // the magic location int
679
int loc;
680         
681             // first we determine where the next space appears after lower
682
loc = str2.lastIndexOf(' ', upper);
683             
684             // now we'll see if the location is greater than the lower limit
685
if(loc >= lower) {
686                 // yes it was, so we'll cut it off here
687
str2 = str2.substring(0, loc);
688             } else {
689                 // no it wasnt, so we'll cut it off at the upper limit
690
str2 = str2.substring(0, upper);
691                 loc = upper;
692             }
693             
694             // HTML was removed from original str
695
if (diff)
696             {
697                 
698                 // location of last space in truncated string
699
loc = str2.lastIndexOf(' ', loc);
700                 
701                 // get last "word" in truncated string (add 1 to loc to eliminate space
702
String JavaDoc str3 = str2.substring(loc+1);
703                 
704                 // find this fragment in original str, from 'loc' position
705
loc = str.indexOf(str3, loc) + str3.length();
706                 
707                 // get truncated string from original str, given new 'loc'
708
str2 = str.substring(0, loc);
709                 
710                 // get all the HTML from original str after loc
711
str3 = extractHTML(str.substring(loc));
712                 
713                 // remove any tags which generate visible HTML
714
// This call is unecessary, all HTML has already been stripped
715
//str3 = removeVisibleHTMLTags(str3);
716

717                 // append the appendToEnd String and
718
// add extracted HTML back onto truncated string
719
str = str2 + appendToEnd + str3;
720             }
721             else
722             {
723                 // the string was truncated, so we append the appendToEnd String
724
str = str2 + appendToEnd;
725             }
726     
727         }
728         
729         return str;
730     }
731     
732     public static String JavaDoc truncateText(String JavaDoc str, int lower, int upper, String JavaDoc appendToEnd)
733     {
734         // strip markup from the string
735
String JavaDoc str2 = removeHTML(str, false);
736         boolean diff = (str2.length() < str.length());
737         
738         // quickly adjust the upper if it is set lower than 'lower'
739
if(upper < lower) {
740             upper = lower;
741         }
742         
743         // now determine if the string fits within the upper limit
744
// if it does, go straight to return, do not pass 'go' and collect $200
745
if(str2.length() > upper) {
746             // the magic location int
747
int loc;
748         
749             // first we determine where the next space appears after lower
750
loc = str2.lastIndexOf(' ', upper);
751             
752             // now we'll see if the location is greater than the lower limit
753
if(loc >= lower) {
754                 // yes it was, so we'll cut it off here
755
str2 = str2.substring(0, loc);
756             } else {
757                 // no it wasnt, so we'll cut it off at the upper limit
758
str2 = str2.substring(0, upper);
759                 loc = upper;
760             }
761             // the string was truncated, so we append the appendToEnd String
762
str = str2 + appendToEnd;
763         }
764         return str;
765     }
766     
767     /**
768      * @param str
769      * @return
770      */

771     private static String JavaDoc stripLineBreaks(String JavaDoc str)
772     {
773         // TODO: use a string buffer, ignore case !
774
str = str.replaceAll("<br>", "");
775         str = str.replaceAll("<br/>", "");
776         str = str.replaceAll("<br />", "");
777         str = str.replaceAll("<p></p>", "");
778         str = str.replaceAll("<p/>","");
779         str = str.replaceAll("<p />","");
780         return str;
781     }
782     
783     /**
784      * Need need to get rid of any user-visible HTML tags once all text has been
785      * removed such as &lt;BR&gt;. This sounds like a better approach than removing
786      * all HTML tags and taking the chance to leave some tags un-closed.
787      *
788      * WARNING: this method has serious performance problems a
789      *
790      * @author Alexis Moussine-Pouchkine <alexis.moussine-pouchkine@france.sun.com>
791      * @author Lance Lavandowska
792      * @param str the String object to modify
793      * @return the new String object without the HTML "visible" tags
794      */

795     private static String JavaDoc removeVisibleHTMLTags(String JavaDoc str)
796     {
797         str = stripLineBreaks(str);
798         StringBuffer JavaDoc result = new StringBuffer JavaDoc(str);
799         StringBuffer JavaDoc lcresult = new StringBuffer JavaDoc(str.toLowerCase());
800
801         // <img should take care of smileys
802
String JavaDoc[] visibleTags = {"<img"}; // are there others to add?
803
int stringIndex;
804         for ( int j = 0 ; j < visibleTags.length ; j++ ) {
805             while ( (stringIndex = lcresult.indexOf(visibleTags[j])) != -1 ) {
806                 if ( visibleTags[j].endsWith(">") ) {
807                     result.delete(stringIndex, stringIndex+visibleTags[j].length() );
808                     lcresult.delete(stringIndex, stringIndex+visibleTags[j].length() );
809                 } else {
810                     // need to delete everything up until next closing '>', for <img for instance
811
int endIndex = result.indexOf(">", stringIndex);
812                     if (endIndex > -1) {
813                         // only delete it if we find the end! If we don't the HTML may be messed up, but we
814
// can't safely delete anything.
815
result.delete(stringIndex, endIndex + 1 );
816                         lcresult.delete(stringIndex, endIndex + 1 );
817                     }
818                 }
819             }
820         }
821
822         // TODO: This code is buggy by nature. It doesn't deal with nesting of tags properly.
823
// remove certain elements with open & close tags
824
String JavaDoc[] openCloseTags = {"li", "a", "div", "h1", "h2", "h3", "h4"}; // more ?
825
for (int j = 0; j < openCloseTags.length; j++)
826         {
827             // could this be better done with a regular expression?
828
String JavaDoc closeTag = "</"+openCloseTags[j]+">";
829             int lastStringIndex = 0;
830             while ( (stringIndex = lcresult.indexOf( "<"+openCloseTags[j], lastStringIndex)) > -1)
831             {
832                 lastStringIndex = stringIndex;
833                 // Try to find the matching closing tag (ignores possible nesting!)
834
int endIndex = lcresult.indexOf(closeTag, stringIndex);
835                 if (endIndex > -1) {
836                     // If we found it delete it.
837
result.delete(stringIndex, endIndex+closeTag.length());
838                     lcresult.delete(stringIndex, endIndex+closeTag.length());
839                 } else {
840                     // Try to see if it is a self-closed empty content tag, i.e. closed with />.
841
endIndex = lcresult.indexOf(">", stringIndex);
842                     int nextStart = lcresult.indexOf("<", stringIndex+1);
843                     if (endIndex > stringIndex && lcresult.charAt(endIndex-1) == '/' && (endIndex < nextStart || nextStart == -1)) {
844                         // Looks like it, so remove it.
845
result.delete(stringIndex, endIndex + 1);
846                         lcresult.delete(stringIndex, endIndex + 1);
847
848                     }
849                 }
850             }
851         }
852         
853         return result.toString();
854     }
855
856     /**
857      * Extract (keep) JUST the HTML from the String.
858      * @param str
859      * @return
860      */

861     public static String JavaDoc extractHTML(String JavaDoc str)
862     {
863         if (str == null) return "";
864         StringBuffer JavaDoc ret = new StringBuffer JavaDoc(str.length());
865         int start = 0;
866         int beginTag = str.indexOf("<");
867         int endTag = 0;
868         if (beginTag == -1)
869             return str;
870
871         while (beginTag >= start)
872         {
873             endTag = str.indexOf(">", beginTag);
874             
875             // if endTag found, keep tag
876
if (endTag > -1)
877             {
878                 ret.append( str.substring(beginTag, endTag+1) );
879                 
880                 // move start forward and find another tag
881
start = endTag + 1;
882                 beginTag = str.indexOf("<", start);
883             }
884             // if no endTag found, break
885
else
886             {
887                 break;
888             }
889         }
890         return ret.toString();
891     }
892
893     
894     public static String JavaDoc hexEncode(String JavaDoc str)
895     {
896         if (StringUtils.isEmpty(str)) return str;
897         
898         return RegexUtil.encode(str);
899     }
900     
901     public static String JavaDoc encodeEmail(String JavaDoc str)
902     {
903         return RegexUtil.encodeEmail(str);
904     }
905
906     /**
907      * Converts a character to HTML or XML entity.
908      *
909      * @param ch The character to convert.
910      * @param xml Convert the character to XML if set to true.
911      * @author Erik C. Thauvin
912      *
913      * @return The converted string.
914      */

915     public static final String JavaDoc charToHTML(char ch, boolean xml)
916     {
917         int c;
918
919         // Convert left bracket
920
if (ch == '<')
921         {
922             return ("&lt;");
923         }
924
925         // Convert left bracket
926
else if (ch == '>')
927         {
928             return ("&gt;");
929         }
930
931         // Convert ampersand
932
else if (ch == '&')
933         {
934             return ("&amp;");
935         }
936
937         // Commented out to eliminate redundant numeric character codes (ROL-507)
938
// High-ASCII character
939
//else if (ch >= 128)
940
//{
941
//c = ch;
942
//return ("&#" + c + ';');
943
//}
944

945         // Convert double quote
946
else if (xml && (ch == '"'))
947         {
948             return ("&quot;");
949         }
950
951         // Convert single quote
952
else if (xml && (ch == '\''))
953         {
954             return ("&#39;");
955         }
956
957         // No conversion
958
else
959         {
960             // Return character as string
961
return (String.valueOf(ch));
962         }
963     }
964
965     /**
966      * Converts a text string to HTML or XML entities.
967      *
968      * @author Erik C. Thauvin
969      * @param text The string to convert.
970      * @param xml Convert the string to XML if set to true.
971      *
972      * @return The converted string.
973      */

974     public static final String JavaDoc textToHTML(String JavaDoc text, boolean xml)
975     {
976         final StringBuffer JavaDoc html = new StringBuffer JavaDoc();
977
978         // Loop thru each characters of the text
979
for (int i = 0; i < text.length(); i++)
980         {
981             // Convert character to HTML/XML
982
html.append(charToHTML(text.charAt(i), xml));
983         }
984
985         // Return HTML/XML string
986
return html.toString();
987     }
988
989     /**
990      * Converts a text string to HTML or XML entities.
991      *
992      * @param text The string to convert.
993      * @author Erik C. Thauvin
994      * @return The converted string.
995      */

996     public static final String JavaDoc textToHTML(String JavaDoc text)
997     {
998         return textToHTML(text, false);
999     }
1000
1001    /**
1002     * Converts a text string to XML entities.
1003     *
1004     * @param text The string to convert.
1005     * @author Erik C. Thauvin
1006     * @return The converted string.
1007     */

1008    public static final String JavaDoc textToXML(String JavaDoc text)
1009    {
1010        return textToHTML(text, true);
1011    }
1012
1013    /**
1014     * Converts a text string to HTML or XML entities.
1015     * @param text The string to convert.
1016     * @return The converted string.
1017     */

1018    public static final String JavaDoc textToCDATA(String JavaDoc text)
1019    {
1020        final StringBuffer JavaDoc html = new StringBuffer JavaDoc();
1021
1022        // Loop thru each characters of the text
1023
for (int i = 0; i < text.length(); i++)
1024        {
1025            // Convert character to HTML/XML
1026
html.append(charToCDATA(text.charAt(i)));
1027        }
1028
1029        // Return HTML/XML string
1030
return html.toString();
1031    }
1032
1033    /**
1034     * Converts a character to CDATA character.
1035     * @param ch The character to convert.
1036     * @return The converted string.
1037     */

1038    public static final String JavaDoc charToCDATA(char ch)
1039    {
1040        int c;
1041
1042        if (ch >= 128)
1043        {
1044            c = ch;
1045
1046            return ("&#" + c + ';');
1047        }
1048
1049        // No conversion
1050
else
1051        {
1052            // Return character as string
1053
return (String.valueOf(ch));
1054        }
1055    }
1056    
1057    public static final String JavaDoc encode(String JavaDoc s)
1058    {
1059        try
1060        {
1061            return URLEncoder.encode(s, "utf-8");
1062        }
1063        catch (UnsupportedEncodingException JavaDoc e)
1064        {
1065            return s;
1066        }
1067    }
1068
1069    /**
1070     * @param string
1071     * @return
1072     */

1073    public static int stringToInt(String JavaDoc string)
1074    {
1075        try
1076        {
1077            return Integer.valueOf(string).intValue();
1078        }
1079        catch (NumberFormatException JavaDoc e)
1080        {
1081            mLogger.debug("Invalid Integer:" + string);
1082        }
1083        return 0;
1084    }
1085    
1086    /**
1087     * Code (stolen from Pebble) to add rel="nofollow" string to all links in HTML.
1088     */

1089    public static String JavaDoc addNofollow(String JavaDoc html)
1090    {
1091        if (html == null || html.length() == 0)
1092        {
1093            return html;
1094        }
1095        Matcher JavaDoc m = mLinkPattern.matcher(html);
1096        StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
1097        while (m.find())
1098        {
1099            int start = m.start();
1100            int end = m.end();
1101            String JavaDoc link = html.substring(start, end);
1102            buf.append(html.substring(0, start));
1103            if (link.indexOf("rel=\"nofollow\"") == -1)
1104            {
1105                buf.append(
1106                    link.substring(0, link.length() - 1) + " rel=\"nofollow\">");
1107            }
1108            else
1109            {
1110                buf.append(link);
1111            }
1112            html = html.substring(end, html.length());
1113            m = mLinkPattern.matcher(html);
1114        }
1115        buf.append(html);
1116        return buf.toString();
1117    }
1118    
1119    public static String JavaDoc unescapeHTML(String JavaDoc str)
1120    {
1121        return StringEscapeUtils.unescapeHtml(str);
1122    }
1123}
1124
Popular Tags