KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > Ostermiller > util > StringHelper


1 /*
2  * Static String formatting and query routines.
3  * Copyright (C) 2001,2002 Stephen Ostermiller
4  * http://ostermiller.org/contact.pl?regarding=Java+Utilities
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * See COPYING.TXT for details.
17  */

18
19 package com.Ostermiller.util;
20
21 import java.util.HashMap JavaDoc;
22 import java.util.regex.Pattern JavaDoc;
23
24 /**
25  * Utilities for String formatting, manipulation, and queries.
26  * More information about this class is available from <a target="_top" HREF=
27  * "http://ostermiller.org/utils/StringHelper.html">ostermiller.org</a>.
28  *
29  * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
30  * @since ostermillerutils 1.00.00
31  */

32 public class StringHelper {
33
34     /**
35      * Pad the beginning of the given String with spaces until
36      * the String is of the given length.
37      * <p>
38      * If a String is longer than the desired length,
39      * it will not be truncated, however no padding
40      * will be added.
41      *
42      * @param s String to be padded.
43      * @param length desired length of result.
44      * @return padded String.
45      * @throws NullPointerException if s is null.
46      *
47      * @since ostermillerutils 1.00.00
48      */

49     public static String JavaDoc prepad(String JavaDoc s, int length){
50         return prepad(s, length, ' ');
51     }
52
53     /**
54      * Pre-pend the given character to the String until
55      * the result is the desired length.
56      * <p>
57      * If a String is longer than the desired length,
58      * it will not be truncated, however no padding
59      * will be added.
60      *
61      * @param s String to be padded.
62      * @param length desired length of result.
63      * @param c padding character.
64      * @return padded String.
65      * @throws NullPointerException if s is null.
66      *
67      * @since ostermillerutils 1.00.00
68      */

69     public static String JavaDoc prepad(String JavaDoc s, int length, char c){
70         int needed = length - s.length();
71         if (needed <= 0){
72             return s;
73         }
74         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(length);
75         for (int i=0; i<needed; i++){
76             sb.append(c);
77         }
78         sb.append(s);
79         return (sb.toString());
80     }
81
82     /**
83      * Pad the end of the given String with spaces until
84      * the String is of the given length.
85      * <p>
86      * If a String is longer than the desired length,
87      * it will not be truncated, however no padding
88      * will be added.
89      *
90      * @param s String to be padded.
91      * @param length desired length of result.
92      * @return padded String.
93      * @throws NullPointerException if s is null.
94      *
95      * @since ostermillerutils 1.00.00
96      */

97     public static String JavaDoc postpad(String JavaDoc s, int length){
98         return postpad(s, length, ' ');
99     }
100
101     /**
102      * Append the given character to the String until
103      * the result is the desired length.
104      * <p>
105      * If a String is longer than the desired length,
106      * it will not be truncated, however no padding
107      * will be added.
108      *
109      * @param s String to be padded.
110      * @param length desired length of result.
111      * @param c padding character.
112      * @return padded String.
113      * @throws NullPointerException if s is null.
114      *
115      * @since ostermillerutils 1.00.00
116      */

117     public static String JavaDoc postpad(String JavaDoc s, int length, char c){
118         int needed = length - s.length();
119         if (needed <= 0){
120             return s;
121         }
122         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(length);
123         sb.append(s);
124         for (int i=0; i<needed; i++){
125             sb.append(c);
126         }
127         return (sb.toString());
128     }
129
130     /**
131      * Pad the beginning and end of the given String with spaces until
132      * the String is of the given length. The result is that the original
133      * String is centered in the middle of the new string.
134      * <p>
135      * If the number of characters to pad is even, then the padding
136      * will be split evenly between the beginning and end, otherwise,
137      * the extra character will be added to the end.
138      * <p>
139      * If a String is longer than the desired length,
140      * it will not be truncated, however no padding
141      * will be added.
142      *
143      * @param s String to be padded.
144      * @param length desired length of result.
145      * @return padded String.
146      * @throws NullPointerException if s is null.
147      *
148      * @since ostermillerutils 1.00.00
149      */

150     public static String JavaDoc midpad(String JavaDoc s, int length){
151         return midpad(s, length, ' ');
152     }
153
154     /**
155      * Pad the beginning and end of the given String with the given character
156      * until the result is the desired length. The result is that the original
157      * String is centered in the middle of the new string.
158      * <p>
159      * If the number of characters to pad is even, then the padding
160      * will be split evenly between the beginning and end, otherwise,
161      * the extra character will be added to the end.
162      * <p>
163      * If a String is longer than the desired length,
164      * it will not be truncated, however no padding
165      * will be added.
166      *
167      * @param s String to be padded.
168      * @param length desired length of result.
169      * @param c padding character.
170      * @return padded String.
171      * @throws NullPointerException if s is null.
172      *
173      * @since ostermillerutils 1.00.00
174      */

175     public static String JavaDoc midpad(String JavaDoc s, int length, char c){
176         int needed = length - s.length();
177         if (needed <= 0){
178             return s;
179         }
180         int beginning = needed / 2;
181         int end = beginning + needed % 2;
182         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(length);
183         for (int i=0; i<beginning; i++){
184             sb.append(c);
185         }
186         sb.append(s);
187         for (int i=0; i<end; i++){
188             sb.append(c);
189         }
190         return (sb.toString());
191     }
192
193     /**
194      * Split the given String into tokens.
195      * <P>
196      * This method is meant to be similar to the split
197      * function in other programming languages but it does
198      * not use regular expressions. Rather the String is
199      * split on a single String literal.
200      * <P>
201      * Unlike java.util.StringTokenizer which accepts
202      * multiple character tokens as delimiters, the delimiter
203      * here is a single String literal.
204      * <P>
205      * Each null token is returned as an empty String.
206      * Delimiters are never returned as tokens.
207      * <P>
208      * If there is no delimiter because it is either empty or
209      * null, the only element in the result is the original String.
210      * <P>
211      * StringHelper.split("1-2-3", "-");<br>
212      * result: {"1", "2", "3"}<br>
213      * StringHelper.split("-1--2-", "-");<br>
214      * result: {"", "1", ,"", "2", ""}<br>
215      * StringHelper.split("123", "");<br>
216      * result: {"123"}<br>
217      * StringHelper.split("1-2---3----4", "--");<br>
218      * result: {"1-2", "-3", "", "4"}<br>
219      *
220      * @param s String to be split.
221      * @param delimiter String literal on which to split.
222      * @return an array of tokens.
223      * @throws NullPointerException if s is null.
224      *
225      * @since ostermillerutils 1.00.00
226      */

227     public static String JavaDoc[] split(String JavaDoc s, String JavaDoc delimiter){
228         int delimiterLength;
229         // the next statement has the side effect of throwing a null pointer
230
// exception if s is null.
231
int stringLength = s.length();
232         if (delimiter == null || (delimiterLength = delimiter.length()) == 0){
233             // it is not inherently clear what to do if there is no delimiter
234
// On one hand it would make sense to return each character because
235
// the null String can be found between each pair of characters in
236
// a String. However, it can be found many times there and we don'
237
// want to be returning multiple null tokens.
238
// returning the whole String will be defined as the correct behavior
239
// in this instance.
240
return new String JavaDoc[] {s};
241         }
242
243         // a two pass solution is used because a one pass solution would
244
// require the possible resizing and copying of memory structures
245
// In the worst case it would have to be resized n times with each
246
// resize having a O(n) copy leading to an O(n^2) algorithm.
247

248         int count;
249         int start;
250         int end;
251
252         // Scan s and count the tokens.
253
count = 0;
254         start = 0;
255         while((end = s.indexOf(delimiter, start)) != -1) {
256             count++;
257             start = end + delimiterLength;
258         }
259         count++;
260
261         // allocate an array to return the tokens,
262
// we now know how big it should be
263
String JavaDoc[] result = new String JavaDoc[count];
264
265         // Scan s again, but this time pick out the tokens
266
count = 0;
267         start = 0;
268         while((end = s.indexOf(delimiter, start)) != -1) {
269             result[count] = (s.substring(start, end));
270             count++;
271             start = end + delimiterLength;
272         }
273         end = stringLength;
274         result[count] = s.substring(start, end);
275
276         return (result);
277     }
278
279     /**
280      * Replace occurrences of a substring.
281      *
282      * StringHelper.replace("1-2-3", "-", "|");<br>
283      * result: "1|2|3"<br>
284      * StringHelper.replace("-1--2-", "-", "|");<br>
285      * result: "|1||2|"<br>
286      * StringHelper.replace("123", "", "|");<br>
287      * result: "123"<br>
288      * StringHelper.replace("1-2---3----4", "--", "|");<br>
289      * result: "1-2|-3||4"<br>
290      * StringHelper.replace("1-2---3----4", "--", "---");<br>
291      * result: "1-2----3------4"<br>
292      *
293      * @param s String to be modified.
294      * @param find String to find.
295      * @param replace String to replace.
296      * @return a string with all the occurrences of the string to find replaced.
297      * @throws NullPointerException if s is null.
298      *
299      * @since ostermillerutils 1.00.00
300      */

301     public static String JavaDoc replace(String JavaDoc s, String JavaDoc find, String JavaDoc replace){
302         int findLength;
303         // the next statement has the side effect of throwing a null pointer
304
// exception if s is null.
305
int stringLength = s.length();
306         if (find == null || (findLength = find.length()) == 0){
307             // If there is nothing to find, we won't try and find it.
308
return s;
309         }
310         if (replace == null){
311             // a null string and an empty string are the same
312
// for replacement purposes.
313
replace = "";
314         }
315         int replaceLength = replace.length();
316
317         // We need to figure out how long our resulting string will be.
318
// This is required because without it, the possible resizing
319
// and copying of memory structures could lead to an unacceptable runtime.
320
// In the worst case it would have to be resized n times with each
321
// resize having a O(n) copy leading to an O(n^2) algorithm.
322
int length;
323         if (findLength == replaceLength){
324             // special case in which we don't need to count the replacements
325
// because the count falls out of the length formula.
326
length = stringLength;
327         } else {
328             int count;
329             int start;
330             int end;
331
332             // Scan s and count the number of times we find our target.
333
count = 0;
334             start = 0;
335             while((end = s.indexOf(find, start)) != -1) {
336                 count++;
337                 start = end + findLength;
338             }
339             if (count == 0){
340                 // special case in which on first pass, we find there is nothing
341
// to be replaced. No need to do a second pass or create a string buffer.
342
return s;
343             }
344             length = stringLength - (count * (findLength - replaceLength));
345         }
346
347         int start = 0;
348         int end = s.indexOf(find, start);
349         if (end == -1){
350             // nothing was found in the string to replace.
351
// we can get this if the find and replace strings
352
// are the same length because we didn't check before.
353
// in this case, we will return the original string
354
return s;
355         }
356         // it looks like we actually have something to replace
357
// *sigh* allocate memory for it.
358
StringBuffer JavaDoc sb = new StringBuffer JavaDoc(length);
359
360         // Scan s and do the replacements
361
while (end != -1) {
362             sb.append(s.substring(start, end));
363             sb.append(replace);
364             start = end + findLength;
365             end = s.indexOf(find, start);
366         }
367         end = stringLength;
368         sb.append(s.substring(start, end));
369
370         return (sb.toString());
371     }
372
373     /**
374      * Replaces characters that may be confused by a HTML
375      * parser with their equivalent character entity references.
376      * <p>
377      * Any data that will appear as text on a web page should
378      * be be escaped. This is especially important for data
379      * that comes from untrusted sources such as Internet users.
380      * A common mistake in CGI programming is to ask a user for
381      * data and then put that data on a web page. For example:<pre>
382      * Server: What is your name?
383      * User: &lt;b&gt;Joe&lt;b&gt;
384      * Server: Hello <b>Joe</b>, Welcome</pre>
385      * If the name is put on the page without checking that it doesn't
386      * contain HTML code or without sanitizing that HTML code, the user
387      * could reformat the page, insert scripts, and control the the
388      * content on your web server.
389      * <p>
390      * This method will replace HTML characters such as &gt; with their
391      * HTML entity reference (&amp;gt;) so that the html parser will
392      * be sure to interpret them as plain text rather than HTML or script.
393      * <p>
394      * This method should be used for both data to be displayed in text
395      * in the html document, and data put in form elements. For example:<br>
396      * <code>&lt;html&gt;&lt;body&gt;<i>This in not a &amp;lt;tag&amp;gt;
397      * in HTML</i>&lt;/body&gt;&lt;/html&gt;</code><br>
398      * and<br>
399      * <code>&lt;form&gt;&lt;input type="hidden" name="date" value="<i>This data could
400      * be &amp;quot;malicious&amp;quot;</i>"&gt;&lt;/form&gt;</code><br>
401      * In the second example, the form data would be properly be resubmitted
402      * to your cgi script in the URLEncoded format:<br>
403      * <code><i>This data could be %22malicious%22</i></code>
404      *
405      * @param s String to be escaped
406      * @return escaped String
407      * @throws NullPointerException if s is null.
408      *
409      * @since ostermillerutils 1.00.00
410      */

411     public static String JavaDoc escapeHTML(String JavaDoc s){
412         int length = s.length();
413         int newLength = length;
414         boolean someCharacterEscaped = false;
415         // first check for characters that might
416
// be dangerous and calculate a length
417
// of the string that has escapes.
418
for (int i=0; i<length; i++){
419             char c = s.charAt(i);
420             int cint = 0xffff & c;
421             if (cint < 32){
422                 switch(c){
423                     case '\r':
424                     case '\n':
425                     case '\t':
426                     case '\f':{
427                     } break;
428                     default: {
429                         newLength -= 1;
430                         someCharacterEscaped = true;
431                     }
432                 }
433             } else {
434                 switch(c){
435                     case '\"':{
436                         newLength += 5;
437                         someCharacterEscaped = true;
438                     } break;
439                     case '&':
440                     case '\'':{
441                         newLength += 4;
442                         someCharacterEscaped = true;
443                     } break;
444                     case '<':
445                     case '>':{
446                         newLength += 3;
447                         someCharacterEscaped = true;
448                     } break;
449                 }
450             }
451         }
452         if (!someCharacterEscaped){
453             // nothing to escape in the string
454
return s;
455         }
456         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(newLength);
457         for (int i=0; i<length; i++){
458             char c = s.charAt(i);
459             int cint = 0xffff & c;
460             if (cint < 32){
461                 switch(c){
462                     case '\r':
463                     case '\n':
464                     case '\t':
465                     case '\f':{
466                         sb.append(c);
467                     } break;
468                     default: {
469                         // Remove this character
470
}
471                 }
472             } else {
473                 switch(c){
474                     case '\"':{
475                         sb.append("&quot;");
476                     } break;
477                     case '\'':{
478                         sb.append("&#39;");
479                     } break;
480                     case '&':{
481                         sb.append("&amp;");
482                     } break;
483                     case '<':{
484                         sb.append("&lt;");
485                     } break;
486                     case '>':{
487                         sb.append("&gt;");
488                     } break;
489                     default: {
490                         sb.append(c);
491                     }
492                 }
493             }
494         }
495         return sb.toString();
496     }
497
498     /**
499      * Replaces characters that may be confused by an SQL
500      * parser with their equivalent escape characters.
501      * <p>
502      * Any data that will be put in an SQL query should
503      * be be escaped. This is especially important for data
504      * that comes from untrusted sources such as Internet users.
505      * <p>
506      * For example if you had the following SQL query:<br>
507      * <code>"SELECT * FROM addresses WHERE name='" + name + "' AND private='N'"</code><br>
508      * Without this function a user could give <code>" OR 1=1 OR ''='"</code>
509      * as their name causing the query to be:<br>
510      * <code>"SELECT * FROM addresses WHERE name='' OR 1=1 OR ''='' AND private='N'"</code><br>
511      * which will give all addresses, including private ones.<br>
512      * Correct usage would be:<br>
513      * <code>"SELECT * FROM addresses WHERE name='" + StringHelper.escapeSQL(name) + "' AND private='N'"</code><br>
514      * <p>
515      * Another way to avoid this problem is to use a PreparedStatement
516      * with appropriate placeholders.
517      *
518      * @param s String to be escaped
519      * @return escaped String
520      * @throws NullPointerException if s is null.
521      *
522      * @since ostermillerutils 1.00.00
523      */

524     public static String JavaDoc escapeSQL(String JavaDoc s){
525         int length = s.length();
526         int newLength = length;
527         // first check for characters that might
528
// be dangerous and calculate a length
529
// of the string that has escapes.
530
for (int i=0; i<length; i++){
531             char c = s.charAt(i);
532             switch(c){
533                 case '\\':
534                 case '\"':
535                 case '\'':
536                 case '\0':{
537                     newLength += 1;
538                 } break;
539             }
540         }
541         if (length == newLength){
542             // nothing to escape in the string
543
return s;
544         }
545         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(newLength);
546         for (int i=0; i<length; i++){
547             char c = s.charAt(i);
548             switch(c){
549                 case '\\':{
550                     sb.append("\\\\");
551                 } break;
552                 case '\"':{
553                     sb.append("\\\"");
554                 } break;
555                 case '\'':{
556                     sb.append("\\\'");
557                 } break;
558                 case '\0':{
559                     sb.append("\\0");
560                 } break;
561                 default: {
562                     sb.append(c);
563                 }
564             }
565         }
566         return sb.toString();
567     }
568
569     /**
570      * Replaces characters that are not allowed in a Java style
571      * string literal with their escape characters. Specifically
572      * quote ("), single quote ('), new line (\n), carriage return (\r),
573      * and backslash (\), and tab (\t) are escaped.
574      *
575      * @param s String to be escaped
576      * @return escaped String
577      * @throws NullPointerException if s is null.
578      *
579      * @since ostermillerutils 1.00.00
580      */

581     public static String JavaDoc escapeJavaLiteral(String JavaDoc s){
582         int length = s.length();
583         int newLength = length;
584         // first check for characters that might
585
// be dangerous and calculate a length
586
// of the string that has escapes.
587
for (int i=0; i<length; i++){
588             char c = s.charAt(i);
589             switch(c){
590                 case '\"':
591                 case '\'':
592                 case '\n':
593                 case '\r':
594                 case '\t':
595                 case '\\':{
596                     newLength += 1;
597                 } break;
598             }
599         }
600         if (length == newLength){
601             // nothing to escape in the string
602
return s;
603         }
604         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(newLength);
605         for (int i=0; i<length; i++){
606             char c = s.charAt(i);
607             switch(c){
608                 case '\"':{
609                     sb.append("\\\"");
610                 } break;
611                 case '\'':{
612                     sb.append("\\\'");
613                 } break;
614                 case '\n':{
615                     sb.append("\\n");
616                 } break;
617                 case '\r':{
618                     sb.append("\\r");
619                 } break;
620                 case '\t':{
621                     sb.append("\\t");
622                 } break;
623                 case '\\':{
624                     sb.append("\\\\");
625                 } break;
626                 default: {
627                     sb.append(c);
628                 }
629             }
630         }
631         return sb.toString();
632     }
633
634     /**
635      * Trim any of the characters contained in the second
636      * string from the beginning and end of the first.
637      *
638      * @param s String to be trimmed.
639      * @param c list of characters to trim from s.
640      * @return trimmed String.
641      * @throws NullPointerException if s is null.
642      *
643      * @since ostermillerutils 1.00.00
644      */

645     public static String JavaDoc trim(String JavaDoc s, String JavaDoc c){
646         int length = s.length();
647         if (c == null){
648             return s;
649         }
650         int cLength = c.length();
651         if (c.length() == 0){
652             return s;
653         }
654         int start = 0;
655         int end = length;
656         boolean found; // trim-able character found.
657
int i;
658         // Start from the beginning and find the
659
// first non-trim-able character.
660
found = false;
661         for (i=0; !found && i<length; i++){
662             char ch = s.charAt(i);
663             found = true;
664             for (int j=0; found && j<cLength; j++){
665                 if (c.charAt(j) == ch) found = false;
666             }
667         }
668         // if all characters are trim-able.
669
if (!found) return "";
670         start = i-1;
671         // Start from the end and find the
672
// last non-trim-able character.
673
found = false;
674         for (i=length-1; !found && i>=0; i--){
675             char ch = s.charAt(i);
676             found = true;
677             for (int j=0; found && j<cLength; j++){
678                 if (c.charAt(j) == ch) found = false;
679             }
680         }
681         end = i+2;
682         return s.substring(start, end);
683     }
684
685     private static HashMap JavaDoc<String JavaDoc, Integer JavaDoc> htmlEntities = new HashMap JavaDoc<String JavaDoc, Integer JavaDoc>();
686     static {
687         htmlEntities.put("nbsp", new Integer JavaDoc(160));
688         htmlEntities.put("iexcl", new Integer JavaDoc(161));
689         htmlEntities.put("cent", new Integer JavaDoc(162));
690         htmlEntities.put("pound", new Integer JavaDoc(163));
691         htmlEntities.put("curren", new Integer JavaDoc(164));
692         htmlEntities.put("yen", new Integer JavaDoc(165));
693         htmlEntities.put("brvbar", new Integer JavaDoc(166));
694         htmlEntities.put("sect", new Integer JavaDoc(167));
695         htmlEntities.put("uml", new Integer JavaDoc(168));
696         htmlEntities.put("copy", new Integer JavaDoc(169));
697         htmlEntities.put("ordf", new Integer JavaDoc(170));
698         htmlEntities.put("laquo", new Integer JavaDoc(171));
699         htmlEntities.put("not", new Integer JavaDoc(172));
700         htmlEntities.put("shy", new Integer JavaDoc(173));
701         htmlEntities.put("reg", new Integer JavaDoc(174));
702         htmlEntities.put("macr", new Integer JavaDoc(175));
703         htmlEntities.put("deg", new Integer JavaDoc(176));
704         htmlEntities.put("plusmn", new Integer JavaDoc(177));
705         htmlEntities.put("sup2", new Integer JavaDoc(178));
706         htmlEntities.put("sup3", new Integer JavaDoc(179));
707         htmlEntities.put("acute", new Integer JavaDoc(180));
708         htmlEntities.put("micro", new Integer JavaDoc(181));
709         htmlEntities.put("para", new Integer JavaDoc(182));
710         htmlEntities.put("middot", new Integer JavaDoc(183));
711         htmlEntities.put("cedil", new Integer JavaDoc(184));
712         htmlEntities.put("sup1", new Integer JavaDoc(185));
713         htmlEntities.put("ordm", new Integer JavaDoc(186));
714         htmlEntities.put("raquo", new Integer JavaDoc(187));
715         htmlEntities.put("frac14", new Integer JavaDoc(188));
716         htmlEntities.put("frac12", new Integer JavaDoc(189));
717         htmlEntities.put("frac34", new Integer JavaDoc(190));
718         htmlEntities.put("iquest", new Integer JavaDoc(191));
719         htmlEntities.put("Agrave", new Integer JavaDoc(192));
720         htmlEntities.put("Aacute", new Integer JavaDoc(193));
721         htmlEntities.put("Acirc", new Integer JavaDoc(194));
722         htmlEntities.put("Atilde", new Integer JavaDoc(195));
723         htmlEntities.put("Auml", new Integer JavaDoc(196));
724         htmlEntities.put("Aring", new Integer JavaDoc(197));
725         htmlEntities.put("AElig", new Integer JavaDoc(198));
726         htmlEntities.put("Ccedil", new Integer JavaDoc(199));
727         htmlEntities.put("Egrave", new Integer JavaDoc(200));
728         htmlEntities.put("Eacute", new Integer JavaDoc(201));
729         htmlEntities.put("Ecirc", new Integer JavaDoc(202));
730         htmlEntities.put("Euml", new Integer JavaDoc(203));
731         htmlEntities.put("Igrave", new Integer JavaDoc(204));
732         htmlEntities.put("Iacute", new Integer JavaDoc(205));
733         htmlEntities.put("Icirc", new Integer JavaDoc(206));
734         htmlEntities.put("Iuml", new Integer JavaDoc(207));
735         htmlEntities.put("ETH", new Integer JavaDoc(208));
736         htmlEntities.put("Ntilde", new Integer JavaDoc(209));
737         htmlEntities.put("Ograve", new Integer JavaDoc(210));
738         htmlEntities.put("Oacute", new Integer JavaDoc(211));
739         htmlEntities.put("Ocirc", new Integer JavaDoc(212));
740         htmlEntities.put("Otilde", new Integer JavaDoc(213));
741         htmlEntities.put("Ouml", new Integer JavaDoc(214));
742         htmlEntities.put("times", new Integer JavaDoc(215));
743         htmlEntities.put("Oslash", new Integer JavaDoc(216));
744         htmlEntities.put("Ugrave", new Integer JavaDoc(217));
745         htmlEntities.put("Uacute", new Integer JavaDoc(218));
746         htmlEntities.put("Ucirc", new Integer JavaDoc(219));
747         htmlEntities.put("Uuml", new Integer JavaDoc(220));
748         htmlEntities.put("Yacute", new Integer JavaDoc(221));
749         htmlEntities.put("THORN", new Integer JavaDoc(222));
750         htmlEntities.put("szlig", new Integer JavaDoc(223));
751         htmlEntities.put("agrave", new Integer JavaDoc(224));
752         htmlEntities.put("aacute", new Integer JavaDoc(225));
753         htmlEntities.put("acirc", new Integer JavaDoc(226));
754         htmlEntities.put("atilde", new Integer JavaDoc(227));
755         htmlEntities.put("auml", new Integer JavaDoc(228));
756         htmlEntities.put("aring", new Integer JavaDoc(229));
757         htmlEntities.put("aelig", new Integer JavaDoc(230));
758         htmlEntities.put("ccedil", new Integer JavaDoc(231));
759         htmlEntities.put("egrave", new Integer JavaDoc(232));
760         htmlEntities.put("eacute", new Integer JavaDoc(233));
761         htmlEntities.put("ecirc", new Integer JavaDoc(234));
762         htmlEntities.put("euml", new Integer JavaDoc(235));
763         htmlEntities.put("igrave", new Integer JavaDoc(236));
764         htmlEntities.put("iacute", new Integer JavaDoc(237));
765         htmlEntities.put("icirc", new Integer JavaDoc(238));
766         htmlEntities.put("iuml", new Integer JavaDoc(239));
767         htmlEntities.put("eth", new Integer JavaDoc(240));
768         htmlEntities.put("ntilde", new Integer JavaDoc(241));
769         htmlEntities.put("ograve", new Integer JavaDoc(242));
770         htmlEntities.put("oacute", new Integer JavaDoc(243));
771         htmlEntities.put("ocirc", new Integer JavaDoc(244));
772         htmlEntities.put("otilde", new Integer JavaDoc(245));
773         htmlEntities.put("ouml", new Integer JavaDoc(246));
774         htmlEntities.put("divide", new Integer JavaDoc(247));
775         htmlEntities.put("oslash", new Integer JavaDoc(248));
776         htmlEntities.put("ugrave", new Integer JavaDoc(249));
777         htmlEntities.put("uacute", new Integer JavaDoc(250));
778         htmlEntities.put("ucirc", new Integer JavaDoc(251));
779         htmlEntities.put("uuml", new Integer JavaDoc(252));
780         htmlEntities.put("yacute", new Integer JavaDoc(253));
781         htmlEntities.put("thorn", new Integer JavaDoc(254));
782         htmlEntities.put("yuml", new Integer JavaDoc(255));
783         htmlEntities.put("fnof", new Integer JavaDoc(402));
784         htmlEntities.put("Alpha", new Integer JavaDoc(913));
785         htmlEntities.put("Beta", new Integer JavaDoc(914));
786         htmlEntities.put("Gamma", new Integer JavaDoc(915));
787         htmlEntities.put("Delta", new Integer JavaDoc(916));
788         htmlEntities.put("Epsilon", new Integer JavaDoc(917));
789         htmlEntities.put("Zeta", new Integer JavaDoc(918));
790         htmlEntities.put("Eta", new Integer JavaDoc(919));
791         htmlEntities.put("Theta", new Integer JavaDoc(920));
792         htmlEntities.put("Iota", new Integer JavaDoc(921));
793         htmlEntities.put("Kappa", new Integer JavaDoc(922));
794         htmlEntities.put("Lambda", new Integer JavaDoc(923));
795         htmlEntities.put("Mu", new Integer JavaDoc(924));
796         htmlEntities.put("Nu", new Integer JavaDoc(925));
797         htmlEntities.put("Xi", new Integer JavaDoc(926));
798         htmlEntities.put("Omicron", new Integer JavaDoc(927));
799         htmlEntities.put("Pi", new Integer JavaDoc(928));
800         htmlEntities.put("Rho", new Integer JavaDoc(929));
801         htmlEntities.put("Sigma", new Integer JavaDoc(931));
802         htmlEntities.put("Tau", new Integer JavaDoc(932));
803         htmlEntities.put("Upsilon", new Integer JavaDoc(933));
804         htmlEntities.put("Phi", new Integer JavaDoc(934));
805         htmlEntities.put("Chi", new Integer JavaDoc(935));
806         htmlEntities.put("Psi", new Integer JavaDoc(936));
807         htmlEntities.put("Omega", new Integer JavaDoc(937));
808         htmlEntities.put("alpha", new Integer JavaDoc(945));
809         htmlEntities.put("beta", new Integer JavaDoc(946));
810         htmlEntities.put("gamma", new Integer JavaDoc(947));
811         htmlEntities.put("delta", new Integer JavaDoc(948));
812         htmlEntities.put("epsilon", new Integer JavaDoc(949));
813         htmlEntities.put("zeta", new Integer JavaDoc(950));
814         htmlEntities.put("eta", new Integer JavaDoc(951));
815         htmlEntities.put("theta", new Integer JavaDoc(952));
816         htmlEntities.put("iota", new Integer JavaDoc(953));
817         htmlEntities.put("kappa", new Integer JavaDoc(954));
818         htmlEntities.put("lambda", new Integer JavaDoc(955));
819         htmlEntities.put("mu", new Integer JavaDoc(956));
820         htmlEntities.put("nu", new Integer JavaDoc(957));
821         htmlEntities.put("xi", new Integer JavaDoc(958));
822         htmlEntities.put("omicron", new Integer JavaDoc(959));
823         htmlEntities.put("pi", new Integer JavaDoc(960));
824         htmlEntities.put("rho", new Integer JavaDoc(961));
825         htmlEntities.put("sigmaf", new Integer JavaDoc(962));
826         htmlEntities.put("sigma", new Integer JavaDoc(963));
827         htmlEntities.put("tau", new Integer JavaDoc(964));
828         htmlEntities.put("upsilon", new Integer JavaDoc(965));
829         htmlEntities.put("phi", new Integer JavaDoc(966));
830         htmlEntities.put("chi", new Integer JavaDoc(967));
831         htmlEntities.put("psi", new Integer JavaDoc(968));
832         htmlEntities.put("omega", new Integer JavaDoc(969));
833         htmlEntities.put("thetasym", new Integer JavaDoc(977));
834         htmlEntities.put("upsih", new Integer JavaDoc(978));
835         htmlEntities.put("piv", new Integer JavaDoc(982));
836         htmlEntities.put("bull", new Integer JavaDoc(8226));
837         htmlEntities.put("hellip", new Integer JavaDoc(8230));
838         htmlEntities.put("prime", new Integer JavaDoc(8242));
839         htmlEntities.put("Prime", new Integer JavaDoc(8243));
840         htmlEntities.put("oline", new Integer JavaDoc(8254));
841         htmlEntities.put("frasl", new Integer JavaDoc(8260));
842         htmlEntities.put("weierp", new Integer JavaDoc(8472));
843         htmlEntities.put("image", new Integer JavaDoc(8465));
844         htmlEntities.put("real", new Integer JavaDoc(8476));
845         htmlEntities.put("trade", new Integer JavaDoc(8482));
846         htmlEntities.put("alefsym", new Integer JavaDoc(8501));
847         htmlEntities.put("larr", new Integer JavaDoc(8592));
848         htmlEntities.put("uarr", new Integer JavaDoc(8593));
849         htmlEntities.put("rarr", new Integer JavaDoc(8594));
850         htmlEntities.put("darr", new Integer JavaDoc(8595));
851         htmlEntities.put("harr", new Integer JavaDoc(8596));
852         htmlEntities.put("crarr", new Integer JavaDoc(8629));
853         htmlEntities.put("lArr", new Integer JavaDoc(8656));
854         htmlEntities.put("uArr", new Integer JavaDoc(8657));
855         htmlEntities.put("rArr", new Integer JavaDoc(8658));
856         htmlEntities.put("dArr", new Integer JavaDoc(8659));
857         htmlEntities.put("hArr", new Integer JavaDoc(8660));
858         htmlEntities.put("forall", new Integer JavaDoc(8704));
859         htmlEntities.put("part", new Integer JavaDoc(8706));
860         htmlEntities.put("exist", new Integer JavaDoc(8707));
861         htmlEntities.put("empty", new Integer JavaDoc(8709));
862         htmlEntities.put("nabla", new Integer JavaDoc(8711));
863         htmlEntities.put("isin", new Integer JavaDoc(8712));
864         htmlEntities.put("notin", new Integer JavaDoc(8713));
865         htmlEntities.put("ni", new Integer JavaDoc(8715));
866         htmlEntities.put("prod", new Integer JavaDoc(8719));
867         htmlEntities.put("sum", new Integer JavaDoc(8721));
868         htmlEntities.put("minus", new Integer JavaDoc(8722));
869         htmlEntities.put("lowast", new Integer JavaDoc(8727));
870         htmlEntities.put("radic", new Integer JavaDoc(8730));
871         htmlEntities.put("prop", new Integer JavaDoc(8733));
872         htmlEntities.put("infin", new Integer JavaDoc(8734));
873         htmlEntities.put("ang", new Integer JavaDoc(8736));
874         htmlEntities.put("and", new Integer JavaDoc(8743));
875         htmlEntities.put("or", new Integer JavaDoc(8744));
876         htmlEntities.put("cap", new Integer JavaDoc(8745));
877         htmlEntities.put("cup", new Integer JavaDoc(8746));
878         htmlEntities.put("int", new Integer JavaDoc(8747));
879         htmlEntities.put("there4", new Integer JavaDoc(8756));
880         htmlEntities.put("sim", new Integer JavaDoc(8764));
881         htmlEntities.put("cong", new Integer JavaDoc(8773));
882         htmlEntities.put("asymp", new Integer JavaDoc(8776));
883         htmlEntities.put("ne", new Integer JavaDoc(8800));
884         htmlEntities.put("equiv", new Integer JavaDoc(8801));
885         htmlEntities.put("le", new Integer JavaDoc(8804));
886         htmlEntities.put("ge", new Integer JavaDoc(8805));
887         htmlEntities.put("sub", new Integer JavaDoc(8834));
888         htmlEntities.put("sup", new Integer JavaDoc(8835));
889         htmlEntities.put("nsub", new Integer JavaDoc(8836));
890         htmlEntities.put("sube", new Integer JavaDoc(8838));
891         htmlEntities.put("supe", new Integer JavaDoc(8839));
892         htmlEntities.put("oplus", new Integer JavaDoc(8853));
893         htmlEntities.put("otimes", new Integer JavaDoc(8855));
894         htmlEntities.put("perp", new Integer JavaDoc(8869));
895         htmlEntities.put("sdot", new Integer JavaDoc(8901));
896         htmlEntities.put("lceil", new Integer JavaDoc(8968));
897         htmlEntities.put("rceil", new Integer JavaDoc(8969));
898         htmlEntities.put("lfloor", new Integer JavaDoc(8970));
899         htmlEntities.put("rfloor", new Integer JavaDoc(8971));
900         htmlEntities.put("lang", new Integer JavaDoc(9001));
901         htmlEntities.put("rang", new Integer JavaDoc(9002));
902         htmlEntities.put("loz", new Integer JavaDoc(9674));
903         htmlEntities.put("spades", new Integer JavaDoc(9824));
904         htmlEntities.put("clubs", new Integer JavaDoc(9827));
905         htmlEntities.put("hearts", new Integer JavaDoc(9829));
906         htmlEntities.put("diams", new Integer JavaDoc(9830));
907         htmlEntities.put("quot", new Integer JavaDoc(34));
908         htmlEntities.put("amp", new Integer JavaDoc(38));
909         htmlEntities.put("lt", new Integer JavaDoc(60));
910         htmlEntities.put("gt", new Integer JavaDoc(62));
911         htmlEntities.put("OElig", new Integer JavaDoc(338));
912         htmlEntities.put("oelig", new Integer JavaDoc(339));
913         htmlEntities.put("Scaron", new Integer JavaDoc(352));
914         htmlEntities.put("scaron", new Integer JavaDoc(353));
915         htmlEntities.put("Yuml", new Integer JavaDoc(376));
916         htmlEntities.put("circ", new Integer JavaDoc(710));
917         htmlEntities.put("tilde", new Integer JavaDoc(732));
918         htmlEntities.put("ensp", new Integer JavaDoc(8194));
919         htmlEntities.put("emsp", new Integer JavaDoc(8195));
920         htmlEntities.put("thinsp", new Integer JavaDoc(8201));
921         htmlEntities.put("zwnj", new Integer JavaDoc(8204));
922         htmlEntities.put("zwj", new Integer JavaDoc(8205));
923         htmlEntities.put("lrm", new Integer JavaDoc(8206));
924         htmlEntities.put("rlm", new Integer JavaDoc(8207));
925         htmlEntities.put("ndash", new Integer JavaDoc(8211));
926         htmlEntities.put("mdash", new Integer JavaDoc(8212));
927         htmlEntities.put("lsquo", new Integer JavaDoc(8216));
928         htmlEntities.put("rsquo", new Integer JavaDoc(8217));
929         htmlEntities.put("sbquo", new Integer JavaDoc(8218));
930         htmlEntities.put("ldquo", new Integer JavaDoc(8220));
931         htmlEntities.put("rdquo", new Integer JavaDoc(8221));
932         htmlEntities.put("bdquo", new Integer JavaDoc(8222));
933         htmlEntities.put("dagger", new Integer JavaDoc(8224));
934         htmlEntities.put("Dagger", new Integer JavaDoc(8225));
935         htmlEntities.put("permil", new Integer JavaDoc(8240));
936         htmlEntities.put("lsaquo", new Integer JavaDoc(8249));
937         htmlEntities.put("rsaquo", new Integer JavaDoc(8250));
938         htmlEntities.put("euro", new Integer JavaDoc(8364));
939     }
940
941     /**
942      * Turn any HTML escape entities in the string into
943      * characters and return the resulting string.
944      *
945      * @param s String to be unescaped.
946      * @return unescaped String.
947      * @throws NullPointerException if s is null.
948      *
949      * @since ostermillerutils 1.00.00
950      */

951     public static String JavaDoc unescapeHTML(String JavaDoc s){
952         StringBuffer JavaDoc result = new StringBuffer JavaDoc(s.length());
953         int ampInd = s.indexOf("&");
954         int lastEnd = 0;
955         while (ampInd >= 0){
956             int nextAmp = s.indexOf("&", ampInd+1);
957             int nextSemi = s.indexOf(";", ampInd+1);
958             if (nextSemi != -1 && (nextAmp == -1 || nextSemi < nextAmp)){
959                 int value = -1;
960                 String JavaDoc escape = s.substring(ampInd+1,nextSemi);
961                 try {
962                     if (escape.startsWith("#")){
963                         value = Integer.parseInt(escape.substring(1), 10);
964                     } else {
965                         if (htmlEntities.containsKey(escape)){
966                             value = ((Integer JavaDoc)(htmlEntities.get(escape))).intValue();
967                         }
968                     }
969                 } catch (NumberFormatException JavaDoc x){
970                 }
971                 result.append(s.substring(lastEnd, ampInd));
972                 lastEnd = nextSemi + 1;
973                 if (value >= 0 && value <= 0xffff){
974                     result.append((char)value);
975                 } else {
976                     result.append("&").append(escape).append(";");
977                 }
978             }
979             ampInd = nextAmp;
980         }
981         result.append(s.substring(lastEnd));
982         return result.toString();
983     }
984
985     /**
986      * Escapes characters that have special meaning to
987      * regular expressions
988      *
989      * @param s String to be escaped
990      * @return escaped String
991      * @throws NullPointerException if s is null.
992      *
993      * @since ostermillerutils 1.02.25
994      */

995     public static String JavaDoc escapeRegularExpressionLiteral(String JavaDoc s){
996         // According to the documentation in the Pattern class:
997
//
998
// The backslash character ('\') serves to introduce escaped constructs,
999
// as defined in the table above, as well as to quote characters that
1000
// otherwise would be interpreted as unescaped constructs. Thus the
1001
// expression \\ matches a single backslash and \{ matches a left brace.
1002
//
1003
// It is an error to use a backslash prior to any alphabetic character
1004
// that does not denote an escaped construct; these are reserved for future
1005
// extensions to the regular-expression language. A backslash may be used
1006
// prior to a non-alphabetic character regardless of whether that character
1007
// is part of an unescaped construct.
1008
//
1009
// As a result, escape everything except [0-9a-zA-Z]
1010

1011        int length = s.length();
1012        int newLength = length;
1013        // first check for characters that might
1014
// be dangerous and calculate a length
1015
// of the string that has escapes.
1016
for (int i=0; i<length; i++){
1017            char c = s.charAt(i);
1018            if (!((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))){
1019                newLength += 1;
1020            }
1021        }
1022        if (length == newLength){
1023            // nothing to escape in the string
1024
return s;
1025        }
1026        StringBuffer JavaDoc sb = new StringBuffer JavaDoc(newLength);
1027        for (int i=0; i<length; i++){
1028            char c = s.charAt(i);
1029            if (!((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))){
1030                sb.append('\\');
1031            }
1032            sb.append(c);
1033        }
1034        return sb.toString();
1035    }
1036
1037    /**
1038     * Build a regular expression that is each of the terms or'd together.
1039     *
1040     * @param terms a list of search terms.
1041     * @param sb place to build the regular expression.
1042     * @throws IllegalArgumentException if the length of terms is zero.
1043     *
1044     * @since ostermillerutils 1.02.25
1045     */

1046    private static void buildFindAnyPattern(String JavaDoc[] terms, StringBuffer JavaDoc sb){
1047        if (terms.length == 0) throw new IllegalArgumentException JavaDoc("There must be at least one term to find.");
1048        sb.append("(?:");
1049        for (int i=0; i<terms.length; i++){
1050            if (i>0) sb.append("|");
1051            sb.append("(?:");
1052            sb.append(escapeRegularExpressionLiteral(terms[i]));
1053            sb.append(")");
1054        }
1055        sb.append(")");
1056    }
1057
1058    /**
1059     * Compile a pattern that can will match a string if the string
1060     * contains any of the given terms.
1061     * <p>
1062     * Usage:<br>
1063     * <code>boolean b = getContainsAnyPattern(terms).matcher(s).matches();</code>
1064     * <p>
1065     * If multiple strings are matched against the same set of terms,
1066     * it is more efficient to reuse the pattern returned by this function.
1067     *
1068     * @param terms Array of search strings.
1069     * @return Compiled pattern that can be used to match a string to see if it contains any of the terms.
1070     *
1071     * @since ostermillerutils 1.02.25
1072     */

1073    public static Pattern JavaDoc getContainsAnyPattern(String JavaDoc[] terms){
1074        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1075        sb.append("(?s).*");
1076        buildFindAnyPattern(terms, sb);
1077        sb.append(".*");
1078        return Pattern.compile(sb.toString());
1079    }
1080
1081    /**
1082     * Compile a pattern that can will match a string if the string
1083     * equals any of the given terms.
1084     * <p>
1085     * Usage:<br>
1086     * <code>boolean b = getEqualsAnyPattern(terms).matcher(s).matches();</code>
1087     * <p>
1088     * If multiple strings are matched against the same set of terms,
1089     * it is more efficient to reuse the pattern returned by this function.
1090     *
1091     * @param terms Array of search strings.
1092     * @return Compiled pattern that can be used to match a string to see if it equals any of the terms.
1093     *
1094     * @since ostermillerutils 1.02.25
1095     */

1096     public static Pattern JavaDoc getEqualsAnyPattern(String JavaDoc[] terms){
1097        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1098        sb.append("(?s)\\A");
1099        buildFindAnyPattern(terms, sb);
1100        sb.append("\\z");
1101        return Pattern.compile(sb.toString());
1102    }
1103
1104    /**
1105     * Compile a pattern that can will match a string if the string
1106     * starts with any of the given terms.
1107     * <p>
1108     * Usage:<br>
1109     * <code>boolean b = getStartsWithAnyPattern(terms).matcher(s).matches();</code>
1110     * <p>
1111     * If multiple strings are matched against the same set of terms,
1112     * it is more efficient to reuse the pattern returned by this function.
1113     *
1114     * @param terms Array of search strings.
1115     * @return Compiled pattern that can be used to match a string to see if it starts with any of the terms.
1116     *
1117     * @since ostermillerutils 1.02.25
1118     */

1119     public static Pattern JavaDoc getStartsWithAnyPattern(String JavaDoc[] terms){
1120        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1121        sb.append("(?s)\\A");
1122        buildFindAnyPattern(terms, sb);
1123        sb.append(".*");
1124        return Pattern.compile(sb.toString());
1125    }
1126
1127    /**
1128     * Compile a pattern that can will match a string if the string
1129     * ends with any of the given terms.
1130     * <p>
1131     * Usage:<br>
1132     * <code>boolean b = getEndsWithAnyPattern(terms).matcher(s).matches();</code>
1133     * <p>
1134     * If multiple strings are matched against the same set of terms,
1135     * it is more efficient to reuse the pattern returned by this function.
1136     *
1137     * @param terms Array of search strings.
1138     * @return Compiled pattern that can be used to match a string to see if it ends with any of the terms.
1139     *
1140     * @since ostermillerutils 1.02.25
1141     */

1142    public static Pattern JavaDoc getEndsWithAnyPattern(String JavaDoc[] terms){
1143        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1144        sb.append("(?s).*");
1145        buildFindAnyPattern(terms, sb);
1146        sb.append("\\z");
1147        return Pattern.compile(sb.toString());
1148    }
1149
1150    /**
1151     * Compile a pattern that can will match a string if the string
1152     * contains any of the given terms.
1153     * <p>
1154     * Case is ignored when matching using Unicode case rules.
1155     * <p>
1156     * Usage:<br>
1157     * <code>boolean b = getContainsAnyPattern(terms).matcher(s).matches();</code>
1158     * <p>
1159     * If multiple strings are matched against the same set of terms,
1160     * it is more efficient to reuse the pattern returned by this function.
1161     *
1162     * @param terms Array of search strings.
1163     * @return Compiled pattern that can be used to match a string to see if it contains any of the terms.
1164     *
1165     * @since ostermillerutils 1.02.25
1166     */

1167    public static Pattern JavaDoc getContainsAnyIgnoreCasePattern(String JavaDoc[] terms){
1168        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1169        sb.append("(?i)(?u)(?s).*");
1170        buildFindAnyPattern(terms, sb);
1171        sb.append(".*");
1172        return Pattern.compile(sb.toString());
1173    }
1174
1175    /**
1176     * Compile a pattern that can will match a string if the string
1177     * equals any of the given terms.
1178     * <p>
1179     * Case is ignored when matching using Unicode case rules.
1180     * <p>
1181     * Usage:<br>
1182     * <code>boolean b = getEqualsAnyPattern(terms).matcher(s).matches();</code>
1183     * <p>
1184     * If multiple strings are matched against the same set of terms,
1185     * it is more efficient to reuse the pattern returned by this function.
1186     *
1187     * @param terms Array of search strings.
1188     * @return Compiled pattern that can be used to match a string to see if it equals any of the terms.
1189     *
1190     * @since ostermillerutils 1.02.25
1191     */

1192     public static Pattern JavaDoc getEqualsAnyIgnoreCasePattern(String JavaDoc[] terms){
1193        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1194        sb.append("(?i)(?u)(?s)\\A");
1195        buildFindAnyPattern(terms, sb);
1196        sb.append("\\z");
1197        return Pattern.compile(sb.toString());
1198    }
1199
1200    /**
1201     * Compile a pattern that can will match a string if the string
1202     * starts with any of the given terms.
1203     * <p>
1204     * Case is ignored when matching using Unicode case rules.
1205     * <p>
1206     * Usage:<br>
1207     * <code>boolean b = getStartsWithAnyPattern(terms).matcher(s).matches();</code>
1208     * <p>
1209     * If multiple strings are matched against the same set of terms,
1210     * it is more efficient to reuse the pattern returned by this function.
1211     *
1212     * @param terms Array of search strings.
1213     * @return Compiled pattern that can be used to match a string to see if it starts with any of the terms.
1214     *
1215     * @since ostermillerutils 1.02.25
1216     */

1217     public static Pattern JavaDoc getStartsWithAnyIgnoreCasePattern(String JavaDoc[] terms){
1218        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1219        sb.append("(?i)(?u)(?s)\\A");
1220        buildFindAnyPattern(terms, sb);
1221        sb.append(".*");
1222        return Pattern.compile(sb.toString());
1223    }
1224
1225    /**
1226     * Compile a pattern that can will match a string if the string
1227     * ends with any of the given terms.
1228     * <p>
1229     * Case is ignored when matching using Unicode case rules.
1230     * <p>
1231     * Usage:<br>
1232     * <code>boolean b = getEndsWithAnyPattern(terms).matcher(s).matches();</code>
1233     * <p>
1234     * If multiple strings are matched against the same set of terms,
1235     * it is more efficient to reuse the pattern returned by this function.
1236     *
1237     * @param terms Array of search strings.
1238     * @return Compiled pattern that can be used to match a string to see if it ends with any of the terms.
1239     *
1240     * @since ostermillerutils 1.02.25
1241     */

1242    public static Pattern JavaDoc getEndsWithAnyIgnoreCasePattern(String JavaDoc[] terms){
1243        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1244        sb.append("(?i)(?u)(?s).*");
1245        buildFindAnyPattern(terms, sb);
1246        sb.append("\\z");
1247        return Pattern.compile(sb.toString());
1248    }
1249
1250    /**
1251     * Tests to see if the given string contains any of the given terms.
1252     * <p>
1253     * This implementation is more efficient than the brute force approach
1254     * of testing the string against each of the terms. It instead compiles
1255     * a single regular expression that can test all the terms at once, and
1256     * uses that expression against the string.
1257     * <p>
1258     * This is a convenience method. If multiple strings are tested against
1259     * the same set of terms, it is more efficient not to compile the regular
1260     * expression multiple times.
1261     * @see #getContainsAnyPattern(String[])
1262     *
1263     * @param s String that may contain any of the given terms.
1264     * @param terms list of substrings that may be contained in the given string.
1265     * @return true iff one of the terms is a substring of the given string.
1266     *
1267     * @since ostermillerutils 1.02.25
1268     */

1269    public static boolean containsAny(String JavaDoc s, String JavaDoc[] terms){
1270        return getContainsAnyPattern(terms).matcher(s).matches();
1271    }
1272
1273    /**
1274     * Tests to see if the given string equals any of the given terms.
1275     * <p>
1276     * This implementation is more efficient than the brute force approach
1277     * of testing the string against each of the terms. It instead compiles
1278     * a single regular expression that can test all the terms at once, and
1279     * uses that expression against the string.
1280     * <p>
1281     * This is a convenience method. If multiple strings are tested against
1282     * the same set of terms, it is more efficient not to compile the regular
1283     * expression multiple times.
1284     * @see #getEqualsAnyPattern(String[])
1285     *
1286     * @param s String that may equal any of the given terms.
1287     * @param terms list of strings that may equal the given string.
1288     * @return true iff one of the terms is equal to the given string.
1289     *
1290     * @since ostermillerutils 1.02.25
1291     */

1292    public static boolean equalsAny(String JavaDoc s, String JavaDoc[] terms){
1293        return getEqualsAnyPattern(terms).matcher(s).matches();
1294    }
1295
1296    /**
1297     * Tests to see if the given string starts with any of the given terms.
1298     * <p>
1299     * This implementation is more efficient than the brute force approach
1300     * of testing the string against each of the terms. It instead compiles
1301     * a single regular expression that can test all the terms at once, and
1302     * uses that expression against the string.
1303     * <p>
1304     * This is a convenience method. If multiple strings are tested against
1305     * the same set of terms, it is more efficient not to compile the regular
1306     * expression multiple times.
1307     * @see #getStartsWithAnyPattern(String[])
1308     *
1309     * @param s String that may start with any of the given terms.
1310     * @param terms list of strings that may start with the given string.
1311     * @return true iff the given string starts with one of the given terms.
1312     *
1313     * @since ostermillerutils 1.02.25
1314     */

1315    public static boolean startsWithAny(String JavaDoc s, String JavaDoc[] terms){
1316        return getStartsWithAnyPattern(terms).matcher(s).matches();
1317    }
1318
1319    /**
1320     * Tests to see if the given string ends with any of the given terms.
1321     * <p>
1322     * This implementation is more efficient than the brute force approach
1323     * of testing the string against each of the terms. It instead compiles
1324     * a single regular expression that can test all the terms at once, and
1325     * uses that expression against the string.
1326     * <p>
1327     * This is a convenience method. If multiple strings are tested against
1328     * the same set of terms, it is more efficient not to compile the regular
1329     * expression multiple times.
1330     * @see #getEndsWithAnyPattern(String[])
1331     *
1332     * @param s String that may end with any of the given terms.
1333     * @param terms list of strings that may end with the given string.
1334     * @return true iff the given string ends with one of the given terms.
1335     *
1336     * @since ostermillerutils 1.02.25
1337     */

1338    public static boolean endsWithAny(String JavaDoc s, String JavaDoc[] terms){
1339        return getEndsWithAnyPattern(terms).matcher(s).matches();
1340    }
1341
1342    /**
1343     * Tests to see if the given string contains any of the given terms.
1344     * <p>
1345     * Case is ignored when matching using Unicode case rules.
1346     * <p>
1347     * This implementation is more efficient than the brute force approach
1348     * of testing the string against each of the terms. It instead compiles
1349     * a single regular expression that can test all the terms at once, and
1350     * uses that expression against the string.
1351     * <p>
1352     * This is a convenience method. If multiple strings are tested against
1353     * the same set of terms, it is more efficient not to compile the regular
1354     * expression multiple times.
1355     * @see #getContainsAnyIgnoreCasePattern(String[])
1356     *
1357     * @param s String that may contain any of the given terms.
1358     * @param terms list of substrings that may be contained in the given string.
1359     * @return true iff one of the terms is a substring of the given string.
1360     *
1361     * @since ostermillerutils 1.02.25
1362     */

1363    public static boolean containsAnyIgnoreCase(String JavaDoc s, String JavaDoc[] terms){
1364        return getContainsAnyIgnoreCasePattern(terms).matcher(s).matches();
1365    }
1366
1367    /**
1368     * Tests to see if the given string equals any of the given terms.
1369     * <p>
1370     * Case is ignored when matching using Unicode case rules.
1371     * <p>
1372     * This implementation is more efficient than the brute force approach
1373     * of testing the string against each of the terms. It instead compiles
1374     * a single regular expression that can test all the terms at once, and
1375     * uses that expression against the string.
1376     * <p>
1377     * This is a convenience method. If multiple strings are tested against
1378     * the same set of terms, it is more efficient not to compile the regular
1379     * expression multiple times.
1380     * @see #getEqualsAnyIgnoreCasePattern(String[])
1381     *
1382     * @param s String that may equal any of the given terms.
1383     * @param terms list of strings that may equal the given string.
1384     * @return true iff one of the terms is equal to the given string.
1385     *
1386     * @since ostermillerutils 1.02.25
1387     */

1388    public static boolean equalsAnyIgnoreCase(String JavaDoc s, String JavaDoc[] terms){
1389        return getEqualsAnyIgnoreCasePattern(terms).matcher(s).matches();
1390    }
1391
1392    /**
1393     * Tests to see if the given string starts with any of the given terms.
1394     * <p>
1395     * Case is ignored when matching using Unicode case rules.
1396     * <p>
1397     * This implementation is more efficient than the brute force approach
1398     * of testing the string against each of the terms. It instead compiles
1399     * a single regular expression that can test all the terms at once, and
1400     * uses that expression against the string.
1401     * <p>
1402     * This is a convenience method. If multiple strings are tested against
1403     * the same set of terms, it is more efficient not to compile the regular
1404     * expression multiple times.
1405     * @see #getStartsWithAnyIgnoreCasePattern(String[])
1406     *
1407     * @param s String that may start with any of the given terms.
1408     * @param terms list of strings that may start with the given string.
1409     * @return true iff the given string starts with one of the given terms.
1410     *
1411     * @since ostermillerutils 1.02.25
1412     */

1413    public static boolean startsWithAnyIgnoreCase(String JavaDoc s, String JavaDoc[] terms){
1414        return getStartsWithAnyIgnoreCasePattern(terms).matcher(s).matches();
1415    }
1416
1417    /**
1418     * Tests to see if the given string ends with any of the given terms.
1419     * <p>
1420     * Case is ignored when matching using Unicode case rules.
1421     * <p>
1422     * This implementation is more efficient than the brute force approach
1423     * of testing the string against each of the terms. It instead compiles
1424     * a single regular expression that can test all the terms at once, and
1425     * uses that expression against the string.
1426     * <p>
1427     * This is a convenience method. If multiple strings are tested against
1428     * the same set of terms, it is more efficient not to compile the regular
1429     * expression multiple times.
1430     * @see #getEndsWithAnyIgnoreCasePattern(String[])
1431     *
1432     * @param s String that may end with any of the given terms.
1433     * @param terms list of strings that may end with the given string.
1434     * @return true iff the given string ends with one of the given terms.
1435     *
1436     * @since ostermillerutils 1.02.25
1437     */

1438    public static boolean endsWithAnyIgnoreCase(String JavaDoc s, String JavaDoc[] terms){
1439        return getEndsWithAnyIgnoreCasePattern(terms).matcher(s).matches();
1440    }
1441}
1442
Popular Tags