StringHelper


1   /*
2    * Static String formatting and query routines.
3    * Copyright (C) 2001,2002 Stephen Ostermiller
4    * http://ostermiller.org/contact.pl?regarding=Java+Utilities
5    *
6    * This program is free software; you can redistribute it and/or modify
7    * it under the terms of the GNU General Public License as published by
8    * the Free Software Foundation; either version 2 of the License, or
9    * (at your option) any later version.
10   *
11   * This program is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   * GNU General Public License for more details.
15   *
16   * See COPYING.TXT for details.
17   */
18  
19  package com.Ostermiller.util;
20  
21  import java.util.HashMap  ;
22  import java.util.regex.Pattern  ;
23  
24  /**
25   * Utilities for String formatting, manipulation, and queries.
26   * More information about this class is available from <a target="_top" HREF=
27   * "http://ostermiller.org/utils/StringHelper.html">ostermiller.org</a>.
28   *
29   * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
30   * @since ostermillerutils 1.00.00
31   */
32  public class StringHelper {
33  
34      /**
35       * Pad the beginning of the given String with spaces until
36       * the String is of the given length.
37       * <p>
38       * If a String is longer than the desired length,
39       * it will not be truncated, however no padding
40       * will be added.
41       *
42       * @param s String to be padded.
43       * @param length desired length of result.
44       * @return padded String.
45       * @throws NullPointerException if s is null.
46       *
47       * @since ostermillerutils 1.00.00
48       */
49      public static String   prepad(String   s, int length){
50          return prepad(s, length, ' ');
51      }
52  
53      /**
54       * Pre-pend the given character to the String until
55       * the result is the desired length.
56       * <p>
57       * If a String is longer than the desired length,
58       * it will not be truncated, however no padding
59       * will be added.
60       *
61       * @param s String to be padded.
62       * @param length desired length of result.
63       * @param c padding character.
64       * @return padded String.
65       * @throws NullPointerException if s is null.
66       *
67       * @since ostermillerutils 1.00.00
68       */
69      public static String   prepad(String   s, int length, char c){
70          int needed = length - s.length();
71          if (needed <= 0){
72              return s;
73          }
74          StringBuffer   sb = new StringBuffer  (length);
75          for (int i=0; i<needed; i++){
76              sb.append(c);
77          }
78          sb.append(s);
79          return (sb.toString());
80      }
81  
82      /**
83       * Pad the end of the given String with spaces until
84       * the String is of the given length.
85       * <p>
86       * If a String is longer than the desired length,
87       * it will not be truncated, however no padding
88       * will be added.
89       *
90       * @param s String to be padded.
91       * @param length desired length of result.
92       * @return padded String.
93       * @throws NullPointerException if s is null.
94       *
95       * @since ostermillerutils 1.00.00
96       */
97      public static String   postpad(String   s, int length){
98          return postpad(s, length, ' ');
99      }
100 
101     /**
102      * Append the given character to the String until
103      * the result is  the desired length.
104      * <p>
105      * If a String is longer than the desired length,
106      * it will not be truncated, however no padding
107      * will be added.
108      *
109      * @param s String to be padded.
110      * @param length desired length of result.
111      * @param c padding character.
112      * @return padded String.
113      * @throws NullPointerException if s is null.
114      *
115      * @since ostermillerutils 1.00.00
116      */
117     public static String   postpad(String   s, int length, char c){
118         int needed = length - s.length();
119         if (needed <= 0){
120             return s;
121         }
122         StringBuffer   sb = new StringBuffer  (length);
123         sb.append(s);
124         for (int i=0; i<needed; i++){
125             sb.append(c);
126         }
127         return (sb.toString());
128     }
129 
130     /**
131      * Pad the beginning and end of the given String with spaces until
132      * the String is of the given length.  The result is that the original
133      * String is centered in the middle of the new string.
134      * <p>
135      * If the number of characters to pad is even, then the padding
136      * will be split evenly between the beginning and end, otherwise,
137      * the extra character will be added to the end.
138      * <p>
139      * If a String is longer than the desired length,
140      * it will not be truncated, however no padding
141      * will be added.
142      *
143      * @param s String to be padded.
144      * @param length desired length of result.
145      * @return padded String.
146      * @throws NullPointerException if s is null.
147      *
148      * @since ostermillerutils 1.00.00
149      */
150     public static String   midpad(String   s, int length){
151         return midpad(s, length, ' ');
152     }
153 
154     /**
155      * Pad the beginning and end of the given String with the given character
156      * until the result is  the desired length.  The result is that the original
157      * String is centered in the middle of the new string.
158      * <p>
159      * If the number of characters to pad is even, then the padding
160      * will be split evenly between the beginning and end, otherwise,
161      * the extra character will be added to the end.
162      * <p>
163      * If a String is longer than the desired length,
164      * it will not be truncated, however no padding
165      * will be added.
166      *
167      * @param s String to be padded.
168      * @param length desired length of result.
169      * @param c padding character.
170      * @return padded String.
171      * @throws NullPointerException if s is null.
172      *
173      * @since ostermillerutils 1.00.00
174      */
175     public static String   midpad(String   s, int length, char c){
176         int needed = length - s.length();
177         if (needed <= 0){
178             return s;
179         }
180         int beginning = needed / 2;
181         int end = beginning + needed % 2;
182         StringBuffer   sb = new StringBuffer  (length);
183         for (int i=0; i<beginning; i++){
184             sb.append(c);
185         }
186         sb.append(s);
187         for (int i=0; i<end; i++){
188             sb.append(c);
189         }
190         return (sb.toString());
191     }
192 
193     /**
194      * Split the given String into tokens.
195      * <P>
196      * This method is meant to be similar to the split
197      * function in other programming languages but it does
198      * not use regular expressions.  Rather the String is
199      * split on a single String literal.
200      * <P>
201      * Unlike java.util.StringTokenizer which accepts
202      * multiple character tokens as delimiters, the delimiter
203      * here is a single String literal.
204      * <P>
205      * Each null token is returned as an empty String.
206      * Delimiters are never returned as tokens.
207      * <P>
208      * If there is no delimiter because it is either empty or
209      * null, the only element in the result is the original String.
210      * <P>
211      * StringHelper.split("1-2-3", "-");<br>
212      * result: {"1", "2", "3"}<br>
213      * StringHelper.split("-1--2-", "-");<br>
214      * result: {"", "1", "", "2", ""}<br>
215      * StringHelper.split("123", "");<br>
216      * result: {"123"}<br>
217      * StringHelper.split("1-2---3----4", "--");<br>
218      * result: {"1-2", "-3", "", "4"}<br>
219      *
220      * @param s String to be split.
221      * @param delimiter String literal on which to split.
222      * @return an array of tokens.
223      * @throws NullPointerException if s is null.
224      *
225      * @since ostermillerutils 1.00.00
226      */
227     public static String  [] split(String   s, String   delimiter){
228         int delimiterLength;
229         // the next statement has the side effect of throwing a null pointer
230         // exception if s is null.
231         int stringLength = s.length();
232         if (delimiter == null || (delimiterLength = delimiter.length()) == 0){
233             // it is not inherently clear what to do if there is no delimiter
234             // On one hand it would make sense to return each character because
235             // the null String can be found between each pair of characters in
236             // a String.  However, it can be found many times there and we don'
237             // want to be returning multiple null tokens.
238             // returning the whole String will be defined as the correct behavior
239             // in this instance.
240             return new String  [] {s};
241         }
242 
243         // a two pass solution is used because a one pass solution would
244         // require the possible resizing and copying of memory structures
245         // In the worst case it would have to be resized n times with each
246         // resize having a O(n) copy leading to an O(n^2) algorithm.
247 
248         int count;
249         int start;
250         int end;
251 
252         // Scan s and count the tokens.
253         count = 0;
254         start = 0;
255         while((end = s.indexOf(delimiter, start)) != -1) {
256             count++;
257             start = end + delimiterLength;
258         }
259         count++;
260 
261         // allocate an array to return the tokens,
262         // we now know how big it should be
263         String  [] result = new String  [count];
264 
265         // Scan s again, but this time pick out the tokens
266         count = 0;
267         start = 0;
268         while((end = s.indexOf(delimiter, start)) != -1) {
269             result[count] = (s.substring(start, end));
270             count++;
271             start = end + delimiterLength;
272         }
273         end = stringLength;
274         result[count] = s.substring(start, end);
275 
276         return (result);
277     }
278 
279     /**
280      * Replace occurrences of a substring.
281      *
282      * StringHelper.replace("1-2-3", "-", "|");<br>
283      * result: "1|2|3"<br>
284      * StringHelper.replace("-1--2-", "-", "|");<br>
285      * result: "|1||2|"<br>
286      * StringHelper.replace("123", "", "|");<br>
287      * result: "123"<br>
288      * StringHelper.replace("1-2---3----4", "--", "|");<br>
289      * result: "1-2|-3||4"<br>
290      * StringHelper.replace("1-2---3----4", "--", "---");<br>
291      * result: "1-2----3------4"<br>
292      *
293      * @param s String to be modified.
294      * @param find String to find.
295      * @param replace String to replace.
296      * @return a string with all the occurrences of the string to find replaced.
297      * @throws NullPointerException if s is null.
298      *
299      * @since ostermillerutils 1.00.00
300      */
301     public static String   replace(String   s, String   find, String   replace){
302         int findLength;
303         // the next statement has the side effect of throwing a null pointer
304         // exception if s is null.
305         int stringLength = s.length();
306         if (find == null || (findLength = find.length()) == 0){
307             // If there is nothing to find, we won't try and find it.
308             return s;
309         }
310         if (replace == null){
311             // a null string and an empty string are the same
312             // for replacement purposes.
313             replace = "";
314         }
315         int replaceLength = replace.length();
316 
317         // We need to figure out how long our resulting string will be.
318         // This is required because without it, the possible resizing
319         // and copying of memory structures could lead to an unacceptable runtime.
320         // In the worst case it would have to be resized n times with each
321         // resize having a O(n) copy leading to an O(n^2) algorithm.
322         int length;
323         if (findLength == replaceLength){
324             // special case in which we don't need to count the replacements
325             // because the count falls out of the length formula.
326             length = stringLength;
327         } else {
328             int count;
329             int start;
330             int end;
331 
332             // Scan s and count the number of times we find our target.
333             count = 0;
334             start = 0;
335             while((end = s.indexOf(find, start)) != -1) {
336                 count++;
337                 start = end + findLength;
338             }
339             if (count == 0){
340                 // special case in which on first pass, we find there is nothing
341                 // to be replaced.  No need to do a second pass or create a string buffer.
342                 return s;
343             }
344             length = stringLength - (count * (findLength - replaceLength));
345         }
346 
347         int start = 0;
348         int end = s.indexOf(find, start);
349         if (end == -1){
350             // nothing was found in the string to replace.
351             // we can get this if the find and replace strings
352             // are the same length because we didn't check before.
353             // in this case, we will return the original string
354             return s;
355         }
356         // it looks like we actually have something to replace
357         // *sigh* allocate memory for it.
358         StringBuffer   sb = new StringBuffer  (length);
359 
360         // Scan s and do the replacements
361         while (end != -1) {
362             sb.append(s.substring(start, end));
363             sb.append(replace);
364             start = end + findLength;
365             end = s.indexOf(find, start);
366         }
367         end = stringLength;
368         sb.append(s.substring(start, end));
369 
370         return (sb.toString());
371     }
372 
373     /**
374      * Replaces characters that may be confused by a HTML
375      * parser with their equivalent character entity references.
376      * <p>
377      * Any data that will appear as text on a web page should
378      * be escaped.  This is especially important for data
379      * that comes from untrusted sources such as Internet users.
380      * A common mistake in CGI programming is to ask a user for
381      * data and then put that data on a web page.  For example:<pre>
382      * Server: What is your name?
383      * User: &lt;b&gt;Joe&lt;/b&gt;
384      * Server: Hello <b>Joe</b>, Welcome</pre>
385      * If the name is put on the page without checking that it doesn't
386      * contain HTML code or without sanitizing that HTML code, the user
387      * could reformat the page, insert scripts, and control the
388      * content on your web server.
389      * <p>
390      * This method will replace HTML characters such as &gt; with their
391      * HTML entity reference (&amp;gt;) so that the html parser will
392      * be sure to interpret them as plain text rather than HTML or script.
393      * <p>
394      * This method should be used for both data to be displayed in text
395      * in the html document, and data put in form elements. For example:<br>
396      * <code>&lt;html&gt;&lt;body&gt;<i>This is not a &amp;lt;tag&amp;gt;
397      * in HTML</i>&lt;/body&gt;&lt;/html&gt;</code><br>
398      * and<br>
399      * <code>&lt;form&gt;&lt;input type="hidden" name="date" value="<i>This data could
400      * be &amp;quot;malicious&amp;quot;</i>"&gt;&lt;/form&gt;</code><br>
401      * In the second example, the form data would be properly resubmitted
402      * to your cgi script in the URLEncoded format:<br>
403      * <code><i>This data could be %22malicious%22</i></code>
404      *
405      * @param s String to be escaped
406      * @return escaped String
407      * @throws NullPointerException if s is null.
408      *
409      * @since ostermillerutils 1.00.00
410      */
411     public static String   escapeHTML(String   s){
412         int length = s.length();
413         int newLength = length;
414         boolean someCharacterEscaped = false;
415         // first check for characters that might
416         // be dangerous and calculate a length
417         // of the string that has escapes.
418         for (int i=0; i<length; i++){
419             char c = s.charAt(i);
420             int cint = 0xffff & c;
421             if (cint < 32){
422                 switch(c){
423                     case '\r':
424                     case '\n':
425                     case '\t':
426                     case '\f':{
427                     } break;
428                     default: {
429                         newLength -= 1;
430                         someCharacterEscaped = true;
431                     }
432                 }
433             } else {
434                 switch(c){
435                     case '\"':{
436                         newLength += 5;
437                         someCharacterEscaped = true;
438                     } break;
439                     case '&':
440                     case '\'':{
441                         newLength += 4;
442                         someCharacterEscaped = true;
443                     } break;
444                     case '<':
445                     case '>':{
446                         newLength += 3;
447                         someCharacterEscaped = true;
448                     } break;
449                 }
450             }
451         }
452         if (!someCharacterEscaped){
453             // nothing to escape in the string
454             return s;
455         }
456         StringBuffer   sb = new StringBuffer  (newLength);
457         for (int i=0; i<length; i++){
458             char c = s.charAt(i);
459             int cint = 0xffff & c;
460             if (cint < 32){
461                 switch(c){
462                     case '\r':
463                     case '\n':
464                     case '\t':
465                     case '\f':{
466                         sb.append(c);
467                     } break;
468                     default: {
469                         // Remove this character
470                     }
471                 }
472             } else {
473                 switch(c){
474                     case '\"':{
475                         sb.append("&quot;");
476                     } break;
477                     case '\'':{
478                         sb.append("&#39;");
479                     } break;
480                     case '&':{
481                         sb.append("&amp;");
482                     } break;
483                     case '<':{
484                         sb.append("&lt;");
485                     } break;
486                     case '>':{
487                         sb.append("&gt;");
488                     } break;
489                     default: {
490                         sb.append(c);
491                     }
492                 }
493             }
494         }
495         return sb.toString();
496     }
497 
498     /**
499      * Replaces characters that may be confused by an SQL
500      * parser with their equivalent escape characters.
501      * <p>
502      * Any data that will be put in an SQL query should
503      * be escaped.  This is especially important for data
504      * that comes from untrusted sources such as Internet users.
505      * <p>
506      * For example if you had the following SQL query:<br>
507      * <code>"SELECT * FROM addresses WHERE name='" + name + "' AND private='N'"</code><br>
508      * Without this function a user could give <code>" OR 1=1 OR ''='"</code>
509      * as their name causing the query to be:<br>
510      * <code>"SELECT * FROM addresses WHERE name='' OR 1=1 OR ''='' AND private='N'"</code><br>
511      * which will give all addresses, including private ones.<br>
512      * Correct usage would be:<br>
513      * <code>"SELECT * FROM addresses WHERE name='" + StringHelper.escapeSQL(name) + "' AND private='N'"</code><br>
514      * <p>
515      * Another way to avoid this problem is to use a PreparedStatement
516      * with appropriate placeholders.
517      *
518      * @param s String to be escaped
519      * @return escaped String
520      * @throws NullPointerException if s is null.
521      *
522      * @since ostermillerutils 1.00.00
523      */
524     public static String   escapeSQL(String   s){
525         int length = s.length();
526         int newLength = length;
527         // first check for characters that might
528         // be dangerous and calculate a length
529         // of the string that has escapes.
530         for (int i=0; i<length; i++){
531             char c = s.charAt(i);
532             switch(c){
533                 case '\\':
534                 case '\"':
535                 case '\'':
536                 case '\0':{
537                     newLength += 1;
538                 } break;
539             }
540         }
541         if (length == newLength){
542             // nothing to escape in the string
543             return s;
544         }
545         StringBuffer   sb = new StringBuffer  (newLength);
546         for (int i=0; i<length; i++){
547             char c = s.charAt(i);
548             switch(c){
549                 case '\\':{
550                     sb.append("\\\\");
551                 } break;
552                 case '\"':{
553                     sb.append("\\\"");
554                 } break;
555                 case '\'':{
556                     sb.append("\\\'");
557                 } break;
558                 case '\0':{
559                     sb.append("\\0");
560                 } break;
561                 default: {
562                     sb.append(c);
563                 }
564             }
565         }
566         return sb.toString();
567     }
568 
569     /**
570      * Replaces characters that are not allowed in a Java style
571      * string literal with their escape characters.  Specifically
572      * quote ("), single quote ('), new line (\n), carriage return (\r),
573      * and backslash (\), and tab (\t) are escaped.
574      *
575      * @param s String to be escaped
576      * @return escaped String
577      * @throws NullPointerException if s is null.
578      *
579      * @since ostermillerutils 1.00.00
580      */
581     public static String   escapeJavaLiteral(String   s){
582         int length = s.length();
583         int newLength = length;
584         // first check for characters that might
585         // be dangerous and calculate a length
586         // of the string that has escapes.
587         for (int i=0; i<length; i++){
588             char c = s.charAt(i);
589             switch(c){
590                 case '\"':
591                 case '\'':
592                 case '\n':
593                 case '\r':
594                 case '\t':
595                 case '\\':{
596                     newLength += 1;
597                 } break;
598             }
599         }
600         if (length == newLength){
601             // nothing to escape in the string
602             return s;
603         }
604         StringBuffer   sb = new StringBuffer  (newLength);
605         for (int i=0; i<length; i++){
606             char c = s.charAt(i);
607             switch(c){
608                 case '\"':{
609                     sb.append("\\\"");
610                 } break;
611                 case '\'':{
612                     sb.append("\\\'");
613                 } break;
614                 case '\n':{
615                     sb.append("\\n");
616                 } break;
617                 case '\r':{
618                     sb.append("\\r");
619                 } break;
620                 case '\t':{
621                     sb.append("\\t");
622                 } break;
623                 case '\\':{
624                     sb.append("\\\\");
625                 } break;
626                 default: {
627                     sb.append(c);
628                 }
629             }
630         }
631         return sb.toString();
632     }
633 
634     /**
635      * Trim any of the characters contained in the second
636      * string from the beginning and end of the first.
637      *
638      * @param s String to be trimmed.
639      * @param c list of characters to trim from s.
640      * @return trimmed String.
641      * @throws NullPointerException if s is null.
642      *
643      * @since ostermillerutils 1.00.00
644      */
645     public static String   trim(String   s, String   c){
646         int length = s.length();
647         if (c == null){
648             return s;
649         }
650         int cLength = c.length();
651         if (c.length() == 0){
652             return s;
653         }
654         int start = 0;
655         int end = length;
656         boolean found; // trim-able character found.
657         int i;
658         // Start from the beginning and find the
659         // first non-trim-able character.
660         found = false;
661         for (i=0; !found && i<length; i++){
662             char ch = s.charAt(i);
663             found = true;
664             for (int j=0; found && j<cLength; j++){
665                 if (c.charAt(j) == ch) found = false;
666             }
667         }
668         // if all characters are trim-able.
669         if (!found) return "";
670         start = i-1;
671         // Start from the end and find the
672         // last non-trim-able character.
673         found = false;
674         for (i=length-1; !found && i>=0; i--){
675             char ch = s.charAt(i);
676             found = true;
677             for (int j=0; found && j<cLength; j++){
678                 if (c.charAt(j) == ch) found = false;
679             }
680         }
681         end = i+2;
682         return s.substring(start, end);
683     }
684 
685     private static HashMap  <String  , Integer  > htmlEntities = new HashMap  <String  , Integer  >();
686     static {
687         htmlEntities.put("nbsp", new Integer  (160));
688         htmlEntities.put("iexcl", new Integer  (161));
689         htmlEntities.put("cent", new Integer  (162));
690         htmlEntities.put("pound", new Integer  (163));
691         htmlEntities.put("curren", new Integer  (164));
692         htmlEntities.put("yen", new Integer  (165));
693         htmlEntities.put("brvbar", new Integer  (166));
694         htmlEntities.put("sect", new Integer  (167));
695         htmlEntities.put("uml", new Integer  (168));
696         htmlEntities.put("copy", new Integer  (169));
697         htmlEntities.put("ordf", new Integer  (170));
698         htmlEntities.put("laquo", new Integer  (171));
699         htmlEntities.put("not", new Integer  (172));
700         htmlEntities.put("shy", new Integer  (173));
701         htmlEntities.put("reg", new Integer  (174));
702         htmlEntities.put("macr", new Integer  (175));
703         htmlEntities.put("deg", new Integer  (176));
704         htmlEntities.put("plusmn", new Integer  (177));
705         htmlEntities.put("sup2", new Integer  (178));
706         htmlEntities.put("sup3", new Integer  (179));
707         htmlEntities.put("acute", new Integer  (180));
708         htmlEntities.put("micro", new Integer  (181));
709         htmlEntities.put("para", new Integer  (182));
710         htmlEntities.put("middot", new Integer  (183));
711         htmlEntities.put("cedil", new Integer  (184));
712         htmlEntities.put("sup1", new Integer  (185));
713         htmlEntities.put("ordm", new Integer  (186));
714         htmlEntities.put("raquo", new Integer  (187));
715         htmlEntities.put("frac14", new Integer  (188));
716         htmlEntities.put("frac12", new Integer  (189));
717         htmlEntities.put("frac34", new Integer  (190));
718         htmlEntities.put("iquest", new Integer  (191));
719         htmlEntities.put("Agrave", new Integer  (192));
720         htmlEntities.put("Aacute", new Integer  (193));
721         htmlEntities.put("Acirc", new Integer  (194));
722         htmlEntities.put("Atilde", new Integer  (195));
723         htmlEntities.put("Auml", new Integer  (196));
724         htmlEntities.put("Aring", new Integer  (197));
725         htmlEntities.put("AElig", new Integer  (198));
726         htmlEntities.put("Ccedil", new Integer  (199));
727         htmlEntities.put("Egrave", new Integer  (200));
728         htmlEntities.put("Eacute", new Integer  (201));
729         htmlEntities.put("Ecirc", new Integer  (202));
730         htmlEntities.put("Euml", new Integer  (203));
731         htmlEntities.put("Igrave", new Integer  (204));
732         htmlEntities.put("Iacute", new Integer  (205));
733         htmlEntities.put("Icirc", new Integer  (206));
734         htmlEntities.put("Iuml", new Integer  (207));
735         htmlEntities.put("ETH", new Integer  (208));
736         htmlEntities.put("Ntilde", new Integer  (209));
737         htmlEntities.put("Ograve", new Integer  (210));
738         htmlEntities.put("Oacute", new Integer  (211));
739         htmlEntities.put("Ocirc", new Integer  (212));
740         htmlEntities.put("Otilde", new Integer  (213));
741         htmlEntities.put("Ouml", new Integer  (214));
742         htmlEntities.put("times", new Integer  (215));
743         htmlEntities.put("Oslash", new Integer  (216));
744         htmlEntities.put("Ugrave", new Integer  (217));
745         htmlEntities.put("Uacute", new Integer  (218));
746         htmlEntities.put("Ucirc", new Integer  (219));
747         htmlEntities.put("Uuml", new Integer  (220));
748         htmlEntities.put("Yacute", new Integer  (221));
749         htmlEntities.put("THORN", new Integer  (222));
750         htmlEntities.put("szlig", new Integer  (223));
751         htmlEntities.put("agrave", new Integer  (224));
752         htmlEntities.put("aacute", new Integer  (225));
753         htmlEntities.put("acirc", new Integer  (226));
754         htmlEntities.put("atilde", new Integer  (227));
755         htmlEntities.put("auml", new Integer  (228));
756         htmlEntities.put("aring", new Integer  (229));
757         htmlEntities.put("aelig", new Integer  (230));
758         htmlEntities.put("ccedil", new Integer  (231));
759         htmlEntities.put("egrave", new Integer  (232));
760         htmlEntities.put("eacute", new Integer  (233));
761         htmlEntities.put("ecirc", new Integer  (234));
762         htmlEntities.put("euml", new Integer  (235));
763         htmlEntities.put("igrave", new Integer  (236));
764         htmlEntities.put("iacute", new Integer  (237));
765         htmlEntities.put("icirc", new Integer  (238));
766         htmlEntities.put("iuml", new Integer  (239));
767         htmlEntities.put("eth", new Integer  (240));
768         htmlEntities.put("ntilde", new Integer  (241));
769         htmlEntities.put("ograve", new Integer  (242));
770         htmlEntities.put("oacute", new Integer  (243));
771         htmlEntities.put("ocirc", new Integer  (244));
772         htmlEntities.put("otilde", new Integer  (245));
773         htmlEntities.put("ouml", new Integer  (246));
774         htmlEntities.put("divide", new Integer  (247));
775         htmlEntities.put("oslash", new Integer  (248));
776         htmlEntities.put("ugrave", new Integer  (249));
777         htmlEntities.put("uacute", new Integer  (250));
778         htmlEntities.put("ucirc", new Integer  (251));
779         htmlEntities.put("uuml", new Integer  (252));
780         htmlEntities.put("yacute", new Integer  (253));
781         htmlEntities.put("thorn", new Integer  (254));
782         htmlEntities.put("yuml", new Integer  (255));
783         htmlEntities.put("fnof", new Integer  (402));
784         htmlEntities.put("Alpha", new Integer  (913));
785         htmlEntities.put("Beta", new Integer  (914));
786         htmlEntities.put("Gamma", new Integer  (915));
787         htmlEntities.put("Delta", new Integer  (916));
788         htmlEntities.put("Epsilon", new Integer  (917));
789         htmlEntities.put("Zeta", new Integer  (918));
790         htmlEntities.put("Eta", new Integer  (919));
791         htmlEntities.put("Theta", new Integer  (920));
792         htmlEntities.put("Iota", new Integer  (921));
793         htmlEntities.put("Kappa", new Integer  (922));
794         htmlEntities.put("Lambda", new Integer  (923));
795         htmlEntities.put("Mu", new Integer  (924));
796         htmlEntities.put("Nu", new Integer  (925));
797         htmlEntities.put("Xi", new Integer  (926));
798         htmlEntities.put("Omicron", new Integer  (927));
799         htmlEntities.put("Pi", new Integer  (928));
800         htmlEntities.put("Rho", new Integer  (929));
801         htmlEntities.put("Sigma", new Integer  (931));
802         htmlEntities.put("Tau", new Integer  (932));
803         htmlEntities.put("Upsilon", new Integer  (933));
804         htmlEntities.put("Phi", new Integer  (934));
805         htmlEntities.put("Chi", new Integer  (935));
806         htmlEntities.put("Psi", new Integer  (936));
807         htmlEntities.put("Omega", new Integer  (937));
808         htmlEntities.put("alpha", new Integer  (945));
809         htmlEntities.put("beta", new Integer  (946));
810         htmlEntities.put("gamma", new Integer  (947));
811         htmlEntities.put("delta", new Integer  (948));
812         htmlEntities.put("epsilon", new Integer  (949));
813         htmlEntities.put("zeta", new Integer  (950));
814         htmlEntities.put("eta", new Integer  (951));
815         htmlEntities.put("theta", new Integer  (952));
816         htmlEntities.put("iota", new Integer  (953));
817         htmlEntities.put("kappa", new Integer  (954));
818         htmlEntities.put("lambda", new Integer  (955));
819         htmlEntities.put("mu", new Integer  (956));
820         htmlEntities.put("nu", new Integer  (957));
821         htmlEntities.put("xi", new Integer  (958));
822         htmlEntities.put("omicron", new Integer  (959));
823         htmlEntities.put("pi", new Integer  (960));
824         htmlEntities.put("rho", new Integer  (961));
825         htmlEntities.put("sigmaf", new Integer  (962));
826         htmlEntities.put("sigma", new Integer  (963));
827         htmlEntities.put("tau", new Integer  (964));
828         htmlEntities.put("upsilon", new Integer  (965));
829         htmlEntities.put("phi", new Integer  (966));
830         htmlEntities.put("chi", new Integer  (967));
831         htmlEntities.put("psi", new Integer  (968));
832         htmlEntities.put("omega", new Integer  (969));
833         htmlEntities.put("thetasym", new Integer  (977));
834         htmlEntities.put("upsih", new Integer  (978));
835         htmlEntities.put("piv", new Integer  (982));
836         htmlEntities.put("bull", new Integer  (8226));
837         htmlEntities.put("hellip", new Integer  (8230));
838         htmlEntities.put("prime", new Integer  (8242));
839         htmlEntities.put("Prime", new Integer  (8243));
840         htmlEntities.put("oline", new Integer  (8254));
841         htmlEntities.put("frasl", new Integer  (8260));
842         htmlEntities.put("weierp", new Integer  (8472));
843         htmlEntities.put("image", new Integer  (8465));
844         htmlEntities.put("real", new Integer  (8476));
845         htmlEntities.put("trade", new Integer  (8482));
846         htmlEntities.put("alefsym", new Integer  (8501));
847         htmlEntities.put("larr", new Integer  (8592));
848         htmlEntities.put("uarr", new Integer  (8593));
849         htmlEntities.put("rarr", new Integer  (8594));
850         htmlEntities.put("darr", new Integer  (8595));
851         htmlEntities.put("harr", new Integer  (8596));
852         htmlEntities.put("crarr", new Integer  (8629));
853         htmlEntities.put("lArr", new Integer  (8656));
854         htmlEntities.put("uArr", new Integer  (8657));
855         htmlEntities.put("rArr", new Integer  (8658));
856         htmlEntities.put("dArr", new Integer  (8659));
857         htmlEntities.put("hArr", new Integer  (8660));
858         htmlEntities.put("forall", new Integer  (8704));
859         htmlEntities.put("part", new Integer  (8706));
860         htmlEntities.put("exist", new Integer  (8707));
861         htmlEntities.put("empty", new Integer  (8709));
862         htmlEntities.put("nabla", new Integer  (8711));
863         htmlEntities.put("isin", new Integer  (8712));
864         htmlEntities.put("notin", new Integer  (8713));
865         htmlEntities.put("ni", new Integer  (8715));
866         htmlEntities.put("prod", new Integer  (8719));
867         htmlEntities.put("sum", new Integer  (8721));
868         htmlEntities.put("minus", new Integer  (8722));
869         htmlEntities.put("lowast", new Integer  (8727));
870         htmlEntities.put("radic", new Integer  (8730));
871         htmlEntities.put("prop", new Integer  (8733));
872         htmlEntities.put("infin", new Integer  (8734));
873         htmlEntities.put("ang", new Integer  (8736));
874         htmlEntities.put("and", new Integer  (8743));
875         htmlEntities.put("or", new Integer  (8744));
876         htmlEntities.put("cap", new Integer  (8745));
877         htmlEntities.put("cup", new Integer  (8746));
878         htmlEntities.put("int", new Integer  (8747));
879         htmlEntities.put("there4", new Integer  (8756));
880         htmlEntities.put("sim", new Integer  (8764));
881         htmlEntities.put("cong", new Integer  (8773));
882         htmlEntities.put("asymp", new Integer  (8776));
883         htmlEntities.put("ne", new Integer  (8800));
884         htmlEntities.put("equiv", new Integer  (8801));
885         htmlEntities.put("le", new Integer  (8804));
886         htmlEntities.put("ge", new Integer  (8805));
887         htmlEntities.put("sub", new Integer  (8834));
888         htmlEntities.put("sup", new Integer  (8835));
889         htmlEntities.put("nsub", new Integer  (8836));
890         htmlEntities.put("sube", new Integer  (8838));
891         htmlEntities.put("supe", new Integer  (8839));
892         htmlEntities.put("oplus", new Integer  (8853));
893         htmlEntities.put("otimes", new Integer  (8855));
894         htmlEntities.put("perp", new Integer  (8869));
895         htmlEntities.put("sdot", new Integer  (8901));
896         htmlEntities.put("lceil", new Integer  (8968));
897         htmlEntities.put("rceil", new Integer  (8969));
898         htmlEntities.put("lfloor", new Integer  (8970));
899         htmlEntities.put("rfloor", new Integer  (8971));
900         htmlEntities.put("lang", new Integer  (9001));
901         htmlEntities.put("rang", new Integer  (9002));
902         htmlEntities.put("loz", new Integer  (9674));
903         htmlEntities.put("spades", new Integer  (9824));
904         htmlEntities.put("clubs", new Integer  (9827));
905         htmlEntities.put("hearts", new Integer  (9829));
906         htmlEntities.put("diams", new Integer  (9830));
907         htmlEntities.put("quot", new Integer  (34));
908         htmlEntities.put("amp", new Integer  (38));
909         htmlEntities.put("lt", new Integer  (60));
910         htmlEntities.put("gt", new Integer  (62));
911         htmlEntities.put("OElig", new Integer  (338));
912         htmlEntities.put("oelig", new Integer  (339));
913         htmlEntities.put("Scaron", new Integer  (352));
914         htmlEntities.put("scaron", new Integer  (353));
915         htmlEntities.put("Yuml", new Integer  (376));
916         htmlEntities.put("circ", new Integer  (710));
917         htmlEntities.put("tilde", new Integer  (732));
918         htmlEntities.put("ensp", new Integer  (8194));
919         htmlEntities.put("emsp", new Integer  (8195));
920         htmlEntities.put("thinsp", new Integer  (8201));
921         htmlEntities.put("zwnj", new Integer  (8204));
922         htmlEntities.put("zwj", new Integer  (8205));
923         htmlEntities.put("lrm", new Integer  (8206));
924         htmlEntities.put("rlm", new Integer  (8207));
925         htmlEntities.put("ndash", new Integer  (8211));
926         htmlEntities.put("mdash", new Integer  (8212));
927         htmlEntities.put("lsquo", new Integer  (8216));
928         htmlEntities.put("rsquo", new Integer  (8217));
929         htmlEntities.put("sbquo", new Integer  (8218));
930         htmlEntities.put("ldquo", new Integer  (8220));
931         htmlEntities.put("rdquo", new Integer  (8221));
932         htmlEntities.put("bdquo", new Integer  (8222));
933         htmlEntities.put("dagger", new Integer  (8224));
934         htmlEntities.put("Dagger", new Integer  (8225));
935         htmlEntities.put("permil", new Integer  (8240));
936         htmlEntities.put("lsaquo", new Integer  (8249));
937         htmlEntities.put("rsaquo", new Integer  (8250));
938         htmlEntities.put("euro", new Integer  (8364));
939     }
940 
941     /**
942      * Turn any HTML escape entities in the string into
943      * characters and return the resulting string.
944      *
945      * @param s String to be unescaped.
946      * @return unescaped String.
947      * @throws NullPointerException if s is null.
948      *
949      * @since ostermillerutils 1.00.00
950      */
951     public static String   unescapeHTML(String   s){
952         StringBuffer   result = new StringBuffer  (s.length());
953         int ampInd = s.indexOf("&");
954         int lastEnd = 0;
955         while (ampInd >= 0){
956             int nextAmp = s.indexOf("&", ampInd+1);
957             int nextSemi = s.indexOf(";", ampInd+1);
958             if (nextSemi != -1 && (nextAmp == -1 || nextSemi < nextAmp)){
959                 int value = -1;
960                 String   escape = s.substring(ampInd+1,nextSemi);
961                 try {
962                     if (escape.startsWith("#")){
963                         value = Integer.parseInt(escape.substring(1), 10);
964                     } else {
965                         if (htmlEntities.containsKey(escape)){
966                             value = ((Integer  )(htmlEntities.get(escape))).intValue();
967                         }
968                     }
969                 } catch (NumberFormatException   x){
970                 }
971                 result.append(s.substring(lastEnd, ampInd));
972                 lastEnd = nextSemi + 1;
973                 if (value >= 0 && value <= 0xffff){
974                     result.append((char)value);
975                 } else {
976                     result.append("&").append(escape).append(";");
977                 }
978             }
979             ampInd = nextAmp;
980         }
981         result.append(s.substring(lastEnd));
982         return result.toString();
983     }
984 
985     /**
986      * Escapes characters that have special meaning to
987      * regular expressions
988      *
989      * @param s String to be escaped
990      * @return escaped String
991      * @throws NullPointerException if s is null.
992      *
993      * @since ostermillerutils 1.02.25
994      */
995     public static String   escapeRegularExpressionLiteral(String   s){
996         // According to the documentation in the Pattern class:
997         //
998         // The backslash character ('\') serves to introduce escaped constructs,
999         // as defined in the table above, as well as to quote characters that
1000        // otherwise would be interpreted as unescaped constructs. Thus the
1001        // expression \\ matches a single backslash and \{ matches a left brace.
1002        //
1003        // It is an error to use a backslash prior to any alphabetic character
1004        // that does not denote an escaped construct; these are reserved for future
1005        // extensions to the regular-expression language. A backslash may be used
1006        // prior to a non-alphabetic character regardless of whether that character
1007        // is part of an unescaped construct.
1008        //
1009        // As a result, escape everything except [0-9a-zA-Z]
1010
1011        int length = s.length();
1012        int newLength = length;
1013        // first check for characters that might
1014        // be dangerous and calculate a length
1015        // of the string that has escapes.
1016        for (int i=0; i<length; i++){
1017            char c = s.charAt(i);
1018            if (!((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))){
1019                newLength += 1;
1020            }
1021        }
1022        if (length == newLength){
1023            // nothing to escape in the string
1024            return s;
1025        }
1026        StringBuffer   sb = new StringBuffer  (newLength);
1027        for (int i=0; i<length; i++){
1028            char c = s.charAt(i);
1029            if (!((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))){
1030                sb.append('\\');
1031            }
1032            sb.append(c);
1033        }
1034        return sb.toString();
1035    }
1036
1037    /**
1038     * Build a regular expression that is each of the terms or'd together.
1039     *
1040     * @param terms a list of search terms.
1041     * @param sb place to build the regular expression.
1042     * @throws IllegalArgumentException if the length of terms is zero.
1043     *
1044     * @since ostermillerutils 1.02.25
1045     */
1046    private static void buildFindAnyPattern(String  [] terms, StringBuffer   sb){
1047        if (terms.length == 0) throw new IllegalArgumentException  ("There must be at least one term to find.");
1048        sb.append("(?:");
1049        for (int i=0; i<terms.length; i++){
1050            if (i>0) sb.append("|");
1051            sb.append("(?:");
1052            sb.append(escapeRegularExpressionLiteral(terms[i]));
1053            sb.append(")");
1054        }
1055        sb.append(")");
1056    }
1057
1058    /**
1059     * Compile a pattern that can will match a string if the string
1060     * contains any of the given terms.
1061     * <p>
1062     * Usage:<br>
1063     * <code>boolean b = getContainsAnyPattern(terms).matcher(s).matches();</code>
1064     * <p>
1065     * If multiple strings are matched against the same set of terms,
1066     * it is more efficient to reuse the pattern returned by this function.
1067     *
1068     * @param terms Array of search strings.
1069     * @return Compiled pattern that can be used to match a string to see if it contains any of the terms.
1070     *
1071     * @since ostermillerutils 1.02.25
1072     */
1073    public static Pattern   getContainsAnyPattern(String  [] terms){
1074        StringBuffer   sb = new StringBuffer  ();
1075        sb.append("(?s).*");
1076        buildFindAnyPattern(terms, sb);
1077        sb.append(".*");
1078        return Pattern.compile(sb.toString());
1079    }
1080
1081    /**
1082     * Compile a pattern that can will match a string if the string
1083     * equals any of the given terms.
1084     * <p>
1085     * Usage:<br>
1086     * <code>boolean b = getEqualsAnyPattern(terms).matcher(s).matches();</code>
1087     * <p>
1088     * If multiple strings are matched against the same set of terms,
1089     * it is more efficient to reuse the pattern returned by this function.
1090     *
1091     * @param terms Array of search strings.
1092     * @return Compiled pattern that can be used to match a string to see if it equals any of the terms.
1093     *
1094     * @since ostermillerutils 1.02.25
1095     */
1096     public static Pattern   getEqualsAnyPattern(String  [] terms){
1097        StringBuffer   sb = new StringBuffer  ();
1098        sb.append("(?s)\\A");
1099        buildFindAnyPattern(terms, sb);
1100        sb.append("\\z");
1101        return Pattern.compile(sb.toString());
1102    }
1103
1104    /**
1105     * Compile a pattern that can will match a string if the string
1106     * starts with any of the given terms.
1107     * <p>
1108     * Usage:<br>
1109     * <code>boolean b = getStartsWithAnyPattern(terms).matcher(s).matches();</code>
1110     * <p>
1111     * If multiple strings are matched against the same set of terms,
1112     * it is more efficient to reuse the pattern returned by this function.
1113     *
1114     * @param terms Array of search strings.
1115     * @return Compiled pattern that can be used to match a string to see if it starts with any of the terms.
1116     *
1117     * @since ostermillerutils 1.02.25
1118     */
1119     public static Pattern   getStartsWithAnyPattern(String  [] terms){
1120        StringBuffer   sb = new StringBuffer  ();
1121        sb.append("(?s)\\A");
1122        buildFindAnyPattern(terms, sb);
1123        sb.append(".*");
1124        return Pattern.compile(sb.toString());
1125    }
1126
1127    /**
1128     * Compile a pattern that can will match a string if the string
1129     * ends with any of the given terms.
1130     * <p>
1131     * Usage:<br>
1132     * <code>boolean b = getEndsWithAnyPattern(terms).matcher(s).matches();</code>
1133     * <p>
1134     * If multiple strings are matched against the same set of terms,
1135     * it is more efficient to reuse the pattern returned by this function.
1136     *
1137     * @param terms Array of search strings.
1138     * @return Compiled pattern that can be used to match a string to see if it ends with any of the terms.
1139     *
1140     * @since ostermillerutils 1.02.25
1141     */
1142    public static Pattern   getEndsWithAnyPattern(String  [] terms){
1143        StringBuffer   sb = new StringBuffer  ();
1144        sb.append("(?s).*");
1145        buildFindAnyPattern(terms, sb);
1146        sb.append("\\z");
1147        return Pattern.compile(sb.toString());
1148    }
1149
1150    /**
1151     * Compile a pattern that can will match a string if the string
1152     * contains any of the given terms.
1153     * <p>
1154     * Case is ignored when matching using Unicode case rules.
1155     * <p>
1156     * Usage:<br>
1157     * <code>boolean b = getContainsAnyPattern(terms).matcher(s).matches();</code>
1158     * <p>
1159     * If multiple strings are matched against the same set of terms,
1160     * it is more efficient to reuse the pattern returned by this function.
1161     *
1162     * @param terms Array of search strings.
1163     * @return Compiled pattern that can be used to match a string to see if it contains any of the terms.
1164     *
1165     * @since ostermillerutils 1.02.25
1166     */
1167    public static Pattern   getContainsAnyIgnoreCasePattern(String  [] terms){
1168        StringBuffer   sb = new StringBuffer  ();
1169        sb.append("(?i)(?u)(?s).*");
1170        buildFindAnyPattern(terms, sb);
1171        sb.append(".*");
1172        return Pattern.compile(sb.toString());
1173    }
1174
1175    /**
1176     * Compile a pattern that can will match a string if the string
1177     * equals any of the given terms.
1178     * <p>
1179     * Case is ignored when matching using Unicode case rules.
1180     * <p>
1181     * Usage:<br>
1182     * <code>boolean b = getEqualsAnyPattern(terms).matcher(s).matches();</code>
1183     * <p>
1184     * If multiple strings are matched against the same set of terms,
1185     * it is more efficient to reuse the pattern returned by this function.
1186     *
1187     * @param terms Array of search strings.
1188     * @return Compiled pattern that can be used to match a string to see if it equals any of the terms.
1189     *
1190     * @since ostermillerutils 1.02.25
1191     */
1192     public static Pattern   getEqualsAnyIgnoreCasePattern(String  [] terms){
1193        StringBuffer   sb = new StringBuffer  ();
1194        sb.append("(?i)(?u)(?s)\\A");
1195        buildFindAnyPattern(terms, sb);
1196        sb.append("\\z");
1197        return Pattern.compile(sb.toString());
1198    }
1199
1200    /**
1201     * Compile a pattern that can will match a string if the string
1202     * starts with any of the given terms.
1203     * <p>
1204     * Case is ignored when matching using Unicode case rules.
1205     * <p>
1206     * Usage:<br>
1207     * <code>boolean b = getStartsWithAnyPattern(terms).matcher(s).matches();</code>
1208     * <p>
1209     * If multiple strings are matched against the same set of terms,
1210     * it is more efficient to reuse the pattern returned by this function.
1211     *
1212     * @param terms Array of search strings.
1213     * @return Compiled pattern that can be used to match a string to see if it starts with any of the terms.
1214     *
1215     * @since ostermillerutils 1.02.25
1216     */
1217     public static Pattern   getStartsWithAnyIgnoreCasePattern(String  [] terms){
1218        StringBuffer   sb = new StringBuffer  ();
1219        sb.append("(?i)(?u)(?s)\\A");
1220        buildFindAnyPattern(terms, sb);
1221        sb.append(".*");
1222        return Pattern.compile(sb.toString());
1223    }
1224
1225    /**
1226     * Compile a pattern that can will match a string if the string
1227     * ends with any of the given terms.
1228     * <p>
1229     * Case is ignored when matching using Unicode case rules.
1230     * <p>
1231     * Usage:<br>
1232     * <code>boolean b = getEndsWithAnyPattern(terms).matcher(s).matches();</code>
1233     * <p>
1234     * If multiple strings are matched against the same set of terms,
1235     * it is more efficient to reuse the pattern returned by this function.
1236     *
1237     * @param terms Array of search strings.
1238     * @return Compiled pattern that can be used to match a string to see if it ends with any of the terms.
1239     *
1240     * @since ostermillerutils 1.02.25
1241     */
1242    public static Pattern   getEndsWithAnyIgnoreCasePattern(String  [] terms){
1243        StringBuffer   sb = new StringBuffer  ();
1244        sb.append("(?i)(?u)(?s).*");
1245        buildFindAnyPattern(terms, sb);
1246        sb.append("\\z");
1247        return Pattern.compile(sb.toString());
1248    }
1249
1250    /**
1251     * Tests to see if the given string contains any of the given terms.
1252     * <p>
1253     * This implementation is more efficient than the brute force approach
1254     * of testing the string against each of the terms.  It instead compiles
1255     * a single regular expression that can test all the terms at once, and
1256     * uses that expression against the string.
1257     * <p>
1258     * This is a convenience method.  If multiple strings are tested against
1259     * the same set of terms, it is more efficient not to compile the regular
1260     * expression multiple times.
1261     * @see #getContainsAnyPattern(String[])
1262     *
1263     * @param s String that may contain any of the given terms.
1264     * @param terms list of substrings that may be contained in the given string.
1265     * @return true iff one of the terms is a substring of the given string.
1266     *
1267     * @since ostermillerutils 1.02.25
1268     */
1269    public static boolean containsAny(String   s, String  [] terms){
1270        return getContainsAnyPattern(terms).matcher(s).matches();
1271    }
1272
1273    /**
1274     * Tests to see if the given string equals any of the given terms.
1275     * <p>
1276     * This implementation is more efficient than the brute force approach
1277     * of testing the string against each of the terms.  It instead compiles
1278     * a single regular expression that can test all the terms at once, and
1279     * uses that expression against the string.
1280     * <p>
1281     * This is a convenience method.  If multiple strings are tested against
1282     * the same set of terms, it is more efficient not to compile the regular
1283     * expression multiple times.
1284     * @see #getEqualsAnyPattern(String[])
1285     *
1286     * @param s String that may equal any of the given terms.
1287     * @param terms list of strings that may equal the given string.
1288     * @return true iff one of the terms is equal to the given string.
1289     *
1290     * @since ostermillerutils 1.02.25
1291     */
1292    public static boolean equalsAny(String   s, String  [] terms){
1293        return getEqualsAnyPattern(terms).matcher(s).matches();
1294    }
1295
1296    /**
1297     * Tests to see if the given string starts with any of the given terms.
1298     * <p>
1299     * This implementation is more efficient than the brute force approach
1300     * of testing the string against each of the terms.  It instead compiles
1301     * a single regular expression that can test all the terms at once, and
1302     * uses that expression against the string.
1303     * <p>
1304     * This is a convenience method.  If multiple strings are tested against
1305     * the same set of terms, it is more efficient not to compile the regular
1306     * expression multiple times.
1307     * @see #getStartsWithAnyPattern(String[])
1308     *
1309     * @param s String that may start with any of the given terms.
1310     * @param terms list of strings that may start with the given string.
1311     * @return true iff the given string starts with one of the given terms.
1312     *
1313     * @since ostermillerutils 1.02.25
1314     */
1315    public static boolean startsWithAny(String   s, String  [] terms){
1316        return getStartsWithAnyPattern(terms).matcher(s).matches();
1317    }
1318
1319    /**
1320     * Tests to see if the given string ends with any of the given terms.
1321     * <p>
1322     * This implementation is more efficient than the brute force approach
1323     * of testing the string against each of the terms.  It instead compiles
1324     * a single regular expression that can test all the terms at once, and
1325     * uses that expression against the string.
1326     * <p>
1327     * This is a convenience method.  If multiple strings are tested against
1328     * the same set of terms, it is more efficient not to compile the regular
1329     * expression multiple times.
1330     * @see #getEndsWithAnyPattern(String[])
1331     *
1332     * @param s String that may end with any of the given terms.
1333     * @param terms list of strings that may end with the given string.
1334     * @return true iff the given string ends with one of the given terms.
1335     *
1336     * @since ostermillerutils 1.02.25
1337     */
1338    public static boolean endsWithAny(String   s, String  [] terms){
1339        return getEndsWithAnyPattern(terms).matcher(s).matches();
1340    }
1341
1342    /**
1343     * Tests to see if the given string contains any of the given terms.
1344     * <p>
1345     * Case is ignored when matching using Unicode case rules.
1346     * <p>
1347     * This implementation is more efficient than the brute force approach
1348     * of testing the string against each of the terms.  It instead compiles
1349     * a single regular expression that can test all the terms at once, and
1350     * uses that expression against the string.
1351     * <p>
1352     * This is a convenience method.  If multiple strings are tested against
1353     * the same set of terms, it is more efficient not to compile the regular
1354     * expression multiple times.
1355     * @see #getContainsAnyIgnoreCasePattern(String[])
1356     *
1357     * @param s String that may contain any of the given terms.
1358     * @param terms list of substrings that may be contained in the given string.
1359     * @return true iff one of the terms is a substring of the given string.
1360     *
1361     * @since ostermillerutils 1.02.25
1362     */
1363    public static boolean containsAnyIgnoreCase(String   s, String  [] terms){
1364        return getContainsAnyIgnoreCasePattern(terms).matcher(s).matches();
1365    }
1366
1367    /**
1368     * Tests to see if the given string equals any of the given terms.
1369     * <p>
1370     * Case is ignored when matching using Unicode case rules.
1371     * <p>
1372     * This implementation is more efficient than the brute force approach
1373     * of testing the string against each of the terms.  It instead compiles
1374     * a single regular expression that can test all the terms at once, and
1375     * uses that expression against the string.
1376     * <p>
1377     * This is a convenience method.  If multiple strings are tested against
1378     * the same set of terms, it is more efficient not to compile the regular
1379     * expression multiple times.
1380     * @see #getEqualsAnyIgnoreCasePattern(String[])
1381     *
1382     * @param s String that may equal any of the given terms.
1383     * @param terms list of strings that may equal the given string.
1384     * @return true iff one of the terms is equal to the given string.
1385     *
1386     * @since ostermillerutils 1.02.25
1387     */
1388    public static boolean equalsAnyIgnoreCase(String   s, String  [] terms){
1389        return getEqualsAnyIgnoreCasePattern(terms).matcher(s).matches();
1390    }
1391
1392    /**
1393     * Tests to see if the given string starts with any of the given terms.
1394     * <p>
1395     * Case is ignored when matching using Unicode case rules.
1396     * <p>
1397     * This implementation is more efficient than the brute force approach
1398     * of testing the string against each of the terms.  It instead compiles
1399     * a single regular expression that can test all the terms at once, and
1400     * uses that expression against the string.
1401     * <p>
1402     * This is a convenience method.  If multiple strings are tested against
1403     * the same set of terms, it is more efficient not to compile the regular
1404     * expression multiple times.
1405     * @see #getStartsWithAnyIgnoreCasePattern(String[])
1406     *
1407     * @param s String that may start with any of the given terms.
1408     * @param terms list of strings that may start with the given string.
1409     * @return true iff the given string starts with one of the given terms.
1410     *
1411     * @since ostermillerutils 1.02.25
1412     */
1413    public static boolean startsWithAnyIgnoreCase(String   s, String  [] terms){
1414        return getStartsWithAnyIgnoreCasePattern(terms).matcher(s).matches();
1415    }
1416
1417    /**
1418     * Tests to see if the given string ends with any of the given terms.
1419     * <p>
1420     * Case is ignored when matching using Unicode case rules.
1421     * <p>
1422     * This implementation is more efficient than the brute force approach
1423     * of testing the string against each of the terms.  It instead compiles
1424     * a single regular expression that can test all the terms at once, and
1425     * uses that expression against the string.
1426     * <p>
1427     * This is a convenience method.  If multiple strings are tested against
1428     * the same set of terms, it is more efficient not to compile the regular
1429     * expression multiple times.
1430     * @see #getEndsWithAnyIgnoreCasePattern(String[])
1431     *
1432     * @param s String that may end with any of the given terms.
1433     * @param terms list of strings that may end with the given string.
1434     * @return true iff the given string ends with one of the given terms.
1435     *
1436     * @since ostermillerutils 1.02.25
1437     */
1438    public static boolean endsWithAnyIgnoreCase(String   s, String  [] terms){
1439        return getEndsWithAnyIgnoreCasePattern(terms).matcher(s).matches();
1440    }
1441}
1442
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags