KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > clipbuilder > html > util > StringUtilities


1 package org.jahia.clipbuilder.html.util;
2 import org.apache.commons.lang.StringUtils;
3 import org.htmlparser.util.Translate;
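/**
 * Static string helpers: best-match search inside a string, Levenshtein
 * distance, and conversion between characters and HTML entities.
 *
 * @author Tlili Khaled
 */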

9 public abstract class StringUtilities {
10     private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(StringUtilities.class);
11
12
13     /**
14      * Constructor for the StringUtilities object
15      */

16     public StringUtilities() {
17     }
18
19
20     /**
21      * Gets the part of the string where that match the best the string pattern
22      * Exemple: where = ttooootottooto patterrn: toto. Result is tooto
23      *
24      *@param where Description of Parameter
25      *@param pattern Description of Parameter
26      *@param separator Description of Parameter
27      *@return The BestMatch value
28      */

29     public static String JavaDoc getBestMatchString(String JavaDoc where, String JavaDoc pattern, char separator) {
30         if (pattern == null || pattern.equalsIgnoreCase("")) {
31             return "";
32         }
33
34         // init varibles
35
logger.debug("[ where = " + where + " ]");
36         logger.debug("[ pattern = " + pattern + " ]");
37         String JavaDoc resultString = "";
38
39         //Compute the best match for the current subString lenght
40
Object JavaDoc[] res = getBestMatch(where, pattern, separator);
41
42         resultString = (String JavaDoc) res[0];
43
44         return resultString;
45     }
46
47
48
49     /**
50      * Compute Levenshtein distance
51      *
52      *@param s Description of Parameter
53      *@param t Description of Parameter
54      *@return Description of the Returned Value
55      */

56     public static int distance(String JavaDoc s, String JavaDoc t) {
57         return StringUtils.getLevenshteinDistance(s, t);
58         /*
59          * int d[][];
60          * // matrix
61          * int n;
62          * // length of s
63          * int m;
64          * // length of t
65          * int i;
66          * // iterates through s
67          * int j;
68          * // iterates through t
69          * char s_i;
70          * // ith character of s
71          * char t_j;
72          * // jth character of t
73          * int cost;
74          * // cost
75          * // Step 1
76          * n = s.length();
77          * m = t.length();
78          * if (n == 0) {
79          * return m;
80          * }
81          * if (m == 0) {
82          * return n;
83          * }
84          * d = new int[n + 1][m + 1];
85          * // Step 2
86          * for (i = 0; i <= n; i++) {
87          * d[i][0] = i;
88          * }
89          * for (j = 0; j <= m; j++) {
90          * d[0][j] = j;
91          * }
92          * // Step 3
93          * for (i = 1; i <= n; i++) {
94          * s_i = s.charAt(i - 1);
95          * // Step 4
96          * for (j = 1; j <= m; j++) {
97          * t_j = t.charAt(j - 1);
98          * // Step 5
99          * if (s_i == t_j) {
100          * cost = 0;
101          * }
102          * else {
103          * cost = 1;
104          * }
105          * // Step 6
106          * d[i][j] = minimum(d[i - 1][j] + 1, d[i][j - 1] + 1,
107          * d[i - 1][j - 1] + cost);
108          * }
109          * }
110          * // Step 7
111          * return d[n][m];
112          */

113     }
114
115
116     /**
117      * Description of the Method
118      *
119      *@param s Description of Parameter
120      *@return Description of the Returned Value
121      */

122     public static String JavaDoc replaceSpecialGraphics(String JavaDoc s) {
123         if (s == null) {
124             return null;
125         }
126         /*
127          * Object[][] specialGraphic = {
128          * {
129          * "&lt;", "<"}
130          * , {
131          * "&gt;", ">"}
132          * , {
133          * "&amp;", "&"}
134          * , {
135          * "&quot;", "\""}
136          * , {
137          * "&nbsp;", " "}
138          * , {
139          * "&reg;", ""}
140          * , {
141          * "&copy;", ""}
142          * , {
143          * "&ensp;", ""}
144          * , {
145          * "&emsp", ""}
146          * };
147          */

148         Object JavaDoc[][] specialGraphic = {
149                 {
150                 "&amp;", "&"}
151                 };
152
153         for (int i = 0; i < specialGraphic.length; i++) {
154             s = s.replaceAll((String JavaDoc) specialGraphic[i][0], (String JavaDoc) specialGraphic[i][1]);
155         }
156         return s;
157     }
158
159
160     /**
161      * Description of the Method
162      *
163      *@param s Description of Parameter
164      *@return Description of the Returned Value
165      */

166     public static String JavaDoc escapeHTML(String JavaDoc s) {
167         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
168         int n = s.length();
169         for (int i = 0; i < n; i++) {
170             char c = s.charAt(i);
171             switch (c) {
172                 case 'à':
173                     sb.append("&agrave;");
174                     break;
175                 case 'À':
176                     sb.append("&Agrave;");
177                     break;
178                 case 'â':
179                     sb.append("&acirc;");
180                     break;
181                 case 'Â':
182                     sb.append("&Acirc;");
183                     break;
184                 case 'ä':
185                     sb.append("&auml;");
186                     break;
187                 case 'Ä':
188                     sb.append("&Auml;");
189                     break;
190                 case 'å':
191                     sb.append("&aring;");
192                     break;
193                 case 'Å':
194                     sb.append("&Aring;");
195                     break;
196                 case 'æ':
197                     sb.append("&aelig;");
198                     break;
199                 case 'Æ':
200                     sb.append("&AElig;");
201                     break;
202                 case 'ç':
203                     sb.append("&ccedil;");
204                     break;
205                 case 'Ç':
206                     sb.append("&Ccedil;");
207                     break;
208                 case 'é':
209                     sb.append("&eacute;");
210                     break;
211                 case 'É':
212                     sb.append("&Eacute;");
213                     break;
214                 case 'è':
215                     sb.append("&egrave;");
216                     break;
217                 case 'È':
218                     sb.append("&Egrave;");
219                     break;
220                 case 'ê':
221                     sb.append("&ecirc;");
222                     break;
223                 case 'Ê':
224                     sb.append("&Ecirc;");
225                     break;
226                 case 'ë':
227                     sb.append("&euml;");
228                     break;
229                 case 'Ë':
230                     sb.append("&Euml;");
231                     break;
232                 case 'ï':
233                     sb.append("&iuml;");
234                     break;
235                 case 'Ï':
236                     sb.append("&Iuml;");
237                     break;
238                 case 'ô':
239                     sb.append("&ocirc;");
240                     break;
241                 case 'Ô':
242                     sb.append("&Ocirc;");
243                     break;
244                 case 'ö':
245                     sb.append("&ouml;");
246                     break;
247                 case 'Ö':
248                     sb.append("&Ouml;");
249                     break;
250                 case 'ø':
251                     sb.append("&oslash;");
252                     break;
253                 case 'Ø':
254                     sb.append("&Oslash;");
255                     break;
256                 case 'ß':
257                     sb.append("&szlig;");
258                     break;
259                 case 'ù':
260                     sb.append("&ugrave;");
261                     break;
262                 case 'Ù':
263                     sb.append("&Ugrave;");
264                     break;
265                 case 'û':
266                     sb.append("&ucirc;");
267                     break;
268                 case 'Û':
269                     sb.append("&Ucirc;");
270                     break;
271                 case 'ü':
272                     sb.append("&uuml;");
273                     break;
274                 case 'Ü':
275                     sb.append("&Uuml;");
276                     break;
277                 case '®':
278                     sb.append("&reg;");
279                     break;
280                 case '©':
281                     sb.append("&copy;");
282                     break;
283                 case '€':
284                     sb.append("&euro;");
285                     break;
286                 // be carefull with this one (non-breaking whitee space)
287

288                 /*case ' ':
289                     sb.append(" ");
290                     break;*/

291                 default:
292                     sb.append(c);
293                     break;
294             }
295         }
296         return sb.toString();
297     }
298
299
300     /**
301      * Repare HTML string, encode special character and add missing ;
302      *
303      *@param s Description of Parameter
304      *@return Description of the Returned Value
305      */

306     public static String JavaDoc repareHTML(String JavaDoc s) {
307         s = replaceSpecialGraphics(s);
308         s = escapeHTML(s);
309         return s;
310     }
311
312
313
314     /**
315      * Gets the BestMatch attribute of the StringUtilities class
316      *
317      *@param where Description of Parameter
318      *@param pattern Description of Parameter
319      *@param separator Description of Parameter
320      *@return The BestMacthf value
321      */

322     private static Object JavaDoc[] getBestMatch(String JavaDoc where, String JavaDoc pattern, char separator) {
323         //no pattern
324
if (pattern == null || pattern.equalsIgnoreCase("")) {
325             return null;
326         }
327
328         // Exemple: where = ttooootottooto pattern: toto
329
String JavaDoc result = "Not set";
330         //Step 1: compute all the distance between patternr and all the string include
331
//in whrere whith size = patternsize
332

333         //Exemple patterSite = 4
334
int patternSize = pattern.length();
335         logger.debug("patterLastIndex : " + patternSize);
336         //Exemple whereLastIndex = 13
337
int whereSize = where.length();
338
339         // if the patter is bigger than the where_string than where is the best string
340
if (patternSize > (whereSize)) {
341             Object JavaDoc[] res = new Object JavaDoc[2];
342             res[0] = where;
343             res[1] = new Integer JavaDoc(distance(where, pattern));
344             return res;
345         }
346         logger.debug("whereLastIndex : " + whereSize);
347         //Exemple lastFirstIndex = 9
348
int lastFirstIndex = whereSize - patternSize;
349         logger.debug("lastFirstIndex : " + lastFirstIndex);
350         int resultDistance = Integer.MAX_VALUE;
351
352         //after this loop, result will take the value of the substring that have the miner distance value
353
for (int i = 0; i <= lastFirstIndex; ) {
354             String JavaDoc subString = where.substring(i, i + patternSize);
355             //logger.debug("Current SubString is : " + subString);
356
int currentDistance = distance(subString, pattern);
357             if (currentDistance < resultDistance) {
358                 result = subString;
359                 resultDistance = currentDistance;
360
361             }
362
363             // go to next separator
364

365             i = i + subString.substring(1).indexOf(separator) + 1;
366             if (i == -1) {
367                 break;
368             }
369             //logger.debug("[ Next index is " + i + "]");
370
}
371
372         Object JavaDoc[] res = new Object JavaDoc[3];
373         res[0] = result;
374         res[1] = new Integer JavaDoc(resultDistance);
375
376         return res;
377     }
378
379
380     /**
381      * Description of the Method
382      *
383      *@param a Description of Parameter
384      *@param b Description of Parameter
385      *@param c Description of Parameter
386      *@return Description of the Returned Value
387      */

388     private static int minimum(int a, int b, int c) {
389         int mi;
390
391         mi = a;
392         if (b < mi) {
393             mi = b;
394         }
395         if (c < mi) {
396             mi = c;
397         }
398         return mi;
399     }
400
401 }
402
Popular Tags