KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > roller > util > RegexUtil


1 /*
2  * Created on Nov 8, 2003
3  *
4  */

5 package org.roller.util;
6 import org.apache.commons.codec.binary.Hex;
7
8 import java.io.UnsupportedEncodingException JavaDoc;
9 import java.util.ArrayList JavaDoc;
10 import java.util.regex.Matcher JavaDoc;
11 import java.util.regex.Pattern JavaDoc;
12
13
14 /**
15  * @author lance
16  */

/**
 * Regular-expression helpers for hiding e-mail addresses that appear in text:
 * addresses inside <code>mailto:</code> links are percent/hex encoded, and
 * remaining plain-text addresses are rewritten with "-AT-" and "-DOT-".
 *
 * @author lance
 */
public class RegexUtil
{
    /**
     * Matches a <code>mailto:</code> link; group 1 is the address that
     * follows the <code>mailto:</code> prefix.
     */
    public static final Pattern mailtoPattern = Pattern.compile("mailto:([a-zA-Z0-9\\.]+@[a-zA-Z0-9\\.]+\\.[a-zA-Z0-9]+)");

    /**
     * Matches a plain-text address. Group 1 is the "@", group 2 the domain up
     * to (not including) its last dot, group 3 that last dot, and group 4 the
     * final domain label.
     */
    public static final Pattern emailPattern = Pattern.compile("\\b[a-zA-Z0-9\\.]+(@)([a-zA-Z0-9\\.]+)(\\.)([a-zA-Z0-9]+)\\b");

    /**
     * Obfuscates every e-mail address in the given text. Addresses inside
     * <code>mailto:</code> links are replaced by their hex-encoded form (see
     * {@link #encode(String)}), the intent being that browsers can still
     * follow the link; the result is then passed through
     * {@link #obfuscateEmail(String)} to rewrite remaining plain-text addresses.
     *
     * @param str the text to process; may be <code>null</code>
     * @return the text with addresses obfuscated, or <code>null</code> if
     *         <code>str</code> was <code>null</code>
     */
    public static String encodeEmail(String str)
    {
        if (str == null)
        {
            return null;
        }

        Matcher mailtoMatch = mailtoPattern.matcher(str);
        StringBuffer rewritten = new StringBuffer();
        while (mailtoMatch.find())
        {
            // Rewrite exactly the span that matched. Building a second regex
            // from the address (as replaceFirst would) treats its dots as
            // wildcards and can hit a different, earlier piece of text.
            // The replacement only ever contains letters, digits, '.', '@',
            // ':' and '%', so it needs no escaping for appendReplacement.
            String hexed = encode(mailtoMatch.group(1));
            mailtoMatch.appendReplacement(rewritten, "mailto:" + hexed);
        }
        mailtoMatch.appendTail(rewritten);

        return obfuscateEmail(rewritten.toString());
    }

    /**
     * Obfuscates plain-text e-mail addresses by making them "human-readable":
     * the "@" becomes "-AT-" and the last dot of the domain becomes "-DOT-",
     * e.g. <code>foo@bar.com</code> becomes <code>foo-AT-bar-DOT-com</code>.
     * This is still too easy for machines to parse, however.
     *
     * @param str the text to process; may be <code>null</code>
     * @return the text with each matched address rewritten, or
     *         <code>null</code> if <code>str</code> was <code>null</code>
     */
    public static String obfuscateEmail(String str)
    {
        if (str == null)
        {
            return null;
        }

        Matcher emailMatch = emailPattern.matcher(str);
        StringBuffer rewritten = new StringBuffer();
        while (emailMatch.find())
        {
            // Everything from the start of the match up to the "@" (group 1)
            // is the local part; the pattern has no capture group for it.
            String localPart = str.substring(emailMatch.start(), emailMatch.start(1));

            // Replace the matched address itself rather than the first "@" or
            // first look-alike domain somewhere else in the text. All pieces
            // are limited to letters, digits, '.' and '-', so the replacement
            // string needs no escaping.
            String obfuscated = localPart + "-AT-"
                    + emailMatch.group(2) + "-DOT-" + emailMatch.group(4);
            emailMatch.appendReplacement(rewritten, obfuscated);
        }
        emailMatch.appendTail(rewritten);
        return rewritten.toString();
    }

    /**
     * Returns the specified match "group" from every match of the pattern.
     * For each match found, one entry (the text of that group, as returned by
     * <code>Matcher.group(int)</code>) is added to the list, in match order.
     *
     * @param pattern The Pattern to use.
     * @param match The String to match against.
     * @param group The group number to return in case of a match.
     * @return a new list with one entry per match; empty if nothing matched
     */
    public static ArrayList getMatches(Pattern pattern, String match, int group)
    {
        ArrayList matches = new ArrayList();
        Matcher matcher = pattern.matcher(match);
        while (matcher.find())
        {
            matches.add( matcher.group(group) );
        }
        return matches;
    }

    /**
     * Hex-encodes a string: each byte of its UTF-8 representation is written
     * as "%" followed by two lowercase hexadecimal digits, e.g.
     * <code>a@b</code> becomes <code>%61%40%62</code>.
     *
     * @param email the text to encode; may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>email</code>
     *         was <code>null</code>
     */
    public static String encode(String email)
    {
        if (email == null)
        {
            return null;
        }

        byte[] utf8;
        try {
            utf8 = email.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 support is mandatory for every Java platform, so this is
            // not expected to happen; fall back to the unencoded input.
            return email;
        }

        // Three output characters ("%xx") per input byte.
        StringBuffer result = new StringBuffer(utf8.length * 3);
        for (int i = 0; i < utf8.length; i++) {
            int octet = utf8[i] & 0xFF; // treat the signed byte as 0..255
            result.append('%');
            result.append(Character.forDigit(octet >> 4, 16));
            result.append(Character.forDigit(octet & 0x0F, 16));
        }
        return result.toString();
    }
}
108
Popular Tags