KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > roller > util > Blacklist


1 /*
2  * Created on Nov 11, 2003
3  */

4 package org.roller.util;
5
import org.roller.util.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
29
30 /**
31  * Based on the list provided by Jay Allen for
32  * MT-Blacklist:
33  * http://www.jayallen.org/projects/mt-blacklist/
34  *
35  * Will provide response whether submitted string
36  * contains an item listed in the supplied blacklist.
37  * This implementation does not do everything
38  * MT-Blacklist does, such as the "Search & De-spam mode".
39  *
40  * @author lance
41  */

42 public class Blacklist
43 {
44     private static Log mLogger = LogFactory.getLog(Blacklist.class);
45
46     private static Blacklist blacklist;
47     
48     public static final String JavaDoc blacklistFile = "blacklist.txt";
49     private static final String JavaDoc blacklistURL = "http://www.jayallen.org/comment_spam/blacklist.txt";
50     private static final String JavaDoc lastUpdateStr = "Last update:";
51
52     // Default location of blacklist file (relative to realPath) in case that uploadDir is null or empty
53
// and realPath is non-null.
54
private static final String JavaDoc DEFAULT_BLACKLIST_DIR = "resources";
55     private String JavaDoc realPath;
56     private String JavaDoc uploadDir;
57
58     private List JavaDoc blacklistStr = new LinkedList JavaDoc();
59     private List JavaDoc blacklistRegex = new LinkedList JavaDoc();
60     
61     private Date JavaDoc ifModifiedSince = null;
62
63     /**
64      * Singleton factory method.
65      */

66     public static Blacklist getBlacklist(String JavaDoc realPath, String JavaDoc uploadDir)
67     {
68         if (blacklist == null)
69         {
70             Blacklist temp = new Blacklist(realPath, uploadDir);
71             temp.extractFromFile();
72             blacklist = temp;
73         }
74         return blacklist;
75     }
76     
77     /**
78      * This will try to download a new set of Blacklist
79      * rules. If no change has occurred then return
80      * current Blacklist.
81      *
82      * @return New Blacklist if rules have changed,
83      * otherwise return current Blacklist.
84      */

85     public static void checkForUpdate()
86     {
87         blacklist = blacklist.extractFromURL();
88     }
89
/**
 * Private constructor: instances are only created inside this class.
 * Stores the two locations; no rules are loaded here.
 *
 * @param realPath  base path under which the default blacklist directory is resolved
 * @param uploadDir directory expected to contain the blacklist file
 */
private Blacklist(String realPath, String uploadDir)
{
    this.uploadDir = uploadDir;
    this.realPath = realPath;
}
98     
99     /**
100      * Read a local file for Blacklist rules.
101      */

102     private void extractFromFile()
103     {
104         InputStream JavaDoc txtStream = getFileInputStream();
105         if (txtStream != null)
106         {
107             readFromStream(txtStream, false);
108         }
109         else
110         {
111             throw new NullPointerException JavaDoc("Unable to load blacklist.txt. " +
112             "Make sure blacklist.txt is in classpath.");
113         }
114     }
115
116     /**
117      * Read in the InputStream for rules.
118      * @param txtStream
119      */

120     private String JavaDoc readFromStream(InputStream JavaDoc txtStream, boolean saveStream)
121     {
122         String JavaDoc line;
123         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
124         BufferedReader JavaDoc in = null;
125         try
126         {
127             in = new BufferedReader JavaDoc(
128                 new InputStreamReader JavaDoc( txtStream, "UTF-8" ) );
129             while ((line = in.readLine()) != null)
130             {
131                 if (line.startsWith("#"))
132                 {
133                     readComment(line);
134                 }
135                 else
136                 {
137                     readRule(line);
138                 }
139                 
140                 if (saveStream) buf.append(line).append("\n");
141             }
142         }
143         catch (Exception JavaDoc e)
144         {
145             mLogger.error(e);
146         }
147         finally
148         {
149            try
150             {
151                  if (in != null) in.close();
152             }
153             catch (IOException JavaDoc e1)
154             {
155                 mLogger.error(e1);
156             }
157         }
158         return buf.toString();
159     }
160     
161     /**
162      * Connect to the web for blacklist. Check to
163      * see if a newer version exists before parsing.
164      */

165     private Blacklist extractFromURL()
166     {
167         // now see if we can update it from the web
168
Blacklist oldBlacklist = getBlacklist(realPath, uploadDir);
169         Blacklist newBlacklist = new Blacklist(realPath, uploadDir);
170         try
171         {
172             URL JavaDoc url = new URL JavaDoc(blacklistURL);
173             HttpURLConnection JavaDoc connection = (HttpURLConnection JavaDoc)url.openConnection();
174             if (oldBlacklist.ifModifiedSince != null)
175             {
176                 connection.setRequestProperty("If-Modified-Since",
177                                               DateUtil.formatRfc822(oldBlacklist.ifModifiedSince));
178             }
179
180             // did the connection return NotModified? If so, no need to parse
181
if ( connection.getResponseCode() == HttpURLConnection.HTTP_NOT_MODIFIED)
182             {
183                 // we already have a current blacklist
184
return oldBlacklist;
185             }
186
187             // did the connection return a LastModified header?
188
long lastModifiedLong = connection.getHeaderFieldDate("Last-Modified", -1);
189
190             // if no ifModifiedSince, or lastModifiedLong is newer, then read stream
191
if (oldBlacklist.ifModifiedSince == null ||
192                 oldBlacklist.ifModifiedSince.getTime() < lastModifiedLong)
193             {
194                 String JavaDoc results = newBlacklist.readFromStream( connection.getInputStream(), true );
195
196                 // save the new blacklist
197
newBlacklist.writeToFile(results);
198
199                 if (newBlacklist.ifModifiedSince == null && lastModifiedLong != -1)
200                 {
201                     newBlacklist.ifModifiedSince = new Date JavaDoc(lastModifiedLong);
202                 }
203
204                 return newBlacklist;
205             }
206         }
207         catch (Exception JavaDoc e)
208         {
209             // Catch all exceptions and just log at INFO (should this be WARN?) without a full stacktrace.
210
mLogger.info("Roller Blacklist Update: Unable to update comment spam blacklist due to exception: " + e);
211         }
212         return oldBlacklist;
213     }
214
215     /**
216      * @param str
217      */

218     private void readRule(String JavaDoc str)
219     {
220         if (StringUtils.isEmpty(str)) return; // bad condition
221

222         String JavaDoc rule = str.trim();
223         
224         if (str.indexOf("#") > 0) // line has a comment
225
{
226             int commentLoc = str.indexOf("#");
227             rule = str.substring(0, commentLoc-1).trim(); // strip comment
228
}
229         
230         if (rule.indexOf( "(" ) > -1) // regex rule
231
{
232             // pre-compile patterns since they will be frequently used
233
blacklistRegex.add(Pattern.compile(rule));
234         }
235         else if (StringUtils.isNotEmpty(rule))
236         {
237             blacklistStr.add(rule);
238         }
239     }
240
241     /**
242      * Try to parse out "Last update" value: 2004/03/08 23:17:30.
243      * @param str
244      */

245     private void readComment(String JavaDoc str)
246     {
247         int lastUpdatePos = str.indexOf(lastUpdateStr);
248         if (lastUpdatePos > -1)
249         {
250             str = str.substring(lastUpdatePos + lastUpdateStr.length());
251             str = str.trim();
252             try
253             {
254                 SimpleDateFormat JavaDoc sdf = new SimpleDateFormat JavaDoc("yyyy/MM/dd HH:mm:ss");
255                 ifModifiedSince = DateUtil.parse(str, sdf);
256             }
257             catch (ParseException JavaDoc e)
258             {
259                 mLogger.debug("ParseException reading " + str);
260             }
261         }
262     }
263
264     /**
265      * Does the String argument match any of the rules in the blacklist?
266      *
267      * @param str
268      * @return
269      */

270     public boolean isBlacklisted(String JavaDoc str)
271     {
272         if (str == null || StringUtils.isEmpty(str)) return false;
273         
274         // First iterate over blacklist, doing indexOf.
275
// Then iterate over blacklistRegex and test.
276
// As soon as there is a hit in either case return true
277

278         // test plain String.indexOf
279
if( testStringRules(str) ) return true;
280         
281         // test regex blacklisted
282
return testRegExRules(str);
283     }
284     
285     /**
286      * Test String against the RegularExpression rules.
287      *
288      * @param str
289      * @return
290      */

291     private boolean testRegExRules(String JavaDoc str)
292     {
293         boolean hit = false;
294         Pattern JavaDoc testPattern = null;
295         Iterator JavaDoc iter = blacklistRegex.iterator();
296         while (iter.hasNext())
297         {
298             testPattern = (Pattern JavaDoc)iter.next();
299             
300             // want to see what it is matching on
301
// if we are in "debug mode"
302
if (mLogger.isDebugEnabled())
303             {
304                 Matcher JavaDoc matcher = testPattern.matcher(str);
305                 if (matcher.find())
306                 {
307                     mLogger.debug(matcher.group() + " matched by " + testPattern.pattern());
308                     hit = true;
309                     break;
310                 }
311             }
312             else
313             {
314                 if (testPattern.matcher(str).find())
315                 {
316                     hit = true;
317                     break;
318                 }
319             }
320         }
321         return hit;
322     }
323
324     /**
325      * Test the String against the String rules,
326      * using simple indexOf.
327      *
328      * @param str
329      * @return
330      */

331     private boolean testStringRules(String JavaDoc str)
332     {
333         String JavaDoc test;
334         Iterator JavaDoc iter = blacklistStr.iterator();
335         boolean hit = false;
336         while (iter.hasNext())
337         {
338             test = (String JavaDoc)iter.next();
339             //System.out.println("check against |" + test + "|");
340
if (str.indexOf(test) > -1)
341             {
342                 // want to see what it is matching on
343
if (mLogger.isDebugEnabled())
344                 {
345                     mLogger.debug("matched:" + test + ":");
346                 }
347                 hit = true;
348                 break;
349             }
350         }
351         return hit;
352     }
353     
354     /**
355      * Try reading blacklist.txt from wherever RollerConfig.getUploadDir()
356      * is, otherwise try loading it from web resource (/WEB-INF/).
357      */

358     private InputStream JavaDoc getFileInputStream()
359     {
360         try
361         {
362             // TODO: clean up
363
// This was previously throwing an NPE to get to the exception case
364
// when being called in several places with indexDir==null.
365
// This is just about as bad; it needs to be cleaned up.
366
String JavaDoc path = getBlacklistFilePath();
367             if (path == null)
368             {
369                 throw new FileNotFoundException JavaDoc(
370                         "null path (indexDir and realPath both null)");
371             }
372             return new FileInputStream JavaDoc( path );
373         }
374         catch (Exception JavaDoc e)
375         {
376             return getClass().getResourceAsStream("/"+blacklistFile);
377         }
378     }
379
380     /**
381      * @param results
382      */

383     private void writeToFile(String JavaDoc results)
384     {
385         FileWriter JavaDoc out = null;
386         String JavaDoc path = getBlacklistFilePath();
387         if (path == null)
388         {
389             mLogger.debug("Not writing blacklist file since directory paths were null.");
390             return;
391         }
392         try
393         {
394             // attempt writing results
395
out = new FileWriter JavaDoc(path);
396             out.write( results.toCharArray() );
397         }
398         catch (Exception JavaDoc e)
399         {
400             mLogger.info("Unable to write new " + path);
401         }
402         finally
403         {
404             try
405             {
406                 if (out != null) out.close();
407             }
408             catch (IOException JavaDoc e)
409             {
410                 mLogger.error("Unable to close stream to " + path);
411             }
412         }
413     }
414
415     // Added for ROL-612 - TODO: Consider refactoring - nearly duplicate code in FileManagerImpl.
416
private String JavaDoc getBlacklistFilePath()
417     {
418         if (uploadDir == null && realPath==null)
419         {
420             // to preserve existing behavior forced to interpret this differently
421
return null;
422         }
423         if (uploadDir == null || uploadDir.trim().length() == 0)
424         {
425             uploadDir = realPath + File.separator + DEFAULT_BLACKLIST_DIR;
426         }
427         return uploadDir + File.separator + blacklistFile;
428     }
429
430     /**
431      * Return pretty list of String and RegEx rules.
432      */

433     public String JavaDoc toString()
434     {
435         StringBuffer JavaDoc buf = new StringBuffer JavaDoc("blacklist ");
436         buf.append(blacklistStr).append("\n");
437         buf.append("Regex blacklist ").append(blacklistRegex);
438         return buf.toString();
439     }
440 }
441
Popular Tags