1 18 19 package org.apache.roller.util; 20 21 import org.apache.commons.logging.Log; 22 import org.apache.commons.logging.LogFactory; 23 import java.io.BufferedReader ; 24 import java.io.FileInputStream ; 25 import java.io.IOException ; 26 import java.io.InputStream ; 27 import java.io.InputStreamReader ; 28 import java.io.File ; 29 import java.io.FileOutputStream ; 30 import java.net.HttpURLConnection ; 31 import java.net.URL ; 32 import java.text.ParseException ; 33 import java.text.SimpleDateFormat ; 34 import java.util.ArrayList ; 35 import java.util.Date ; 36 import java.util.Iterator ; 37 import java.util.LinkedList ; 38 import java.util.List ; 39 import java.util.StringTokenizer ; 40 import java.util.regex.Matcher ; 41 import java.util.regex.Pattern ; 42 import org.apache.roller.config.RollerConfig; 43 import org.apache.commons.lang.StringUtils; 44 45 62 public class Blacklist { 63 64 private static Log mLogger = LogFactory.getLog(Blacklist.class); 65 66 private static Blacklist blacklist; 67 private static final String blacklistFile = "blacklist.txt"; 68 private static final String lastUpdateStr = "Last update:"; 69 70 71 private static final String blacklistURL = null; 72 73 private Date lastModified = null; 74 private List blacklistStr = new LinkedList (); 75 private List blacklistRegex = new LinkedList (); 76 77 static { 79 mLogger.info("Initializing MT Blacklist"); 80 blacklist = new Blacklist(); 81 blacklist.loadBlacklistFromFile(null); 82 } 83 84 85 private Blacklist() { 86 } 87 88 89 public static Blacklist getBlacklist() { 90 return blacklist; 91 } 92 93 94 public static void checkForUpdate() { 95 getBlacklist().update(); 96 } 97 98 99 public void update() { 100 if (this.blacklistURL != null) { 101 boolean blacklist_updated = this.downloadBlacklist(); 102 if (blacklist_updated) { 103 this.loadBlacklistFromFile(null); 104 } 105 } 106 } 107 108 109 private boolean downloadBlacklist() { 110 111 boolean blacklist_updated = false; 112 try { 113 mLogger.debug("Attempting to download MT blacklist"); 114 115 URL url = new URL (blacklistURL); 116 HttpURLConnection connection = 117 (HttpURLConnection ) url.openConnection(); 118 119 connection.setRequestProperty("User-Agent", "Mozilla/5.0"); 123 124 if (this.lastModified != null) { 125 connection.setRequestProperty("If-Modified-Since", 126 DateUtil.formatRfc822(this.lastModified)); 127 } 128 129 int responseCode = connection.getResponseCode(); 130 131 mLogger.debug("HttpConnection response = "+responseCode); 132 133 if (responseCode == HttpURLConnection.HTTP_NOT_MODIFIED) { 135 mLogger.debug("MT blacklist site says we are current"); 136 return false; 137 } 138 139 long lastModifiedLong = 141 connection.getHeaderFieldDate("Last-Modified", -1); 142 143 if (responseCode == HttpURLConnection.HTTP_OK && 145 (this.lastModified == null || 146 this.lastModified.getTime() < lastModifiedLong)) { 147 148 mLogger.debug("my last modified = "+this.lastModified.getTime()); 149 mLogger.debug("MT last modified = "+lastModifiedLong); 150 151 InputStream instream = connection.getInputStream(); 153 154 String uploadDir = RollerConfig.getProperty("uploads.dir"); 155 String path = uploadDir + File.separator + blacklistFile; 156 FileOutputStream outstream = new FileOutputStream (path); 157 158 mLogger.debug("writing updated MT blacklist to "+path); 159 160 byte[] buf = new byte[4096]; 162 int length = 0; 163 while((length = instream.read(buf)) > 0) 164 outstream.write(buf, 0, length); 165 166 outstream.close(); 167 instream.close(); 168 169 blacklist_updated = true; 170 171 mLogger.debug("MT blacklist download completed."); 172 173 } else { 174 mLogger.debug("blacklist *NOT* saved, assuming we are current"); 175 } 176 177 } catch (Exception e) { 178 mLogger.error("error downloading blacklist", e); 179 } 180 181 return blacklist_updated; 182 } 183 184 190 public void loadBlacklistFromFile(String blacklistFilePath) { 191 192 InputStream txtStream = null; 193 try { 194 String path = blacklistFilePath; 195 if (path == null) { 196 String uploadDir = RollerConfig.getProperty("uploads.dir"); 197 path = uploadDir + File.separator + blacklistFile; 198 } 199 File blacklistFile = new File (path); 200 201 if (this.lastModified != null && 203 this.lastModified.getTime() >= blacklistFile.lastModified()) { 204 mLogger.debug("Blacklist is current, no need to load again"); 205 return; 206 } else { 207 this.lastModified = new Date (blacklistFile.lastModified()); 208 } 209 txtStream = new FileInputStream (blacklistFile); 210 mLogger.info("Loading blacklist from "+path); 211 212 } catch (Exception e) { 213 txtStream = getClass().getResourceAsStream("/"+blacklistFile); 215 mLogger.warn("Couldn't find downloaded blacklist, " 216 + "loading from classpath instead"); 217 } 218 219 if (txtStream != null) { 220 readFromStream(txtStream, false); 221 } else { 222 mLogger.error("Couldn't load a blacklist file from anywhere, " 223 + "this means blacklist checking is disabled for now."); 224 } 225 mLogger.info("Number of blacklist string rules: "+blacklistStr.size()); 226 mLogger.info("Number of blacklist regex rules: "+blacklistRegex.size()); 227 } 228 229 233 private String readFromStream(InputStream txtStream, boolean saveStream) { 234 String line; 235 StringBuffer buf = new StringBuffer (); 236 BufferedReader in = null; 237 try { 238 in = new BufferedReader ( 239 new InputStreamReader ( txtStream, "UTF-8" ) ); 240 while ((line = in.readLine()) != null) { 241 if (line.startsWith("#")) { 242 readComment(line); 243 } else { 244 readRule(line); 245 } 246 247 if (saveStream) buf.append(line).append("\n"); 248 } 249 } catch (Exception e) { 250 mLogger.error(e); 251 } finally { 252 try { 253 if (in != null) in.close(); 254 } catch (IOException e1) { 255 mLogger.error(e1); 256 } 257 } 258 return buf.toString(); 259 } 260 261 private void readRule(String str) { 262 if (StringUtils.isEmpty(str)) return; 264 String rule = str.trim(); 265 266 if (str.indexOf("#") > 0) { 268 int commentLoc = str.indexOf("#"); 269 rule = str.substring(0, commentLoc-1).trim(); } 271 272 if (rule.indexOf( "(" ) > -1) { 274 blacklistRegex.add(Pattern.compile(rule)); 276 } else if (StringUtils.isNotEmpty(rule)) { 277 blacklistStr.add(rule); 278 } 279 } 280 281 282 private void readComment(String str) { 283 int lastUpdatePos = str.indexOf(lastUpdateStr); 284 if (lastUpdatePos > -1) { 285 str = str.substring(lastUpdatePos + lastUpdateStr.length()); 286 str = str.trim(); 287 try { 288 SimpleDateFormat sdf = new SimpleDateFormat ("yyyy/MM/dd HH:mm:ss"); 289 lastModified = DateUtil.parse(str, sdf); 290 } catch (ParseException e) { 291 mLogger.debug("ParseException reading " + str); 292 } 293 } 294 } 295 296 299 public boolean isBlacklisted(String str) { 300 return isBlacklisted(str, null, null); 301 } 302 303 310 public boolean isBlacklisted( 311 String str, List moreStringRules, List moreRegexRules) { 312 if (str == null || StringUtils.isEmpty(str)) return false; 313 314 318 List stringRules = blacklistStr; 320 if (moreStringRules != null && moreStringRules.size() > 0) { 321 stringRules = new ArrayList (); 322 stringRules.addAll(moreStringRules); 323 stringRules.addAll(blacklistStr); 324 } 325 if (testStringRules(str, stringRules)) return true; 326 327 List regexRules = blacklistRegex; 329 if (moreRegexRules != null && moreRegexRules.size() > 0) { 330 regexRules = new ArrayList (); 331 regexRules.addAll(moreRegexRules); 332 regexRules.addAll(blacklistRegex); 333 } 334 return testRegExRules(str, regexRules); 335 } 336 337 343 public static boolean matchesRulesOnly( 344 String str, List stringRules, List regexRules) { 345 if (testStringRules(str, stringRules)) return true; 346 return testRegExRules(str, regexRules); 347 } 348 349 350 private static boolean testRegExRules(String str, List regexRules) { 351 boolean hit = false; 352 Pattern testPattern = null; 353 Iterator iter = regexRules.iterator(); 354 while (iter.hasNext()) { 355 testPattern = (Pattern )iter.next(); 356 357 if (mLogger.isDebugEnabled()) { 359 Matcher matcher = testPattern.matcher(str); 360 if (matcher.find()) { 361 mLogger.debug(matcher.group() 362 + " matched by " + testPattern.pattern()); 363 return true; 364 } 365 } else { 366 if (testPattern.matcher(str).find()) { 367 return true; 368 } 369 } 370 } 371 return hit; 372 } 373 374 375 private static boolean testStringRules(String str, List stringRules) { 376 String test; 377 Iterator iter = stringRules.iterator(); 378 boolean hit = false; 379 while (iter.hasNext()) { 380 test = (String )iter.next(); 381 if (str.indexOf(test) > -1) { 382 if (mLogger.isDebugEnabled()) { 384 mLogger.debug("matched:" + test + ":"); 385 } 386 return true; 387 } 388 } 389 return hit; 390 } 391 392 393 public static void populateSpamRules( 394 String blacklist, List stringRules, List regexRules, String addendum) { 395 String weblogWords = blacklist; 396 weblogWords = (weblogWords == null) ? "" : weblogWords; 397 String siteWords = (addendum != null) ? addendum : ""; 398 StringTokenizer toker = new StringTokenizer (siteWords + weblogWords,"\n"); 399 while (toker.hasMoreTokens()) { 400 String token = toker.nextToken().trim(); 401 if (token.startsWith("#")) continue; 402 if (token.startsWith("(")) { 403 regexRules.add(Pattern.compile(token)); 404 } else { 405 stringRules.add(token); 406 } 407 } 408 } 409 410 411 public String toString() { 412 StringBuffer buf = new StringBuffer ("blacklist "); 413 buf.append(blacklistStr).append("\n"); 414 buf.append("Regex blacklist ").append(blacklistRegex); 415 return buf.toString(); 416 } 417 } 418 | Popular Tags |