package org.archive.crawler.url.canonicalize;

import java.util.regex.Pattern;

/**
 * Canonicalization rule that strips ColdFusion session identifiers
 * (<code>CFID=...&amp;CFTOKEN=...</code>, optionally followed by
 * <code>&amp;jsessionid=...</code>) from a URL.
 *
 * <p>The match is case-insensitive. The actual rewriting is delegated to
 * <code>doStripRegexMatch</code>, which is inherited from {@link BaseRule}
 * and is not defined in this file.
 */
public class StripSessionCFIDs
extends BaseRule {

    private static final long serialVersionUID = 9122689291157731293L;

    /**
     * Pattern source, applied to the whole URL.
     * <ul>
     * <li>Group 1: everything in front of <code>cfid=</code> (at least one
     *     character).</li>
     * <li>Non-capturing session block: <code>cfid=X&amp;cftoken=Y</code>,
     *     optionally followed by <code>&amp;jsessionid=Z</code>.</li>
     * <li>Group 2: whatever follows the next <code>&amp;</code> after the
     *     session block, if anything.</li>
     * </ul>
     * The optional part must begin with <code>&amp;</code>:
     * <code>cftoken=[^&amp;]+</code> always runs up to the next
     * <code>&amp;</code> (or the end of the input), so an alternative that
     * starts with a letter could never match and the trailing session
     * parameter would be left in group 2.
     */
    private static final String REGEX = "^(.+)" +
        "(?:cfid=[^&]+&cftoken=[^&]+(?:&jsessionid=[^&]+)?)(?:&(.*))?$";

    /** Human-readable description handed to the superclass constructor. */
    private static final String DESCRIPTION = "Strip ColdFusion session IDs. " +
        "Use this rule to remove sessionids that look like the following: " +
        "CFID=12412453&CFTOKEN=15501799 or " +
        "CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A " +
        "using the following case-insensitive regex: " + REGEX;

    /** Compiled once and shared; {@link Pattern} instances are immutable. */
    private static final Pattern COLDFUSION_PATTERN =
        Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE);

    /**
     * Creates the rule.
     *
     * @param name name passed through to the {@link BaseRule} constructor
     *             together with the fixed description of this rule
     */
    public StripSessionCFIDs(String name) {
        super(name, DESCRIPTION);
    }

    /**
     * Applies the ColdFusion session-id pattern to <code>url</code>.
     *
     * @param url     URL string to canonicalize
     * @param context not used by this rule
     * @return the result of the inherited <code>doStripRegexMatch</code>
     *         for the given URL and matcher (presumably the URL with the
     *         matched session block removed, or the input unchanged when
     *         the pattern does not match; confirm against BaseRule)
     */
    public String canonicalize(String url, Object context) {
        return doStripRegexMatch(url, COLDFUSION_PATTERN.matcher(url));
    }
}