KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > url > canonicalize > StripSessionCFIDs


1 /* $Id: StripSessionCFIDs.java,v 1.1.2.1 2007/01/13 01:31:28 stack-sf Exp $
2  *
3  * Created on September 1st, 2006
4  *
5  * Copyright (C) 2006 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.url.canonicalize;
24
25 import java.util.regex.Pattern JavaDoc;
26
27
28 /**
29  * Strip cold fusion session ids.
30  * @author stack
31  * @version $Date: 2007/01/13 01:31:28 $, $Revision: 1.1.2.1 $
32  */

33 public class StripSessionCFIDs
34 extends BaseRule {
35
36     private static final long serialVersionUID = 9122689291157731293L;
37
38     private static final String JavaDoc REGEX = "^(.+)" +
39         "(?:cfid=[^&]+&cftoken=[^&]+(?:jsession=[^&]+)?)(?:&(.*))?$";
40     
41     private static final String JavaDoc DESCRIPTION = "Strip ColdFusion session IDs. " +
42         "Use this rule to remove sessionids that look like the following: " +
43         "CFID=12412453&CFTOKEN=15501799 or " +
44         "CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A " +
45         "using the following case-insensitive regex: " + REGEX;
46         
47     /**
48      * Examples:
49      * <pre>
50      * Examples:
51      * boo?CFID=1169580&CFTOKEN=48630702&dtstamp=22%2F08%2F2006%7C06%3A58%3A11
52      * boo?CFID=12412453&CFTOKEN=15501799&dt=19_08_2006_22_39_28
53      * boo?CFID=14475712&CFTOKEN=2D89F5AF-3048-2957-DA4EE4B6B13661AB&r=468710288378&m=forgotten
54      * boo?CFID=16603925&CFTOKEN=2AE13EEE-3048-85B0-56CEDAAB0ACA44B8&r=501652357733&l1=home
55      * boo?CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A
56      * </pre>
57      */

58     private static final Pattern JavaDoc COLDFUSION_PATTERN =
59         Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE);
60     
61
62     public StripSessionCFIDs(String JavaDoc name) {
63         super(name, DESCRIPTION);
64     }
65
66     public String JavaDoc canonicalize(String JavaDoc url, Object JavaDoc context) {
67         return doStripRegexMatch(url, COLDFUSION_PATTERN.matcher(url));
68     }
69 }
Popular Tags