KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > url > canonicalize > RegexRuleTest


1 /* RegexRuleTest
2  *
3  * Created on Oct 6, 2004
4  *
5  * Copyright (C) 2004 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.url.canonicalize;
24
25 import java.io.File JavaDoc;
26
27 import javax.management.InvalidAttributeValueException JavaDoc;
28
29 import org.apache.commons.httpclient.URIException;
30 import org.archive.crawler.datamodel.CrawlOrder;
31 import org.archive.crawler.settings.MapType;
32 import org.archive.crawler.settings.XMLSettingsHandler;
33 import org.archive.net.UURIFactory;
34 import org.archive.util.TmpDirTestCase;
35
36
37 /**
38  * Test the regex rule.
39  * @author stack
40  * @version $Date: 2005/07/18 17:30:10 $, $Revision: 1.6 $
41  */

42 public class RegexRuleTest extends TmpDirTestCase {
43     private File JavaDoc orderFile;
44     protected XMLSettingsHandler settingsHandler;
45     private MapType rules = null;
46     
47     protected void setUp() throws Exception JavaDoc {
48         super.setUp();
49         this.orderFile = new File JavaDoc(getTmpDir(), this.getClass().getName() +
50             ".order.xml");
51         this.settingsHandler = new XMLSettingsHandler(orderFile);
52         this.settingsHandler.initialize();
53         this.rules = (MapType)(settingsHandler.getSettingsObject(null)).
54             getModule(CrawlOrder.ATTR_NAME).
55                getAttribute(CrawlOrder.ATTR_RULES);
56     }
57     
58     public void testCanonicalize()
59     throws URIException, InvalidAttributeValueException JavaDoc {
60         final String JavaDoc url = "http://www.aRchive.Org/index.html";
61         RegexRule rr = new RegexRule("Test " + this.getClass().getName());
62         this.rules.addElement(null, rr);
63         rr.canonicalize(url, UURIFactory.getInstance(url));
64         String JavaDoc product = rr.canonicalize(url, null);
65         assertTrue("Default doesn't work.", url.equals(product));
66     }
67
68     public void testSessionid()
69     throws InvalidAttributeValueException JavaDoc {
70         final String JavaDoc urlBase = "http://joann.com/catalog.jhtml";
71         final String JavaDoc urlMinusSessionid = urlBase + "?CATID=96029";
72         final String JavaDoc url = urlBase +
73             ";$sessionid$JKOFFNYAAKUTIP4SY5NBHOR50LD3OEPO?CATID=96029";
74         RegexRule rr = new RegexRule("Test",
75             "^(.+)(?:;\\$sessionid\\$[A-Z0-9]{32})(\\?.*)+$",
76             "$1$2");
77         this.rules.addElement(null, rr);
78         String JavaDoc product = rr.canonicalize(url, null);
79         assertTrue("Failed " + url, urlMinusSessionid.equals(product));
80     }
81     
82     public void testNullFormat()
83     throws InvalidAttributeValueException JavaDoc {
84         final String JavaDoc urlBase = "http://joann.com/catalog.jhtml";
85         final String JavaDoc url = urlBase +
86             ";$sessionid$JKOFFNYAAKUTIP4SY5NBHOR50LD3OEPO";
87         RegexRule rr = new RegexRule("Test",
88             "^(.+)(?:;\\$sessionid\\$[A-Z0-9]{32})$",
89             "$1$2");
90         this.rules.addElement(null, rr);
91         String JavaDoc product = rr.canonicalize(url, null);
92         assertTrue("Failed " + url, urlBase.equals(product));
93     }
94 }
95
Popular Tags