KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > datamodel > RobotstxtTest


1 /* RobotstxtTest
2  *
3  * $Id: RobotstxtTest.java,v 1.2.14.1 2007/01/13 01:31:12 stack-sf Exp $
4  *
5  * Created Sep 1, 2005
6  *
7  * Copyright (C) 2005 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.datamodel;
26
27 import java.io.BufferedReader JavaDoc;
28 import java.io.IOException JavaDoc;
29 import java.io.StringReader JavaDoc;
30 import java.util.HashMap JavaDoc;
31 import java.util.LinkedList JavaDoc;
32 import java.util.List JavaDoc;
33
34 import junit.framework.TestCase;
35
36 public class RobotstxtTest extends TestCase {
37     public void testParseRobots() throws IOException JavaDoc {
38         LinkedList JavaDoc<String JavaDoc> userAgents = new LinkedList JavaDoc<String JavaDoc>();
39         HashMap JavaDoc<String JavaDoc,List JavaDoc<String JavaDoc>> disallows
40          = new HashMap JavaDoc<String JavaDoc,List JavaDoc<String JavaDoc>>();
41         BufferedReader JavaDoc reader = new BufferedReader JavaDoc(new StringReader JavaDoc("BLAH"));
42         assertFalse(Robotstxt.parse(reader, userAgents, disallows));
43         assertTrue(disallows.size() == 0);
44         // Parse archive robots.txt with heritrix agent.
45
String JavaDoc agent = "archive.org_bot";
46         reader = new BufferedReader JavaDoc(
47             new StringReader JavaDoc("User-agent: " + agent + "\n" +
48             "Disallow: /cgi-bin/\n" +
49             "Disallow: /details/software\n"));
50         assertFalse(Robotstxt.parse(reader, userAgents, disallows));
51         assertTrue(disallows.size() == 1);
52         assertTrue(userAgents.size() == 1);
53         assertEquals(userAgents.get(0), agent);
54         // Parse archive robots.txt with star agent.
55
agent = "*";
56         reader = new BufferedReader JavaDoc(
57             new StringReader JavaDoc("User-agent: " + agent + "\n" +
58             "Disallow: /cgi-bin/\n" +
59             "Disallow: /details/software\n"));
60         disallows = new HashMap JavaDoc<String JavaDoc,List JavaDoc<String JavaDoc>>();
61         userAgents = new LinkedList JavaDoc<String JavaDoc>();
62         assertFalse(Robotstxt.parse(reader, userAgents, disallows));
63         assertTrue(disallows.size() == 1);
64         assertTrue(userAgents.size() == 1);
65         assertEquals(userAgents.get(0), "");
66     }
67 }
68
Popular Tags