KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > datamodel > CrawlURITest


1 /* CrawlURITest
2  *
3  * Created on Jul 26, 2004
4  *
5  * Copyright (C) 2004 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.datamodel;
24
25 import java.io.File JavaDoc;
26 import java.io.FileInputStream JavaDoc;
27 import java.io.FileOutputStream JavaDoc;
28 import java.io.IOException JavaDoc;
29 import java.io.ObjectInputStream JavaDoc;
30 import java.io.ObjectOutputStream JavaDoc;
31
32 import org.apache.commons.httpclient.URIException;
33 import org.archive.net.UURIFactory;
34 import org.archive.util.TmpDirTestCase;
35
36 /**
37  * @author stack
38  * @version $Revision: 1.11 $, $Date: 2005/08/29 21:52:36 $
39  */

40 public class CrawlURITest extends TmpDirTestCase {
41     
42     CrawlURI seed = null;
43     
44     protected void setUp() throws Exception JavaDoc {
45         super.setUp();
46         final String JavaDoc url = "http://www.dh.gov.uk/Home/fs/en";
47         this.seed = new CrawlURI(UURIFactory.getInstance(url));
48         this.seed.setSchedulingDirective(CandidateURI.MEDIUM);
49         this.seed.setIsSeed(true);
50         // Force caching of string.
51
this.seed.toString();
52         // TODO: should this via really be itself?
53
this.seed.setVia(UURIFactory.getInstance(url));
54     }
55
56     /**
57      * Test serialization/deserialization works.
58      *
59      * @throws IOException
60      * @throws ClassNotFoundException
61      */

62     final public void testSerialization()
63             throws IOException JavaDoc, ClassNotFoundException JavaDoc {
64         File JavaDoc serialize = new File JavaDoc(getTmpDir(),
65             this.getClass().getName() + ".serialize");
66         try {
67             FileOutputStream JavaDoc fos = new FileOutputStream JavaDoc(serialize);
68             ObjectOutputStream JavaDoc oos = new ObjectOutputStream JavaDoc(fos);
69             oos.writeObject(this.seed);
70             oos.reset();
71             oos.writeObject(this.seed);
72             oos.reset();
73             oos.writeObject(this.seed);
74             oos.close();
75             // Read in the object.
76
FileInputStream JavaDoc fis = new FileInputStream JavaDoc(serialize);
77             ObjectInputStream JavaDoc ois = new ObjectInputStream JavaDoc(fis);
78             CrawlURI deserializedCuri = (CrawlURI)ois.readObject();
79             deserializedCuri = (CrawlURI)ois.readObject();
80             deserializedCuri = (CrawlURI)ois.readObject();
81             assertTrue("Deserialized not equal to original",
82                 this.seed.toString().equals(deserializedCuri.toString()));
83             String JavaDoc host = this.seed.getUURI().getHost();
84             assertTrue("Deserialized host not null",
85                 host != null && host.length() >= 0);
86         } finally {
87             serialize.delete();
88         }
89     }
90     
91     public void testCandidateURIWithLoadedAList()
92     throws URIException {
93         CandidateURI c = CandidateURI.
94             createSeedCandidateURI(UURIFactory.
95                 getInstance("http://www.archive.org"));
96         c.putString("key", "value");
97         CrawlURI curi = new CrawlURI(c, 0);
98         assertTrue("Didn't find AList item",
99             curi.getString("key").equals("value"));
100     }
101     
102 // TODO: move to QueueAssignmentPolicies
103
// public void testCalculateClassKey() throws URIException {
104
// final String uri = "http://mprsrv.agri.gov.cn";
105
// CrawlURI curi = new CrawlURI(UURIFactory.getInstance(uri));
106
// String key = curi.getClassKey();
107
// assertTrue("Key1 is bad " + key,
108
// key.equals(curi.getUURI().getAuthorityMinusUserinfo()));
109
// final String baduri = "ftp://pfbuser:pfbuser@mprsrv.agri.gov.cn/clzreceive/";
110
// curi = new CrawlURI(UURIFactory.getInstance(baduri));
111
// key = curi.getClassKey();
112
// assertTrue("Key2 is bad " + key,
113
// key.equals(curi.getUURI().getAuthorityMinusUserinfo()));
114
// }
115
}
116
Popular Tags