KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > selftest > CharsetSelfTest


/* CharsetSelfTest
 *
 * Created on Mar 10, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.crawler.selftest;

import java.io.File;
import java.util.Arrays;
import java.util.List;

29 /**
30  * Simple test to ensure we can extract links from multibyte pages.
31  *
32  * That is, can we regex over a multibyte stream.
33  *
34  * @author stack
35  * @version $Revision: 1.6.28.1 $, $Date: 2007/01/13 01:31:26 $
36  */

37 public class CharsetSelfTest extends SelfTestCase
38 {
39     /**
40      * Files to find as a list.
41      */

42     private static final List JavaDoc<File JavaDoc> FILES_TO_FIND =
43         Arrays.asList(new File JavaDoc[]
44             {new File JavaDoc("utf8.jsp"),
45                 new File JavaDoc("shiftjis.jsp"),
46                 new File JavaDoc("charsetselftest_end.html")});
47
48     /**
49      * Look for last file in link chain.
50      *
51      * The way the pages are setup under the CharsetSelfTest directory under
52      * the webapp is that we have one multibyte page w/ a single link buried in
53      * it that points off to another multibyte page. On the end of the link
54      * chain is a page named END_OF_CHAIN_PAGE. This test looks to see that
55      * arc has all pages in the chain.
56      */

57     public void testCharset()
58     {
59         testFilesInArc(FILES_TO_FIND);
60     }
61 }
62
Popular Tags