KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > selftest > BadURIsStopPageParsingSelfTest


/* BadURIsStopPageParsingSelfTest
 *
 * Created on Mar 10, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.crawler.selftest;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

31 /**
32  * Selftest for figuring problems parsing URIs in a page.
33  *
34  * @author stack
35  * @see <a
36  * HREF="https://sourceforge.net/tracker/?func=detail&aid=788219&group_id=73833&atid=539099">[ 788219 ]
37  * URI Syntax Errors stop page parsing.</a>
38  * @version $Revision: 1.2.26.1 $, $Date: 2007/01/13 01:31:26 $
39  */

40 public class BadURIsStopPageParsingSelfTest extends SelfTestCase
41 {
42     /**
43      * Files to find as a list.
44      *
45      * We don't find goodtwo.html because it has a BASE that is out
46      * of scope.
47      */

48     private static final List JavaDoc<File JavaDoc> FILES_TO_FIND =
49         Arrays.asList(new File JavaDoc[]
50             {new File JavaDoc("goodone.html"),
51                 new File JavaDoc("goodthree.html"),
52                 new File JavaDoc("one.html"),
53                 new File JavaDoc("two.html"),
54                 new File JavaDoc("three.html")});
55
56     public void testFilesFound() {
57         List JavaDoc<File JavaDoc> foundFiles = filesFoundInArc();
58         ArrayList JavaDoc<File JavaDoc> editedFoundFiles
59          = new ArrayList JavaDoc<File JavaDoc>(foundFiles.size());
60         for (Iterator JavaDoc i = foundFiles.iterator(); i.hasNext();) {
61             File JavaDoc f = (File JavaDoc)i.next();
62             if (f.getAbsolutePath().endsWith("polishex.html")) {
63                 // There is a URI in our list with the above as suffix. Its in
64
// the arc as a 404. Remove it. It doesn't exist on disk so it
65
// will cause the below testFilesInArc to fail.
66
continue;
67             }
68             editedFoundFiles.add(f);
69         }
70         testFilesInArc(FILES_TO_FIND, editedFoundFiles);
71     }
72 }
73
Popular Tags