KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > db > IWebDBReader


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.db;
5
6 import java.io.*;
7 import java.util.*;
8
9 import net.nutch.io.*;
10 import net.nutch.pagedb.*;
11 import net.nutch.linkdb.*;
12
13 /**********************************************
14  * IWebDBReader is an interface to the consolidated
15  * page/link database. It permits all kind of read-only ops.
16  *
17  * This database may be implemented in several different
18  * ways, which this interface hides from its user.
19  *
20  * @author Mike Cafarella
21  **********************************************/

22 public interface IWebDBReader {
23     /**
24      * Done reading. Release a handle on the db.
25      */

26     public void close() throws IOException;
27     
28     /**
29      * Return a Page object with the given URL, if any.
30      * Pages are guaranteed to be unique by URL, so there
31      * can be max. 1 returned object.
32      */

33     public Page getPage(String JavaDoc url) throws IOException;
34
35     /**
36      * Return any Pages with the given MD5 checksum. Pages
37      * with different URLs often have identical checksums; this
38      * can happen if the content has been copied, or a site
39      * is available under several different URLs.
40      */

41     public Page[] getPages(MD5Hash md5) throws IOException;
42
43     /**
44      * Returns whether a Page with the given MD5 checksum is in the db.
45      */

46     public boolean pageExists(MD5Hash md5) throws IOException;
47
48     /**
49      * Obtain an Enumeration of all Page objects, sorted by URL
50      */

51     public Enumeration pages() throws IOException;
52
53     /**
54      * Obtain an Enumeration of all Page objects, sorted by MD5.
55      */

56     public Enumeration pagesByMD5() throws IOException;
57
58     /**
59      * Simple count of all Page objects in db.
60      */

61     public long numPages();
62
63     /**
64      * Return any Link objects that point to the given URL. This
65      * array can be very large if the given URL has lots of incoming
66      * Links. So large, in fact, that this method call will probably
67      * kill the process for certain URLs.
68      */

69     public Link[] getLinks(UTF8 url) throws IOException;
70
71     /**
72      * Return all the Link objects that originate from a document
73      * with the given MD5 checksum. These will be the outlinks for
74      * the page of content described.
75      */

76     public Link[] getLinks(MD5Hash md5) throws IOException;
77
78     /**
79      * Obtain an Enumeration of all Link objects, sorted by target
80      * URL.
81      */

82     public Enumeration links() throws IOException;
83
84     /**
85      * Simple count of all Link objects in db.
86      */

87     public long numLinks();
88 }
89
Popular Tags