KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > db > IWebDBWriter


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.db;
5
6 import java.io.*;
7 import java.util.*;
8
9 import net.nutch.pagedb.*;
10 import net.nutch.linkdb.*;
11
12 /******************************************
13  * IWebDBWriter is an interface to the consolidated
14  * page/link database. It permits certain kinds of
15  * operations.
16  *
17  * This database may be implemented in several different
18  * ways (single or multi-pass, single-machine or distributed).
19  * The user of this interface has no idea which one is
20  * being used. They all commit to the IWebDBWriter contract.
21  *
22  * Note that changes to a webdb are finalized upon the call
23  * to close(). Before the call to close() returns, any
24  * readers of the database should see the db in a pristine
25  * pre-write state.
26  *
27  * @author Mike Cafarella
28  ******************************************/

29 public interface IWebDBWriter {
30     /**
31      * Flush and complete all writes to the db.
32      */

33     public void close() throws IOException;
34
35     /**
36      * addPage(Page page) will insert a Page object into the webdb.
37      * If the Page already exists, the existing one will be overwritten.
38      * (Except for the link analysis score, which we try to attach to
39      * a given URL. If an existing Page is overwritten, we will retain
40      * the link score.)
41      *
42      * Page objects are uniquified by their URLs. It's fine to have
43      * many Pages with different URLs but identical MD5s. (Indeed,
44      * that happens all the time with duplicated pages.) But every
45      * Page in the db must have its own URL.
46      */

47     public void addPage(Page page) throws IOException;
48
49     /**
50      * addPageWithScore(Page page) inserts a Page into the webdb.
51      * It works just like the above function, except that link scores
52      * are not preserved if the inserted object already exists. The
53      * inserted object's score will replace one that may already be there.
54      *
55      * This function is useful for the Link Analysis program.
56      */

57     public void addPageWithScore(Page page) throws IOException;
58
59     /**
60      * addPageIfNotPresent(Page) works just like addPage(), except that
61      * the insertion will not take place if there is already a Page with
62      * that URL in the webdb. In that case, the call to addPage() is
63      * simply ignored.
64      */

65     public void addPageIfNotPresent(Page page) throws IOException;
66
67     /**
68      * addPageIfNotPresent(Page, Link) works just like the above addPage(),
69      * except that a Link is also conditionally added to the webdb.
70      */

71     public void addPageIfNotPresent(Page page, Link link) throws IOException;
72
73     /**
74      * deletePage(url) will remove a Page object from the db with the
75      * given URL. Fails silently if there is no Page with the given URL.
76      */

77     public void deletePage(String JavaDoc url) throws IOException;
78
79     /**
80      * addLink(Link) will add the given Link to the webdb. If the
81      * Link already exists, the existing one will be overwritten.
82      *
83      * Links are uniquified by both source MD5 and target URL.
84      * Two Links are considered identical only if they match both
85      * fields.
86      *
87      * Links are only permitted in the webdb if they have a valid
88      * source MD5 for a Page that is also in the webdb. When a
89      * Page is removed, the webdb will automatically remove Links
90      * as appropriate.
91      *
92      * (Note that since there can be multiple URLs with identical
93      * content, the webdb basically needs to do reference-counting
94      * for each Link's source-MD5.)
95      */

96     public void addLink(Link link) throws IOException;
97 }
98
Popular Tags