KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > websphinx > Classifier


1 /*
2  * WebSphinx web-crawling toolkit
3  *
4  * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
5  * reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in
16  * the documentation and/or other materials provided with the
17  * distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  */

32
33 package websphinx;
34 /**
35  * Classifier interface. A classifier is a helper object that annotates
36  * pages and links with labels (using Page.setLabel() and Link.setLabel()).
37  * When a page is retrieved by a crawler, it is passed to the classify()
38  * method of every Classifier registered with the crawler. Here are some
39  * typical uses for classifiers:
40  * <UL>
41  * <LI> classifying links into categories like child or parent (see
42  * websphinx.StandardClassifier);
43  * <LI> classifying pages into categories like biology or computers;
44  * <LI> recognizing and parsing pages formatted in a particular style, such as
45  * AltaVista, Yahoo, or latex2html (e.g., the search engine classifiers
46  * in websphinx.searchengine)
47  * <LI>
48  * </UL>
49  */

50 public interface Classifier
51 //#ifdef JDK1.1
52
extends java.io.Serializable JavaDoc
53 //#endif JDK1.1
54
{
55     /**
56      * Classify a page. Typically, the classifier calls page.setLabel() and
57      * page.setField() to mark up the page. The classifier may also look
58      * through the page's links and call link.setLabel() to mark them up.
59      * @param page Page to classify
60      */

61     public abstract void classify (Page page);
62     
63     /**
64      * Get priority of this classifier. Lower priorities execute first.
65      * A classifier should also define a public constant <CODE>priority</CODE>
66      * so that classifiers that depend on it can compute their
67      * priorities statically. For example, if your classifier
68      * depends on FooClassifier and BarClassifier, you might set your
69      * priority as:
70      * <PRE>
71      * public static final float priority = Math.max (FooClassifier, BarClassifier) + 1;
72      * public float getPriority () { return priority; }
73      * </PRE>
74      *
75      * @return priority of this classifier
76      */

77     public float getPriority ();
78 }
79
Popular Tags