KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > contineo > searchengine > crawler > LuceneDocument


package org.contineo.searchengine.crawler;

import java.io.File;
import java.util.Locale;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.contineo.documan.Keywords;

8 /**
9  * Created on 5. November 2003, 16:53
10  * @author Michael Scholz
11  */

12 public class LuceneDocument {
13     
14     private File JavaDoc file = null;
15
16     /**
17      * @uml.property name="doc"
18      * @uml.associationEnd
19      */

20     private Document doc;
21
22     /**
23      * @uml.property name="content"
24      */

25     private String JavaDoc content = "";
26
27     /**
28      *
29      * @uml.property name="document"
30      * @uml.associationEnd
31      * @uml.property name="document" multiplicity="(1 1)"
32      */

33     private org.contineo.documan.Document document = new org.contineo.documan.Document();
34
35     
36     /** Creates a new instance of LuceneDocument */
37     public LuceneDocument(org.contineo.documan.Document d) {
38         document = d;
39     }
40
41     /**
42      * Builds a lucene compatible document of a file.
43      * The document contains 7 Fields:
44      * name - name of the document
45      * size - size of the document in bytes
46      * path - path of the document for calling it on the web browser (e.g DocFrame.do?menuid=1)
47      * type - file format (e.g pdf, sxw)
48      * date - date of creation
49      * content - full text of the document
50      * summary - first 500 letters of the content
51      * @param f - File of which the document should be built.
52      * @return
53      */

54     public Document getDocument(File JavaDoc f, StringBuffer JavaDoc content) {
55         file = f;
56         doc = new Document();
57         setDocId();
58         setName();
59         setSize();
60         setPath();
61         setDocData();
62         setType();
63         setDate();
64         setContent(content);
65         setSummary();
66         setKeywords();
67         return doc;
68     }
69     
70     public void setDocId() {
71         doc.add(Field.UnIndexed("docid", String.valueOf(document.getDocId())));
72     }
73
74     /**
75      * Returns the content of the indexed document.
76      * @return
77      *
78      * @uml.property name="content"
79      */

80     public String JavaDoc getContent() {
81         return content;
82     }
83
84     
85     protected void setName() {
86         doc.add(Field.Text("name", document.getDocName()));
87     }
88     
89     protected void setSize() {
90         String JavaDoc size = String.valueOf(file.length()/1024);
91         doc.add(Field.Keyword("size",size));
92     }
93     
94     protected void setPath() {
95         String JavaDoc menuid = String.valueOf(document.getMenuId());
96         String JavaDoc path = "DocFrame.do?menuid=" + menuid;
97         doc.add(Field.Keyword("menuid",menuid));
98         doc.add(Field.UnIndexed("path",path));
99     }
100     
101     protected void setDocData() {
102         doc.add(Field.UnStored("source", document.getSource()));
103         doc.add(Field.UnStored("sourceauthor", document.getSourceAuthor()));
104         doc.add(Field.UnStored("sourcetype", document.getSourceType()));
105         doc.add(Field.UnStored("coverage", document.getCoverage()));
106     }
107     
108     protected void setType() {
109         int point = file.getName().lastIndexOf(".");
110         String JavaDoc type = file.getName().substring(point + 1);
111         type = type.toUpperCase();
112         doc.add(Field.Keyword("type",type));
113     }
114     
115     protected void setDate() {
116         long date = file.lastModified();
117         doc.add(Field.Keyword("date", String.valueOf(date)));
118     }
119     
120     protected void setContent(StringBuffer JavaDoc content) {
121         doc.add(Field.Text("content", content.toString()));
122         doc.add(Field.UnIndexed("length", String.valueOf(content.length())));
123     }
124     
125     protected void setSummary() {
126         int summarysize = Math.min(content.length(),500);
127         String JavaDoc summary = content.substring(0,summarysize);
128         doc.add(Field.Text("summary",summary));
129     }
130     
131     protected void setKeywords() {
132         Keywords kw = new Keywords();
133         doc.add(Field.Text("keywords", kw.toString(document.getKeywords())));
134     }
135 }
136
Popular Tags