KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > snapper > wrapper > lucene > FileDocument


1 package org.enhydra.snapper.wrapper.lucene;
2
3
4 /**
5  */

6
7 import java.io.File JavaDoc;
8 import java.text.SimpleDateFormat JavaDoc;
9 import java.util.Date JavaDoc;
10
11 import org.apache.lucene.document.Document;
12 import org.apache.lucene.document.Field;
13
14
15 /** A utility for making Lucene Documents from a File. */
16
17 public class FileDocument {
18   /** Makes a document for a File.
19     <p>
20     The document has three fields:
21     <ul>
22     <li><code>path</code>--containing the pathname of the file, as a stored,
23     tokenized field;
24     <li><code>modified</code>--containing the last modified date of the file as
25     a keyword field as encoded by <a
26     href="lucene.document.DateField.html">DateField</a>; and
27     <li><code>contents</code>--containing the full contents of the file, as a
28     Reader field;
29  * @param properties : Custom properties of Word documents
30  * @param metadata : Metadata from an external DB - if exists
31     */

32   public static Document Document(File JavaDoc f, String JavaDoc contents, String JavaDoc type, String JavaDoc title, int maxSize, String JavaDoc properties, String JavaDoc metadata)
33        throws java.io.FileNotFoundException JavaDoc {
34      
35     
36     // make a new, empty document
37
Document doc = new Document();
38
39     // Add the path of the file as a field named "path". Use a Text field, so
40
// that the index stores the path, and so that the path is searchable
41
doc.add(Field.Text("path", f.getPath()));
42     String JavaDoc str = f.getPath();
43     str = replace(str, "\\",".");
44     doc.add(Field.Keyword("pt", str));
45
46     // Add the last modified date of the file a field named "modified". Use a
47
// Keyword field, so that it's searchable, but so that no attempt is made
48
// to tokenize the field into words.
49
// doc.add(Field.Keyword("modified",
50
// DateField.timeToString(f.lastModified())));
51

52  // doc.add(Field.Keyword("type", type));
53
Field typeField = Field.Keyword("type", type);
54     float bostFactor1 = 0.1f;
55     typeField.setBoost(bostFactor1);
56     doc.add(typeField);
57     
58     doc.add(Field.Text("title", title));
59     doc.add(Field.UnIndexed("fileName", f.getName()));
60     doc.add(Field.Text("properties", properties));
61     doc.add(Field.Text("metadata", metadata));
62
63   // doc.add(Field.Keyword("modified", new Date(f.lastModified())));
64

65     SimpleDateFormat JavaDoc formatter = new SimpleDateFormat JavaDoc("yyyyMMdd");
66     Date JavaDoc last = new Date JavaDoc(f.lastModified());
67     String JavaDoc dateString = formatter.format(last);
68     doc.add(Field.Keyword("modified", dateString));
69   
70     // doc.add(Field.Keyword("modifiedNew", new Date(f.lastModified())));
71

72     // Add the contents of the file a field named "contents". Use a Text
73
// field, specifying a Reader, so that the text of the file is tokenized.
74
// ?? why doesn't FileReader work here ??
75
//FileInputStream is = new FileInputStream(f);
76
//Reader reader = new BufferedReader(new InputStreamReader(is));
77
String JavaDoc fullcontents = "";
78     if (contents.length() > maxSize){
79         fullcontents = contents.substring(0,maxSize);
80     }
81     else fullcontents = contents;
82     doc.add(Field.UnStored("contents", contents));
83     doc.add(Field.Text("fullcontents", fullcontents));
84
85     // return the document
86
return doc;
87   }
88   
89   
90   public static Document Document(long timestamp, String JavaDoc path, String JavaDoc contents, String JavaDoc type, String JavaDoc title, int maxSize, String JavaDoc properties, String JavaDoc metadata, String JavaDoc fileName)
91   throws java.io.FileNotFoundException JavaDoc {
92
93         // make a new, empty document
94
Document doc = new Document();
95         
96         // Add the path of the file as a field named "path". Use a Text field, so
97
// that the index stores the path, and so that the path is searchable
98
doc.add(Field.Text("path", path));
99         String JavaDoc str = path;
100         str = replace(str, "\\",".");
101         doc.add(Field.Keyword("pt", str));
102         
103         // Add the last modified date of the file a field named "modified". Use a
104
// Keyword field, so that it's searchable, but so that no attempt is made
105
// to tokenize the field into words.
106

107         SimpleDateFormat JavaDoc formatter = new SimpleDateFormat JavaDoc("yyyyMMdd");
108         Date JavaDoc last = new Date JavaDoc(timestamp);
109         String JavaDoc dateString = formatter.format(last);
110         doc.add(Field.Keyword("modified", dateString));
111         
112         Field typeField = Field.Keyword("type", type);
113         float bostFactor1 = 0.1f;
114         typeField.setBoost(bostFactor1);
115         doc.add(typeField);
116         //doc.add(Field.Keyword("type", type));
117
doc.add(Field.Text("title", title));
118         doc.add(Field.Text("metadata", metadata));
119         
120         // Add the contents of the file a field named "contents". Use a Text
121
// field, specifying a Reader, so that the text of the file is tokenized.
122
// ?? why doesn't FileReader work here ??
123
//FileInputStream is = new FileInputStream(f);
124
//Reader reader = new BufferedReader(new InputStreamReader(is));
125
String JavaDoc fullcontents = "";
126         if (contents.length() > maxSize){
127             fullcontents = contents.substring(0,maxSize);
128         }
129         else fullcontents = contents;
130         doc.add(Field.UnStored("contents", contents));
131         doc.add(Field.Text("properties", properties));
132         doc.add(Field.Text("fullcontents", fullcontents));
133         doc.add(Field.UnIndexed("fileName", fileName));
134
135         
136         // return the document
137
return doc;
138         }
139
140   private FileDocument() {}
141   
142   static String JavaDoc replace(String JavaDoc s, String JavaDoc one, String JavaDoc another) {
143 // In a string replace one substring with another
144
if (s.equals("")) return "";
145    String JavaDoc res = "";
146    int i = s.indexOf(one,0);
147    int lastpos = 0;
148    while (i != -1) {
149      res += s.substring(lastpos,i) + another;
150      lastpos = i + one.length();
151      i = s.indexOf(one,lastpos);
152    }
153    res += s.substring(lastpos); // the rest
154
return res;
155  }
156 }
157     
158
Popular Tags