KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lenya > lucene > index > IndexIterator


/*
 * Copyright 1999-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/* $Id: IndexIterator.java 153519 2005-02-12 17:43:15Z gregor $ */

package org.apache.lenya.lucene.index;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Category;
import org.apache.lucene.document.DateField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;

42 public class IndexIterator {
43     
44     private static Category log = Category.getInstance(IndexIterator.class);
45     
46     /**
47      * Creates a new instance of IndexItertor
48      */

49     public IndexIterator(String JavaDoc index, FileFilter JavaDoc filter) {
50         this.filter = filter;
51         this.index = index;
52     }
53
54     private String JavaDoc index;
55
56     protected String JavaDoc getIndex() {
57         return index;
58     }
59
60     private FileFilter JavaDoc filter;
61
62     /**
63      * @return FileFilter
64      */

65     protected FileFilter JavaDoc getFilter() {
66         return filter;
67     }
68
69     private List JavaDoc handlers = new ArrayList JavaDoc();
70
71     /**
72      * DOCUMENT ME!
73      *
74      * @param handler DOCUMENT ME!
75      */

76     public void addHandler(IndexIteratorHandler handler) {
77         if (!handlers.contains(handler)) {
78             handlers.add(handler);
79         }
80     }
81
82     protected void handleFile(File JavaDoc file) {
83         for (Iterator JavaDoc i = handlers.iterator(); i.hasNext();) {
84             IndexIteratorHandler handler = (IndexIteratorHandler) i.next();
85             handler.handleFile(getReader(), file);
86         }
87     }
88
89     protected void handleStaleDocument(Term term) {
90         for (Iterator JavaDoc i = handlers.iterator(); i.hasNext();) {
91             IndexIteratorHandler handler = (IndexIteratorHandler) i.next();
92             handler.handleStaleDocument(getReader(), term);
93         }
94     }
95
96     protected void handleUnmodifiedDocument(Term term, File JavaDoc file) {
97         for (Iterator JavaDoc i = handlers.iterator(); i.hasNext();) {
98             IndexIteratorHandler handler = (IndexIteratorHandler) i.next();
99             handler.handleUnmodifiedDocument(getReader(), term, file);
100         }
101     }
102
103     protected void handleNewDocument(Term term, File JavaDoc file) {
104         for (Iterator JavaDoc i = handlers.iterator(); i.hasNext();) {
105             IndexIteratorHandler handler = (IndexIteratorHandler) i.next();
106             handler.handleNewDocument(getReader(), term, file);
107         }
108     }
109
110     private IndexReader reader;
111
112     protected IndexReader getReader() {
113         return reader;
114     }
115
116     /**
117      * Iterate over all files within directory
118      *
119      * @param dumpDirectory Directory over which shall be iterated
120      */

121     public void iterate(File JavaDoc dumpDirectory) {
122         log.info("Iterating files (" + dumpDirectory + ")");
123
124         try {
125             reader = IndexReader.open(getIndex());
126
127             TermEnum iterator = enumerateUIDs(getReader());
128
129         // TODO: Should be configurable
130
boolean sort = false;
131
132         if (sort) {
133                 File JavaDoc[] files = getFiles(dumpDirectory);
134
135                 for (int i = 0; i < files.length; i++) {
136                     iterateFiles(iterator, files[i], dumpDirectory);
137                 }
138             } else {
139                 log.debug("Do not sort files ...");
140                 traverse(iterator, dumpDirectory, dumpDirectory);
141             }
142
143             // iterate the rest of stale documents
144
while ((iterator.term() != null) && iterator.term().field().equals("uid")) {
145                 handleStaleDocument(iterator.term());
146                 iterator.next();
147             }
148
149             iterator.close();
150             reader.close();
151         } catch (IOException JavaDoc e) {
152             log.error(e);
153         }
154     }
155
156     /**
157      *
158      */

159     protected void iterateFiles(TermEnum iterator, File JavaDoc file, File JavaDoc dumpDirectory)
160         throws IOException JavaDoc {
161         String JavaDoc uid = createUID(file, dumpDirectory);
162         log.debug("-----------------------------------------------------");
163         log.debug("[file] file uid: " + uid2url(uid));
164
165         handleFile(file);
166
167         // handle all terms with a smaller uid than the modified file and delete their documents
168
while (isStale(iterator.term(), uid)) {
169             log.debug("[stale] term uid: " + uid2url(iterator.term().text()));
170             handleStaleDocument(iterator.term());
171             iterator.next();
172         }
173
174         // handle un-modified file
175
if (hasEqualUID(iterator.term(), uid)) {
176             log.debug("[unmod] term uid: " + uid2url(iterator.term().text()));
177             handleUnmodifiedDocument(iterator.term(), file);
178             iterator.next();
179         }
180         // handle new file
181
else {
182             if (iterator.term() != null) {
183                 log.debug("[new] term uid: " + uid2url(iterator.term().text()));
184                 handleNewDocument(iterator.term(), file);
185             }
186         }
187     }
188
189     /**
190      * Returns an term enumerator beginning with the first term that represents a UID field.
191      */

192     protected TermEnum enumerateUIDs(IndexReader reader) {
193         TermEnum tEnum = null;
194
195         try {
196             tEnum = reader.terms(new Term("uid", ""));
197         } catch (IOException JavaDoc e) {
198             log.error("Term enumeration failed: ", e);
199         }
200             
201         return tEnum;
202     }
203
204     /**
205      * Returns if the term is not null and decribes a UID field.
206      */

207     protected static boolean isUIDTerm(Term term) {
208         return (term != null) && term.field().equals("uid");
209     }
210
211     /**
212      * Returns <code>true</code> if the file described by uid has a bigger UID than the
213      * file described by the existing UID term.
214      */

215     protected static boolean isStale(Term term, String JavaDoc uid) {
216         return isUIDTerm(term) && (term.text().compareTo(uid) < 0);
217     }
218
219     /**
220      * Returns <code>true</code> if the file described by uid has the same UID as the
221      * file described by the existing UID term.
222      */

223     protected static boolean hasEqualUID(Term term, String JavaDoc uid) {
224         return isUIDTerm(term) && term.text().equals(uid);
225     }
226
227     /**
228      * Create a unique id
229      *
230      * @param file file to index
231      * @param dumpDir dump directory
232      *
233      * @return id
234      */

235     public static String JavaDoc createID(File JavaDoc file, File JavaDoc dumpDir) {
236         if (dumpDir.getPath().length() <= file.getPath().length()) {
237             String JavaDoc id = file.getPath().substring(dumpDir.getPath().length());
238             //id = id.replace(File.separatorChar, '\u0000'));
239
return id;
240         } else {
241             log.warn("Length of dumping directory is less than length of file name! Absolute path is being returned as id.");
242             return file.getAbsolutePath();
243         }
244     }
245
246     /**
247      * Append path and date into a string in such a way that lexicographic sorting gives the same
248      * results as a walk of the file hierarchy. Thus null () is used both to separate
249      * directory components and to separate the path from the date.
250      *
251      * @param file DOCUMENT ME!
252      * @param htdocsDumpDir DOCUMENT ME!
253      *
254      * @return DOCUMENT ME!
255      */

256     public static String JavaDoc createUID(File JavaDoc file, File JavaDoc htdocsDumpDir) {
257         String JavaDoc requestURI = file.getPath().substring(htdocsDumpDir.getPath().length());
258         String JavaDoc uid = requestURI.replace(File.separatorChar, '\u0000') + "\u0000" +
259             DateField.timeToString(file.lastModified());
260
261         return uid;
262     }
263
264     /**
265      * Converts a UID to a URL string.
266      */

267     public static String JavaDoc uid2url(String JavaDoc uid) {
268         String JavaDoc url = uid.replace('\u0000', '/'); // replace nulls with slashes
269
String JavaDoc timeString = uid.substring(uid.lastIndexOf("\u0000") + 1);
270         Date JavaDoc date = DateField.stringToDate(timeString);
271         DateFormat JavaDoc format = new SimpleDateFormat JavaDoc("yyyy.MM.dd HH:mm:ss");
272
273         return url.substring(0, url.lastIndexOf('/')) + " " + format.format(date);
274     }
275
276     /**
277      * Get Files and sorts by alphabet?
278      */

279     public File JavaDoc[] getFiles(File JavaDoc dumpDirectory) {
280         List JavaDoc files = new ArrayList JavaDoc();
281         collectFiles(dumpDirectory, files);
282         Collections.sort(files);
283
284         Map JavaDoc uidToFile = new HashMap JavaDoc();
285
286         String JavaDoc[] uids = new String JavaDoc[files.size()];
287
288         for (int i = 0; i < uids.length; i++) {
289             uids[i] = createUID((File JavaDoc) files.get(i), dumpDirectory);
290             uidToFile.put(uids[i], files.get(i));
291         }
292
293         Arrays.sort(uids);
294
295         File JavaDoc[] fileArray = new File JavaDoc[uids.length];
296
297         for (int i = 0; i < uids.length; i++) {
298             File JavaDoc file = (File JavaDoc) uidToFile.get(uids[i]);
299             log.debug(file);
300             fileArray[i] = file;
301         }
302
303         return fileArray;
304     }
305
306     /**
307      * Collect files
308      */

309     protected void collectFiles(File JavaDoc file, List JavaDoc files) {
310         if (file.isDirectory()) {
311             log.debug("Apply filter " + getFilter().getClass().getName() + " to: " + file);
312             File JavaDoc[] fileArray = file.listFiles(getFilter());
313
314             for (int i = 0; i < fileArray.length; i++) {
315                 collectFiles(fileArray[i], files);
316             }
317         } else {
318             files.add(file);
319         }
320     }
321
322     /**
323      * Traverse directory
324      */

325     protected void traverse(TermEnum iterator, File JavaDoc file, File JavaDoc dumpDirectory) throws IOException JavaDoc {
326         if (file.isDirectory()) {
327             log.debug("Apply filter " + getFilter().getClass().getName() + " to: " + file);
328             File JavaDoc[] fileArray = file.listFiles(getFilter());
329
330             for (int i = 0; i < fileArray.length; i++) {
331                 traverse(iterator, fileArray[i], dumpDirectory);
332             }
333         } else {
334             log.debug(file);
335             iterateFiles(iterator, file, dumpDirectory);
336         }
337     }
338 }
339
Popular Tags