1 17 18 19 20 package org.apache.lenya.lucene.index; 21 22 import java.io.File ; 23 import java.io.FileFilter ; 24 import java.io.IOException ; 25 import java.text.DateFormat ; 26 import java.text.SimpleDateFormat ; 27 import java.util.ArrayList ; 28 import java.util.Arrays ; 29 import java.util.Collections ; 30 import java.util.Date ; 31 import java.util.HashMap ; 32 import java.util.Iterator ; 33 import java.util.List ; 34 import java.util.Map ; 35 36 import org.apache.log4j.Category; 37 import org.apache.lucene.document.DateField; 38 import org.apache.lucene.index.IndexReader; 39 import org.apache.lucene.index.Term; 40 import org.apache.lucene.index.TermEnum; 41 42 public class IndexIterator { 43 44 private static Category log = Category.getInstance(IndexIterator.class); 45 46 49 public IndexIterator(String index, FileFilter filter) { 50 this.filter = filter; 51 this.index = index; 52 } 53 54 private String index; 55 56 protected String getIndex() { 57 return index; 58 } 59 60 private FileFilter filter; 61 62 65 protected FileFilter getFilter() { 66 return filter; 67 } 68 69 private List handlers = new ArrayList (); 70 71 76 public void addHandler(IndexIteratorHandler handler) { 77 if (!handlers.contains(handler)) { 78 handlers.add(handler); 79 } 80 } 81 82 protected void handleFile(File file) { 83 for (Iterator i = handlers.iterator(); i.hasNext();) { 84 IndexIteratorHandler handler = (IndexIteratorHandler) i.next(); 85 handler.handleFile(getReader(), file); 86 } 87 } 88 89 protected void handleStaleDocument(Term term) { 90 for (Iterator i = handlers.iterator(); i.hasNext();) { 91 IndexIteratorHandler handler = (IndexIteratorHandler) i.next(); 92 handler.handleStaleDocument(getReader(), term); 93 } 94 } 95 96 protected void handleUnmodifiedDocument(Term term, File file) { 97 for (Iterator i = handlers.iterator(); i.hasNext();) { 98 IndexIteratorHandler handler = (IndexIteratorHandler) i.next(); 99 handler.handleUnmodifiedDocument(getReader(), term, file); 100 } 101 } 102 103 protected void handleNewDocument(Term term, File file) { 104 for (Iterator i = handlers.iterator(); i.hasNext();) { 105 IndexIteratorHandler handler = (IndexIteratorHandler) i.next(); 106 handler.handleNewDocument(getReader(), term, file); 107 } 108 } 109 110 private IndexReader reader; 111 112 protected IndexReader getReader() { 113 return reader; 114 } 115 116 121 public void iterate(File dumpDirectory) { 122 log.info("Iterating files (" + dumpDirectory + ")"); 123 124 try { 125 reader = IndexReader.open(getIndex()); 126 127 TermEnum iterator = enumerateUIDs(getReader()); 128 129 boolean sort = false; 131 132 if (sort) { 133 File [] files = getFiles(dumpDirectory); 134 135 for (int i = 0; i < files.length; i++) { 136 iterateFiles(iterator, files[i], dumpDirectory); 137 } 138 } else { 139 log.debug("Do not sort files ..."); 140 traverse(iterator, dumpDirectory, dumpDirectory); 141 } 142 143 while ((iterator.term() != null) && iterator.term().field().equals("uid")) { 145 handleStaleDocument(iterator.term()); 146 iterator.next(); 147 } 148 149 iterator.close(); 150 reader.close(); 151 } catch (IOException e) { 152 log.error(e); 153 } 154 } 155 156 159 protected void iterateFiles(TermEnum iterator, File file, File dumpDirectory) 160 throws IOException { 161 String uid = createUID(file, dumpDirectory); 162 log.debug("-----------------------------------------------------"); 163 log.debug("[file] file uid: " + uid2url(uid)); 164 165 handleFile(file); 166 167 while (isStale(iterator.term(), uid)) { 169 log.debug("[stale] term uid: " + uid2url(iterator.term().text())); 170 handleStaleDocument(iterator.term()); 171 iterator.next(); 172 } 173 174 if (hasEqualUID(iterator.term(), uid)) { 176 log.debug("[unmod] term uid: " + uid2url(iterator.term().text())); 177 handleUnmodifiedDocument(iterator.term(), file); 178 iterator.next(); 179 } 180 else { 182 if (iterator.term() != null) { 183 log.debug("[new] term uid: " + uid2url(iterator.term().text())); 184 handleNewDocument(iterator.term(), file); 185 } 186 } 187 } 188 189 192 protected TermEnum enumerateUIDs(IndexReader reader) { 193 TermEnum tEnum = null; 194 195 try { 196 tEnum = reader.terms(new Term("uid", "")); 197 } catch (IOException e) { 198 log.error("Term enumeration failed: ", e); 199 } 200 201 return tEnum; 202 } 203 204 207 protected static boolean isUIDTerm(Term term) { 208 return (term != null) && term.field().equals("uid"); 209 } 210 211 215 protected static boolean isStale(Term term, String uid) { 216 return isUIDTerm(term) && (term.text().compareTo(uid) < 0); 217 } 218 219 223 protected static boolean hasEqualUID(Term term, String uid) { 224 return isUIDTerm(term) && term.text().equals(uid); 225 } 226 227 235 public static String createID(File file, File dumpDir) { 236 if (dumpDir.getPath().length() <= file.getPath().length()) { 237 String id = file.getPath().substring(dumpDir.getPath().length()); 238 return id; 240 } else { 241 log.warn("Length of dumping directory is less than length of file name! Absolute path is being returned as id."); 242 return file.getAbsolutePath(); 243 } 244 } 245 246 256 public static String createUID(File file, File htdocsDumpDir) { 257 String requestURI = file.getPath().substring(htdocsDumpDir.getPath().length()); 258 String uid = requestURI.replace(File.separatorChar, '\u0000') + "\u0000" + 259 DateField.timeToString(file.lastModified()); 260 261 return uid; 262 } 263 264 267 public static String uid2url(String uid) { 268 String url = uid.replace('\u0000', '/'); String timeString = uid.substring(uid.lastIndexOf("\u0000") + 1); 270 Date date = DateField.stringToDate(timeString); 271 DateFormat format = new SimpleDateFormat ("yyyy.MM.dd HH:mm:ss"); 272 273 return url.substring(0, url.lastIndexOf('/')) + " " + format.format(date); 274 } 275 276 279 public File [] getFiles(File dumpDirectory) { 280 List files = new ArrayList (); 281 collectFiles(dumpDirectory, files); 282 Collections.sort(files); 283 284 Map uidToFile = new HashMap (); 285 286 String [] uids = new String [files.size()]; 287 288 for (int i = 0; i < uids.length; i++) { 289 uids[i] = createUID((File ) files.get(i), dumpDirectory); 290 uidToFile.put(uids[i], files.get(i)); 291 } 292 293 Arrays.sort(uids); 294 295 File [] fileArray = new File [uids.length]; 296 297 for (int i = 0; i < uids.length; i++) { 298 File file = (File ) uidToFile.get(uids[i]); 299 log.debug(file); 300 fileArray[i] = file; 301 } 302 303 return fileArray; 304 } 305 306 309 protected void collectFiles(File file, List files) { 310 if (file.isDirectory()) { 311 log.debug("Apply filter " + getFilter().getClass().getName() + " to: " + file); 312 File [] fileArray = file.listFiles(getFilter()); 313 314 for (int i = 0; i < fileArray.length; i++) { 315 collectFiles(fileArray[i], files); 316 } 317 } else { 318 files.add(file); 319 } 320 } 321 322 325 protected void traverse(TermEnum iterator, File file, File dumpDirectory) throws IOException { 326 if (file.isDirectory()) { 327 log.debug("Apply filter " + getFilter().getClass().getName() + " to: " + file); 328 File [] fileArray = file.listFiles(getFilter()); 329 330 for (int i = 0; i < fileArray.length; i++) { 331 traverse(iterator, fileArray[i], dumpDirectory); 332 } 333 } else { 334 log.debug(file); 335 iterateFiles(iterator, file, dumpDirectory); 336 } 337 } 338 } 339 | Popular Tags |