package org.apache.lucene.demo;

import java.io.*;
import org.apache.lucene.document.*;
import org.apache.lucene.demo.html.HTMLParser;

/**
 * Static helpers that turn an HTML file into a Lucene {@link Document} and
 * derive a per-file identifier from its path and modification time.
 *
 * <p>The class only exposes static members; the private constructor prevents
 * instantiation.
 */
public class HTMLDocument {
  /** First character of the platform's {@code file.separator} system property. */
  static char dirSep = System.getProperty("file.separator").charAt(0);

  /**
   * Builds an identifier for a file from its path and last-modified time.
   *
   * <p>Every directory separator in the path is replaced by {@code '\u0000'},
   * then a further {@code "\u0000"} and the {@code DateField.timeToString}
   * rendering of the file's last-modified time are appended.
   *
   * @param f the file to identify
   * @return the path with NUL separators, a NUL, and the encoded timestamp
   */
  public static String uid(File f) {
    String flattenedPath = f.getPath().replace(dirSep, '\u0000');
    String stamp = DateField.timeToString(f.lastModified());
    return flattenedPath + "\u0000" + stamp;
  }

  /**
   * Converts an identifier produced by {@link #uid(File)} back into a
   * slash-separated URL-style path.
   *
   * <p>All {@code '\u0000'} characters become {@code '/'}, and everything from
   * the last {@code '/'} onward (the timestamp suffix in a {@code uid} value)
   * is cut off.
   *
   * @param uid the identifier to convert
   * @return the portion of the converted string before its last {@code '/'}
   * @throws StringIndexOutOfBoundsException if the converted string contains
   *         no {@code '/'} at all
   */
  public static String uid2url(String uid) {
    String slashed = uid.replace('\u0000', '/');
    int lastSlash = slashed.lastIndexOf('/');
    return slashed.substring(0, lastSlash);
  }

  /**
   * Creates a Lucene document describing the given HTML file.
   *
   * <p>Fields are added in this order: {@code url}, {@code modified},
   * {@code uid}, {@code contents}, {@code summary}, {@code title}.
   *
   * @param f the HTML file to describe
   * @return a new document carrying the six fields listed above
   * @throws IOException declared by this method; the visible body does not
   *         show which call raises it
   * @throws InterruptedException declared by this method; the visible body
   *         does not show which call raises it
   */
  public static Document Document(File f)
      throws IOException, InterruptedException {
    Document result = new Document();

    // "url": the file path with platform separators normalised to '/'.
    String slashedPath = f.getPath().replace(dirSep, '/');
    result.add(Field.UnIndexed("url", slashedPath));

    // "modified": the last-modified time in DateField's string encoding.
    String modifiedStamp = DateField.timeToString(f.lastModified());
    result.add(Field.Keyword("modified", modifiedStamp));

    // "uid": path plus timestamp, see uid(File).
    // NOTE(review): the three flags are presumably (store, index, tokenize)
    // -- confirm against the Field constructor of the Lucene version in use.
    Field uidField = new Field("uid", uid(f), false, true, false);
    result.add(uidField);

    HTMLParser htmlParser = new HTMLParser(f);

    // The parser accessors are called in the same order as before:
    // reader first, then summary, then title.
    result.add(Field.Text("contents", htmlParser.getReader()));
    result.add(Field.UnIndexed("summary", htmlParser.getSummary()));
    result.add(Field.Text("title", htmlParser.getTitle()));

    return result;
  }

  /** Not instantiable. */
  private HTMLDocument() {}
}