1 package org.apache.lucene; 2 3 18 19 import org.apache.lucene.analysis.SimpleAnalyzer; 20 import org.apache.lucene.analysis.Analyzer; 21 import org.apache.lucene.analysis.TokenStream; 22 import org.apache.lucene.analysis.Token; 23 24 import java.io.Reader ; 25 import java.io.StringReader ; 26 import java.io.File ; 27 import java.io.FileInputStream ; 28 import java.io.BufferedReader ; 29 import java.io.InputStreamReader ; 30 import java.util.Date ; 31 32 class AnalysisTest { 33 public static void main(String [] args) { 34 try { 35 test("This is a test", true); 36 test(new File ("words.txt"), false); 38 } catch (Exception e) { 39 System.out.println(" caught a " + e.getClass() + 40 "\n with message: " + e.getMessage()); 41 } 42 } 43 44 static void test(File file, boolean verbose) 45 throws Exception { 46 long bytes = file.length(); 47 System.out.println(" Reading test file containing " + bytes + " bytes."); 48 49 FileInputStream is = new FileInputStream (file); 50 BufferedReader ir = new BufferedReader (new InputStreamReader (is)); 51 52 test(ir, verbose, bytes); 53 54 ir.close(); 55 } 56 57 static void test(String text, boolean verbose) throws Exception { 58 System.out.println(" Tokenizing string: " + text); 59 test(new StringReader (text), verbose, text.length()); 60 } 61 62 static void test(Reader reader, boolean verbose, long bytes) 63 throws Exception { 64 Analyzer analyzer = new SimpleAnalyzer(); 65 TokenStream stream = analyzer.tokenStream(null, reader); 66 67 Date start = new Date (); 68 69 int count = 0; 70 for (Token t = stream.next(); t!=null; t = stream.next()) { 71 if (verbose) { 72 System.out.println("Text=" + t.termText() 73 + " start=" + t.startOffset() 74 + " end=" + t.endOffset()); 75 } 76 count++; 77 } 78 79 Date end = new Date (); 80 81 long time = end.getTime() - start.getTime(); 82 System.out.println(time + " milliseconds to extract " + count + " tokens"); 83 System.out.println((time*1000.0)/count + " microseconds/token"); 84 System.out.println((bytes * 1000.0 * 60.0 * 60.0)/(time * 1000000.0) 85 + " megabytes/hour"); 86 } 87 } 88 | Popular Tags |