KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > AnalysisTest


1 package org.apache.lucene;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.analysis.SimpleAnalyzer;
20 import org.apache.lucene.analysis.Analyzer;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.analysis.Token;
23
24 import java.io.Reader JavaDoc;
25 import java.io.StringReader JavaDoc;
26 import java.io.File JavaDoc;
27 import java.io.FileInputStream JavaDoc;
28 import java.io.BufferedReader JavaDoc;
29 import java.io.InputStreamReader JavaDoc;
30 import java.util.Date JavaDoc;
31
32 class AnalysisTest {
33   public static void main(String JavaDoc[] args) {
34     try {
35       test("This is a test", true);
36       // FIXME: OG: what's with this hard-coded file name??
37
test(new File JavaDoc("words.txt"), false);
38     } catch (Exception JavaDoc e) {
39       System.out.println(" caught a " + e.getClass() +
40              "\n with message: " + e.getMessage());
41     }
42   }
43
44   static void test(File JavaDoc file, boolean verbose)
45        throws Exception JavaDoc {
46     long bytes = file.length();
47     System.out.println(" Reading test file containing " + bytes + " bytes.");
48
49     FileInputStream JavaDoc is = new FileInputStream JavaDoc(file);
50     BufferedReader JavaDoc ir = new BufferedReader JavaDoc(new InputStreamReader JavaDoc(is));
51     
52     test(ir, verbose, bytes);
53
54     ir.close();
55   }
56
57   static void test(String JavaDoc text, boolean verbose) throws Exception JavaDoc {
58     System.out.println(" Tokenizing string: " + text);
59     test(new StringReader JavaDoc(text), verbose, text.length());
60   }
61
62   static void test(Reader JavaDoc reader, boolean verbose, long bytes)
63        throws Exception JavaDoc {
64     Analyzer analyzer = new SimpleAnalyzer();
65     TokenStream stream = analyzer.tokenStream(null, reader);
66
67     Date JavaDoc start = new Date JavaDoc();
68
69     int count = 0;
70     for (Token t = stream.next(); t!=null; t = stream.next()) {
71       if (verbose) {
72     System.out.println("Text=" + t.termText()
73                + " start=" + t.startOffset()
74                + " end=" + t.endOffset());
75       }
76       count++;
77     }
78
79     Date JavaDoc end = new Date JavaDoc();
80
81     long time = end.getTime() - start.getTime();
82     System.out.println(time + " milliseconds to extract " + count + " tokens");
83     System.out.println((time*1000.0)/count + " microseconds/token");
84     System.out.println((bytes * 1000.0 * 60.0 * 60.0)/(time * 1000000.0)
85                + " megabytes/hour");
86   }
87 }
88
Popular Tags