KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > ru > TestRussianStem


package org.apache.lucene.analysis.ru;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import junit.framework.TestCase;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;

27 public class TestRussianStem extends TestCase
28 {
29     private ArrayList JavaDoc words = new ArrayList JavaDoc();
30     private ArrayList JavaDoc stems = new ArrayList JavaDoc();
31
32     public TestRussianStem(String JavaDoc name)
33     {
34         super(name);
35     }
36
37     /**
38      * @see TestCase#setUp()
39      */

40     protected void setUp() throws Exception JavaDoc
41     {
42         super.setUp();
43         //System.out.println(new java.util.Date());
44
String JavaDoc str;
45         
46         File JavaDoc dataDir = new File JavaDoc(System.getProperty("dataDir"));
47
48         // open and read words into an array list
49
BufferedReader JavaDoc inWords =
50             new BufferedReader JavaDoc(
51                 new InputStreamReader JavaDoc(
52                     new FileInputStream JavaDoc(new File JavaDoc(dataDir, "/org/apache/lucene/analysis/ru/wordsUnicode.txt")),
53                     "Unicode"));
54         while ((str = inWords.readLine()) != null)
55         {
56             words.add(str);
57         }
58         inWords.close();
59
60         // open and read stems into an array list
61
BufferedReader JavaDoc inStems =
62             new BufferedReader JavaDoc(
63                 new InputStreamReader JavaDoc(
64                     new FileInputStream JavaDoc(new File JavaDoc(dataDir, "/org/apache/lucene/analysis/ru/stemsUnicode.txt")),
65                     "Unicode"));
66         while ((str = inStems.readLine()) != null)
67         {
68             stems.add(str);
69         }
70         inStems.close();
71     }
72
73     /**
74      * @see TestCase#tearDown()
75      */

76     protected void tearDown() throws Exception JavaDoc
77     {
78         super.tearDown();
79     }
80
81     public void testStem()
82     {
83         for (int i = 0; i < words.size(); i++)
84         {
85             //if ( (i % 100) == 0 ) System.err.println(i);
86
String JavaDoc realStem =
87                 RussianStemmer.stem(
88                     (String JavaDoc) words.get(i),
89                     RussianCharsets.UnicodeRussian);
90             assertEquals("unicode", stems.get(i), realStem);
91         }
92     }
93
94     private String JavaDoc printChars(String JavaDoc output)
95     {
96         StringBuffer JavaDoc s = new StringBuffer JavaDoc();
97         for (int i = 0; i < output.length(); i++)
98             {
99             s.append(output.charAt(i));
100         }
101         return s.toString();
102     }
103 }
104
Popular Tags