KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > ru > TestRussianAnalyzer


1 package org.apache.lucene.analysis.ru;
2
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

18
19 import junit.framework.TestCase;
20
21 import java.io.*;
22
23 import org.apache.lucene.analysis.TokenStream;
24 import org.apache.lucene.analysis.Token;
25
26 /**
27  * Test case for RussianAnalyzer.
28  *
29  * @author Boris Okner
30  * @version $Id: TestRussianAnalyzer.java,v 1.6 2004/03/29 22:48:06 cutting Exp $
31  */

32
33 public class TestRussianAnalyzer extends TestCase
34 {
35     private InputStreamReader inWords;
36
37     private InputStreamReader sampleUnicode;
38
39     private Reader inWordsKOI8;
40
41     private Reader sampleKOI8;
42
43     private Reader inWords1251;
44
45     private Reader sample1251;
46
47     private File dataDir;
48
49     protected void setUp() throws Exception JavaDoc
50     {
51       dataDir = new File(System.getProperty("dataDir"));
52     }
53
54     public void testUnicode() throws IOException
55     {
56         RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.UnicodeRussian);
57         inWords =
58             new InputStreamReader(
59                 new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUnicode.txt")),
60                 "Unicode");
61
62         sampleUnicode =
63             new InputStreamReader(
64                 new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resUnicode.htm")),
65                 "Unicode");
66
67         TokenStream in = ra.tokenStream("all", inWords);
68
69         RussianLetterTokenizer sample =
70             new RussianLetterTokenizer(
71                 sampleUnicode,
72                 RussianCharsets.UnicodeRussian);
73
74         for (;;)
75         {
76             Token token = in.next();
77
78             if (token == null)
79             {
80                 break;
81             }
82
83             Token sampleToken = sample.next();
84             assertEquals(
85                 "Unicode",
86                 token.termText(),
87                 sampleToken == null
88                 ? null
89                 : sampleToken.termText());
90         }
91
92         inWords.close();
93         sampleUnicode.close();
94     }
95
96     public void testKOI8() throws IOException
97     {
98         //System.out.println(new java.util.Date());
99
RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.KOI8);
100         // KOI8
101
inWordsKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testKOI8.txt")), "iso-8859-1");
102
103         sampleKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resKOI8.htm")), "iso-8859-1");
104
105         TokenStream in = ra.tokenStream("all", inWordsKOI8);
106         RussianLetterTokenizer sample =
107             new RussianLetterTokenizer(
108                 sampleKOI8,
109                 RussianCharsets.KOI8);
110
111         for (;;)
112         {
113             Token token = in.next();
114
115             if (token == null)
116             {
117                 break;
118             }
119
120             Token sampleToken = sample.next();
121             assertEquals(
122                 "KOI8",
123                 token.termText(),
124                 sampleToken == null
125                 ? null
126                 : sampleToken.termText());
127
128         }
129
130         inWordsKOI8.close();
131         sampleKOI8.close();
132     }
133
134     public void test1251() throws IOException
135     {
136         // 1251
137
inWords1251 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/test1251.txt")), "iso-8859-1");
138
139         sample1251 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/res1251.htm")), "iso-8859-1");
140
141         RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.CP1251);
142         TokenStream in = ra.tokenStream("", inWords1251);
143         RussianLetterTokenizer sample =
144             new RussianLetterTokenizer(
145                 sample1251,
146                 RussianCharsets.CP1251);
147
148         for (;;)
149         {
150             Token token = in.next();
151
152             if (token == null)
153             {
154                 break;
155             }
156
157             Token sampleToken = sample.next();
158             assertEquals(
159                 "1251",
160                 token.termText(),
161                 sampleToken == null
162                 ? null
163                 : sampleToken.termText());
164
165         }
166
167         inWords1251.close();
168         sample1251.close();
169     }
170 }
171
Popular Tags