KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > analysis > lang > TestNGramProfile


/* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.analysis.lang;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.Iterator;
import java.util.Vector;

import junit.framework.TestCase;
import org.apache.lucene.analysis.Token;

14 public class TestNGramProfile extends TestCase {
15
16   String JavaDoc tokencontent1 = "testaddtoken";
17   String JavaDoc tokencontent2 = "anotherteststring";
18
19   int[] counts1 = { 3, 2, 2, 2, 1, 1, 1, 1, 1 };
20
21   String JavaDoc[] chars1 = { "t", "_", "d", "e", "a", "k", "n", "o", "s" };
22
23   /**
24    * Test addFromToken method
25    *
26    */

27   public void testAddToken() {
28
29     NGramProfile p = new NGramProfile("test", 1, 1);
30
31     Token t = new Token(tokencontent1, 0, tokencontent1.length());
32     p.addFromToken(t);
33     p.normalize();
34     
35     testCounts(p.getSorted(), counts1);
36     testContents(p.getSorted(), chars1);
37   }
38
39   /**
40    * Test analyze method
41    */

42   public void testAnalyze() {
43     String JavaDoc tokencontent = "testmeagain";
44
45     NGramProfile p = new NGramProfile("test", 1, 1);
46     p.analyze(new StringBuffer JavaDoc(tokencontent));
47
48     //test that profile size is ok, eg 9 different NGramEntries "_tesmagin"
49
assertEquals(9, p.getSorted().size());
50   }
51
52   /**
53    * Test addNGrams method with StringBuffer argument
54    *
55    */

56   public void testAddNGramsStringBuffer() {
57     String JavaDoc tokencontent = "testmeagain";
58
59     NGramProfile p = new NGramProfile("test", 1, 1);
60     p.addNGrams(new StringBuffer JavaDoc(tokencontent));
61
62     //test that profile size is ok, eg 8 different NGramEntries "tesmagin"
63
assertEquals(8, p.getSorted().size());
64
65   }
66
67   /**
68    * test getSorted method
69    */

70   public void testGetSorted() {
71     int[] count = { 4, 3, 2, 1 };
72     String JavaDoc[] ngram = { "a", "b", "" + NGramProfile.SEPARATOR, "c" };
73
74     String JavaDoc teststring = "AAaaBbbC";
75
76     NGramProfile p = new NGramProfile("test", 1, 1);
77     p.analyze(new StringBuffer JavaDoc(teststring));
78
79     //test size of profile
80
assertEquals(4, p.getSorted().size());
81
82     testCounts(p.getSorted(), count);
83     testContents(p.getSorted(), ngram);
84
85   }
86
87   public void testGetSimilarity() {
88     NGramProfile a = new NGramProfile("a", 1, 1);
89     NGramProfile b = new NGramProfile("b", 1, 1);
90     
91     a.analyze(new StringBuffer JavaDoc(tokencontent1));
92     b.analyze(new StringBuffer JavaDoc(tokencontent2));
93
94     //because of rounding errors might slightly return different results
95
assertEquals(a.getSimilarity(b), b.getSimilarity(a), 0.0000001);
96
97   }
98
99   public void testExactMatch() {
100     NGramProfile a = new NGramProfile("a", 1, 1);
101     
102     a.analyze(new StringBuffer JavaDoc(tokencontent1));
103
104     assertEquals(a.getSimilarity(a), 0, 0);
105
106   }
107
108   
109   public void testIO() {
110     //Create profile and set some contents
111
NGramProfile a = new NGramProfile("a", 1, 1);
112     a.analyze(new StringBuffer JavaDoc(this.tokencontent1));
113
114     NGramProfile b = new NGramProfile("a_from_inputstream", 1, 1);
115
116     //save profile
117
ByteArrayOutputStream JavaDoc os = new ByteArrayOutputStream JavaDoc();
118
119     try {
120       a.save(os);
121       os.close();
122     } catch (Exception JavaDoc e) {
123       fail();
124     }
125
126     //load profile
127
InputStream JavaDoc is = new ByteArrayInputStream JavaDoc(os.toByteArray());
128     try {
129       b.load(is);
130       is.close();
131     } catch (Exception JavaDoc e) {
132       fail();
133     }
134
135     //check it
136
testCounts(b.getSorted(), counts1);
137     testContents(b.getSorted(), chars1);
138   }
139
140   private void testContents(Vector JavaDoc entries, String JavaDoc contents[]) {
141     int c = 0;
142     Iterator JavaDoc i = entries.iterator();
143
144     while (i.hasNext()) {
145       NGramProfile.NGramEntry nge = (NGramProfile.NGramEntry) i.next();
146       assertEquals(contents[c], nge.getSeq().toString());
147       c++;
148     }
149   }
150
151   private void testCounts(Vector JavaDoc entries, int counts[]) {
152     int c = 0;
153     Iterator JavaDoc i = entries.iterator();
154
155     while (i.hasNext()) {
156       NGramProfile.NGramEntry nge = (NGramProfile.NGramEntry) i.next();
157       assertEquals(counts[c], nge.getCount());
158       c++;
159     }
160   }
161 }
Popular Tags