1 2 3 package net.nutch.analysis.lang; 4 5 import java.io.ByteArrayInputStream ; 6 import java.io.ByteArrayOutputStream ; 7 import java.io.InputStream ; 8 import java.util.Iterator ; 9 import java.util.Vector ; 10 11 import junit.framework.TestCase; 12 import org.apache.lucene.analysis.Token; 13 14 public class TestNGramProfile extends TestCase { 15 16 String tokencontent1 = "testaddtoken"; 17 String tokencontent2 = "anotherteststring"; 18 19 int[] counts1 = { 3, 2, 2, 2, 1, 1, 1, 1, 1 }; 20 21 String [] chars1 = { "t", "_", "d", "e", "a", "k", "n", "o", "s" }; 22 23 27 public void testAddToken() { 28 29 NGramProfile p = new NGramProfile("test", 1, 1); 30 31 Token t = new Token(tokencontent1, 0, tokencontent1.length()); 32 p.addFromToken(t); 33 p.normalize(); 34 35 testCounts(p.getSorted(), counts1); 36 testContents(p.getSorted(), chars1); 37 } 38 39 42 public void testAnalyze() { 43 String tokencontent = "testmeagain"; 44 45 NGramProfile p = new NGramProfile("test", 1, 1); 46 p.analyze(new StringBuffer (tokencontent)); 47 48 assertEquals(9, p.getSorted().size()); 50 } 51 52 56 public void testAddNGramsStringBuffer() { 57 String tokencontent = "testmeagain"; 58 59 NGramProfile p = new NGramProfile("test", 1, 1); 60 p.addNGrams(new StringBuffer (tokencontent)); 61 62 assertEquals(8, p.getSorted().size()); 64 65 } 66 67 70 public void testGetSorted() { 71 int[] count = { 4, 3, 2, 1 }; 72 String [] ngram = { "a", "b", "" + NGramProfile.SEPARATOR, "c" }; 73 74 String teststring = "AAaaBbbC"; 75 76 NGramProfile p = new NGramProfile("test", 1, 1); 77 p.analyze(new StringBuffer (teststring)); 78 79 assertEquals(4, p.getSorted().size()); 81 82 testCounts(p.getSorted(), count); 83 testContents(p.getSorted(), ngram); 84 85 } 86 87 public void testGetSimilarity() { 88 NGramProfile a = new NGramProfile("a", 1, 1); 89 NGramProfile b = new NGramProfile("b", 1, 1); 90 91 a.analyze(new StringBuffer (tokencontent1)); 92 b.analyze(new StringBuffer (tokencontent2)); 93 94 assertEquals(a.getSimilarity(b), b.getSimilarity(a), 0.0000001); 96 97 } 98 99 public void testExactMatch() { 100 NGramProfile a = new NGramProfile("a", 1, 1); 101 102 a.analyze(new StringBuffer (tokencontent1)); 103 104 assertEquals(a.getSimilarity(a), 0, 0); 105 106 } 107 108 109 public void testIO() { 110 NGramProfile a = new NGramProfile("a", 1, 1); 112 a.analyze(new StringBuffer (this.tokencontent1)); 113 114 NGramProfile b = new NGramProfile("a_from_inputstream", 1, 1); 115 116 ByteArrayOutputStream os = new ByteArrayOutputStream (); 118 119 try { 120 a.save(os); 121 os.close(); 122 } catch (Exception e) { 123 fail(); 124 } 125 126 InputStream is = new ByteArrayInputStream (os.toByteArray()); 128 try { 129 b.load(is); 130 is.close(); 131 } catch (Exception e) { 132 fail(); 133 } 134 135 testCounts(b.getSorted(), counts1); 137 testContents(b.getSorted(), chars1); 138 } 139 140 private void testContents(Vector entries, String contents[]) { 141 int c = 0; 142 Iterator i = entries.iterator(); 143 144 while (i.hasNext()) { 145 NGramProfile.NGramEntry nge = (NGramProfile.NGramEntry) i.next(); 146 assertEquals(contents[c], nge.getSeq().toString()); 147 c++; 148 } 149 } 150 151 private void testCounts(Vector entries, int counts[]) { 152 int c = 0; 153 Iterator i = entries.iterator(); 154 155 while (i.hasNext()) { 156 NGramProfile.NGramEntry nge = (NGramProfile.NGramEntry) i.next(); 157 assertEquals(counts[c], nge.getCount()); 158 c++; 159 } 160 } 161 } | Popular Tags |