|                                                                                                              1
 2
 3   package net.nutch.analysis.lang;
 4
 5   import java.io.ByteArrayInputStream
  ; 6   import java.io.ByteArrayOutputStream
  ; 7   import java.io.InputStream
  ; 8   import java.util.Iterator
  ; 9   import java.util.Vector
  ; 10
 11  import junit.framework.TestCase;
 12  import org.apache.lucene.analysis.Token;
 13
 14  public class TestNGramProfile extends TestCase {
 15
 16    String
  tokencontent1 = "testaddtoken"; 17    String
  tokencontent2 = "anotherteststring"; 18
 19    int[] counts1 = { 3, 2, 2, 2, 1, 1, 1, 1, 1 };
 20
 21    String
  [] chars1 = { "t", "_", "d", "e", "a", "k", "n", "o", "s" }; 22
 23
 27    public void testAddToken() {
 28
 29      NGramProfile p = new NGramProfile("test", 1, 1);
 30
 31      Token t = new Token(tokencontent1, 0, tokencontent1.length());
 32      p.addFromToken(t);
 33      p.normalize();
 34
 35      testCounts(p.getSorted(), counts1);
 36      testContents(p.getSorted(), chars1);
 37    }
 38
 39
 42    public void testAnalyze() {
 43      String
  tokencontent = "testmeagain"; 44
 45      NGramProfile p = new NGramProfile("test", 1, 1);
 46      p.analyze(new StringBuffer
  (tokencontent)); 47
 48          assertEquals(9, p.getSorted().size());
 50    }
 51
 52
 56    public void testAddNGramsStringBuffer() {
 57      String
  tokencontent = "testmeagain"; 58
 59      NGramProfile p = new NGramProfile("test", 1, 1);
 60      p.addNGrams(new StringBuffer
  (tokencontent)); 61
 62          assertEquals(8, p.getSorted().size());
 64
 65    }
 66
 67
 70    public void testGetSorted() {
 71      int[] count = { 4, 3, 2, 1 };
 72      String
  [] ngram = { "a", "b", "" + NGramProfile.SEPARATOR, "c" }; 73
 74      String
  teststring = "AAaaBbbC"; 75
 76      NGramProfile p = new NGramProfile("test", 1, 1);
 77      p.analyze(new StringBuffer
  (teststring)); 78
 79          assertEquals(4, p.getSorted().size());
 81
 82      testCounts(p.getSorted(), count);
 83      testContents(p.getSorted(), ngram);
 84
 85    }
 86
 87    public void testGetSimilarity() {
 88      NGramProfile a = new NGramProfile("a", 1, 1);
 89      NGramProfile b = new NGramProfile("b", 1, 1);
 90
 91      a.analyze(new StringBuffer
  (tokencontent1)); 92      b.analyze(new StringBuffer
  (tokencontent2)); 93
 94          assertEquals(a.getSimilarity(b), b.getSimilarity(a), 0.0000001);
 96
 97    }
 98
 99    public void testExactMatch() {
 100     NGramProfile a = new NGramProfile("a", 1, 1);
 101
 102     a.analyze(new StringBuffer
  (tokencontent1)); 103
 104     assertEquals(a.getSimilarity(a), 0, 0);
 105
 106   }
 107
 108
 109   public void testIO() {
 110         NGramProfile a = new NGramProfile("a", 1, 1);
 112     a.analyze(new StringBuffer
  (this.tokencontent1)); 113
 114     NGramProfile b = new NGramProfile("a_from_inputstream", 1, 1);
 115
 116         ByteArrayOutputStream
  os = new ByteArrayOutputStream  (); 118
 119     try {
 120       a.save(os);
 121       os.close();
 122     } catch (Exception
  e) { 123       fail();
 124     }
 125
 126         InputStream
  is = new ByteArrayInputStream  (os.toByteArray()); 128     try {
 129       b.load(is);
 130       is.close();
 131     } catch (Exception
  e) { 132       fail();
 133     }
 134
 135         testCounts(b.getSorted(), counts1);
 137     testContents(b.getSorted(), chars1);
 138   }
 139
 140   private void testContents(Vector
  entries, String  contents[]) { 141     int c = 0;
 142     Iterator
  i = entries.iterator(); 143
 144     while (i.hasNext()) {
 145       NGramProfile.NGramEntry nge = (NGramProfile.NGramEntry) i.next();
 146       assertEquals(contents[c], nge.getSeq().toString());
 147       c++;
 148     }
 149   }
 150
 151   private void testCounts(Vector
  entries, int counts[]) { 152     int c = 0;
 153     Iterator
  i = entries.iterator(); 154
 155     while (i.hasNext()) {
 156       NGramProfile.NGramEntry nge = (NGramProfile.NGramEntry) i.next();
 157       assertEquals(counts[c], nge.getCount());
 158       c++;
 159     }
 160   }
 161 }
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |