KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > SnowMailClient > SpamFilter > Word


1 package SnowMailClient.SpamFilter;
2
3 import snow.utils.storage.*;
4 import java.util.*;
5
/**
 * A word known to the spam filter, together with the number of times it was
 * seen in ham and in spam and the spam probability derived from those counts.
 *
 * <p>Life cycle:
 * <ol>
 *   <li>during the train operation, the occurrences are counted through
 *       {@link #addHamOccurence()} and {@link #addSpamOccurence()};</li>
 *   <li>{@link #calculateProbs(double, double)} is called to turn the counts
 *       into a spam probability.</li>
 * </ol>
 *
 * <p>Until a probability has been calculated or set, {@link #getSpamProb()}
 * returns {@code -1}.
 *
 * <p>Identity ({@link #equals(Object)} / {@link #hashCode()}) is based on the
 * word text only, whereas the natural ordering sorts by decreasing spam
 * probability first. The natural ordering is therefore not consistent with
 * {@code equals}: two instances holding the same text but different
 * probabilities are equal, yet do not compare as 0.
 */
public final class Word implements Comparable<Word>
{
  /** Below this many occurrences a one-sided word gets the less extreme probability. */
  private static final int FREQUENT_THRESHOLD = 10;

  /** Probability for a word never seen in ham and seen fewer than 10 times in spam. */
  private static final double SPAM_ONLY_RARE = 0.9998;
  /** Probability for a word never seen in ham and seen at least 10 times in spam. */
  private static final double SPAM_ONLY_FREQUENT = 0.9999;
  /** Probability for a word never seen in spam and seen fewer than 10 times in ham. */
  private static final double HAM_ONLY_RARE = 0.0003;
  /** Probability for a word never seen in spam and seen at least 10 times in ham. */
  private static final double HAM_ONLY_FREQUENT = 0.0002;

  /** The word text; the only state used by {@code equals} and {@code hashCode}. */
  public String word;

  private int occurencesInHAM = 0;
  private int occurencesInSPAM = 0;
  /** {@code -1} means "not calculated yet". */
  private double spamProb = -1;


  /**
   * Creates a word with no recorded occurrences.
   *
   * @param w the word text
   */
  public Word(String w)
  {
    this.word = w;
  }

  /**
   * Creates a word with already known occurrence counts.
   * The spam probability is not calculated by this constructor.
   *
   * @param w the word text
   * @param occurencesInHAM initial ham occurrence count
   * @param occurencesInSPAM initial spam occurrence count
   */
  public Word(String w, int occurencesInHAM, int occurencesInSPAM)
  {
    this.word = w;
    this.occurencesInHAM = occurencesInHAM;
    this.occurencesInSPAM = occurencesInSPAM;
  }

  /** @return the current spam probability, or {@code -1} if none was calculated or set */
  public double getSpamProb() { return spamProb; }

  /**
   * Overrides the spam probability directly (called in some special cases).
   *
   * @param p the probability to store
   */
  public void setSpamProb(double p) { spamProb = p; }

  /** Records one more occurrence of this word in spam. */
  public void addSpamOccurence() { occurencesInSPAM++; }

  /** Records one more occurrence of this word in ham. */
  public void addHamOccurence() { occurencesInHAM++; }

  /** @return the number of recorded ham occurrences */
  public int getHamOccurences() { return occurencesInHAM; }

  /** @return the number of recorded spam occurrences */
  public int getSpamOccurences() { return occurencesInSPAM; }


  /**
   * Derives the spam probability from the occurrence counts and stores it.
   *
   * <ul>
   *   <li>Never seen in ham: {@code 0.9998} when seen fewer than 10 times in
   *       spam, {@code 0.9999} otherwise. This branch is tested first, so it
   *       also applies to a word with no occurrences at all.</li>
   *   <li>Never seen in spam: {@code 0.0003} when seen fewer than 10 times in
   *       ham, {@code 0.0002} otherwise.</li>
   *   <li>Seen in both: {@code (s/totSpams) / (2*h/totHams + s/totSpams)},
   *       where {@code h} and {@code s} are the ham and spam occurrence
   *       counts; ham is counted twice.</li>
   * </ul>
   *
   * @param totHams the total number of ham messages
   * @param totSpams the total number of spam messages (by symmetry with
   *        {@code totHams})
   */
  public void calculateProbs(double totHams, double totSpams)
  {
    if (occurencesInHAM == 0)
    {
      spamProb = (occurencesInSPAM < FREQUENT_THRESHOLD) ? SPAM_ONLY_RARE : SPAM_ONLY_FREQUENT;
      return;
    }

    if (occurencesInSPAM == 0)
    {
      spamProb = (occurencesInHAM < FREQUENT_THRESHOLD) ? HAM_ONLY_RARE : HAM_ONLY_FREQUENT;
      return;
    }

    // Both counts are non-zero. Ham is counted two times.
    double spamRatio = (double) occurencesInSPAM / totSpams;
    double weightedHamRatio = (double) occurencesInHAM * 2.0 / totHams;
    spamProb = spamRatio / (weightedHamRatio + spamRatio);
  }

  //
  // search
  //

  /**
   * Compares the word text with the given string, ignoring case.
   *
   * @param w the string to compare with
   * @return {@code true} if the word text equals {@code w} ignoring case
   */
  public boolean equalsIgnoreCase(String w)
  {
    return word.equalsIgnoreCase(w);
  }

  /**
   * Two words are equal when their text is equal; counts and probability are
   * ignored.
   *
   * @param o the object to compare with
   * @return {@code true} if {@code o} is a {@code Word} with the same text;
   *         {@code false} for {@code null} or any other type
   */
  @Override
  public boolean equals(Object o)
  {
    if (this == o)
    {
      return true;
    }
    // Per the Object.equals contract, null and foreign types are simply
    // "not equal" rather than an error.
    if (!(o instanceof Word))
    {
      return false;
    }
    Word other = (Word) o;
    return word.equals(other.word);
  }

  /** @return the hash code of the word text, matching {@link #equals(Object)} */
  @Override
  public int hashCode()
  {
    return word.hashCode();
  }

  /**
   * Orders by decreasing spam probability; words with the same probability
   * are ordered by their text.
   *
   * @param w2 the word to compare with
   * @return a negative value if this word sorts before {@code w2}, a positive
   *         value if it sorts after, 0 if probability and text are the same
   */
  public int compareTo(Word w2)
  {
    if (this.spamProb > w2.spamProb)
    {
      return -1;
    }
    if (this.spamProb < w2.spamProb)
    {
      return 1;
    }

    // same probability => compare words
    return this.word.compareTo(w2.word);
  }

  /** @return {@code "<word> (<ham occurrences> / <spam occurrences> / <spam probability>)"} */
  @Override
  public String toString()
  {
    return word + " (" + occurencesInHAM + " / " + occurencesInSPAM + " / " + this.spamProb + ")";
  }


  /**
   * Orders words by decreasing distance of their spam probability from
   * {@code 0.5}, i.e. the words furthest from 0.5 come first.
   */
  static class RelevanceComparator implements Comparator<Word>
  {
    /**
     * @param w1 first word
     * @param w2 second word
     * @return a negative value if {@code w1} is further from 0.5 than
     *         {@code w2}, a positive value if it is closer; on equal
     *         distance, the comparison of the word texts
     */
    public int compare(Word w1, Word w2)
    {
      double d1 = Math.abs(w1.getSpamProb() - 0.5);
      double d2 = Math.abs(w2.getSpamProb() - 0.5);

      if (d1 > d2)
      {
        return -1;
      }
      if (d1 < d2)
      {
        return 1;
      }

      // d1 == d2: fall back to the text. This is important, to be able to
      // suppress multiple occurrences of the same word later.
      return w1.word.compareTo(w2.word);
    }
  }

} // Word
Popular Tags