1 package SnowMailClient.SpamFilter; 2 3 import java.util.*; 4 5 10 public final class WordTokenizer implements Iterator 11 { 12 int pos = 0; 13 String text; 14 15 public WordTokenizer(String text) 16 { 17 this.text = text; 18 avanceToNextWordStart(); 19 } 21 public boolean hasMoreTokens() 22 { 23 return pos != text.length(); 24 } 25 26 public Object next() { return nextToken(); } 29 public boolean hasNext() { return hasMoreTokens(); } 30 public void remove() { throw new RuntimeException ("Not supported"); } 31 32 public String nextToken() 33 { 34 StringBuffer token = new StringBuffer (); 35 for(; pos<text.length(); pos++) 36 { 37 char chi = text.charAt(pos); 38 if(!isWordChar(chi)) 40 { 41 if(chi=='.' || chi==',') 42 { 43 char chi1 = text.charAt(pos-1); 47 if(pos+1==text.length()) break; 48 char chi2 = text.charAt(pos+1); 49 50 if(!Character.isDigit(chi1) 51 || !Character.isDigit(chi2)) break; 52 } 53 else 54 { 55 break; 56 } 57 } 58 59 token.append(chi); 60 } 61 avanceToNextWordStart(); 62 return token.toString(); 63 } 64 65 67 private void avanceToNextWordStart() 68 { 69 for(; pos<text.length(); pos++) 70 { 71 char chi = text.charAt(pos); 72 if(isWordChar(chi)) return; 73 } 74 } 75 76 private boolean isWordChar(char c) 77 { 78 if(c>='a' && c<='z') return true; 79 if(c>='A' && c<='Z') return true; 80 if(c>='0' && c<='9') return true; 81 if(c=='-' || c=='_' || c=='!') return true; 82 83 return false; 84 } 85 86 public static String [] extractWords(String text) 87 { 88 WordTokenizer wt = new WordTokenizer(text); 89 Vector<String > words = new Vector<String >(); 90 while(wt.hasMoreTokens()) 91 { 92 String w = wt.nextToken(); 93 if(w.length()!=0) 94 { 95 words.add(w); 98 } 99 else 100 { 101 System.out.println("################ Problem ?"); 102 } 103 } 104 return words.toArray(new String [words.size()]); 105 } 106 107 108 111 public static void main( String [] arguments ) 112 { 113 WordTokenizer wt = new WordTokenizer(" Hello2-t!!! th-is is teXtz 44.and another,a.b <html>Shit<pre></pre>" 114 +" http://62.65.146.182.is http://www.aaa.bbb a cool IP 34,23$ is my price</html>"); 115 while(wt.hasMoreTokens()) 116 { 117 System.out.println(""+wt.nextToken()); 118 } 119 120 double[] t = new double[]{1,2,3}; 121 test(t); 122 System.out.println(t[0]); 123 } 125 126 private static void test(double[] a) 127 { 128 a[0] = 12; 129 } 130 131 } | Popular Tags |