1 package org.apache.lucene.analysis.br; 2 3 18 19 import org.apache.lucene.analysis.Analyzer; 20 import org.apache.lucene.analysis.LowerCaseFilter; 21 import org.apache.lucene.analysis.StopFilter; 22 import org.apache.lucene.analysis.TokenStream; 23 import org.apache.lucene.analysis.WordlistLoader; 24 import org.apache.lucene.analysis.standard.StandardFilter; 25 import org.apache.lucene.analysis.standard.StandardTokenizer; 26 import java.io.File ; 27 import java.io.IOException ; 28 import java.io.Reader ; 29 import java.util.Hashtable ; 30 import java.util.HashSet ; 31 import java.util.Set ; 32 33 40 public final class BrazilianAnalyzer extends Analyzer { 41 42 45 public final static String [] BRAZILIAN_STOP_WORDS = { 46 "a","ainda","alem","ambas","ambos","antes", 47 "ao","aonde","aos","apos","aquele","aqueles", 48 "as","assim","com","como","contra","contudo", 49 "cuja","cujas","cujo","cujos","da","das","de", 50 "dela","dele","deles","demais","depois","desde", 51 "desta","deste","dispoe","dispoem","diversa", 52 "diversas","diversos","do","dos","durante","e", 53 "ela","elas","ele","eles","em","entao","entre", 54 "essa","essas","esse","esses","esta","estas", 55 "este","estes","ha","isso","isto","logo","mais", 56 "mas","mediante","menos","mesma","mesmas","mesmo", 57 "mesmos","na","nas","nao","nas","nem","nesse","neste", 58 "nos","o","os","ou","outra","outras","outro","outros", 59 "pelas","pelas","pelo","pelos","perante","pois","por", 60 "porque","portanto","proprio","propios","quais","qual", 61 "qualquer","quando","quanto","que","quem","quer","se", 62 "seja","sem","sendo","seu","seus","sob","sobre","sua", 63 "suas","tal","tambem","teu","teus","toda","todas","todo", 64 "todos","tua","tuas","tudo","um","uma","umas","uns"}; 65 66 67 70 private Set stoptable = new HashSet (); 71 72 75 private Set excltable = new HashSet (); 76 77 80 public BrazilianAnalyzer() { 81 stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS ); 82 } 83 84 87 public BrazilianAnalyzer( String [] stopwords ) { 88 stoptable = StopFilter.makeStopSet( stopwords ); 89 } 90 91 94 public BrazilianAnalyzer( Hashtable stopwords ) { 95 stoptable = new HashSet (stopwords.keySet()); 96 } 97 98 101 public BrazilianAnalyzer( File stopwords ) throws IOException { 102 stoptable = WordlistLoader.getWordSet( stopwords ); 103 } 104 105 108 public void setStemExclusionTable( String [] exclusionlist ) { 109 excltable = StopFilter.makeStopSet( exclusionlist ); 110 } 111 114 public void setStemExclusionTable( Hashtable exclusionlist ) { 115 excltable = new HashSet (exclusionlist.keySet()); 116 } 117 120 public void setStemExclusionTable( File exclusionlist ) throws IOException { 121 excltable = WordlistLoader.getWordSet( exclusionlist ); 122 } 123 124 130 public final TokenStream tokenStream(String fieldName, Reader reader) { 131 TokenStream result = new StandardTokenizer( reader ); 132 result = new StandardFilter( result ); 133 result = new StopFilter( result, stoptable ); 134 result = new BrazilianStemFilter( result, excltable ); 135 result = new LowerCaseFilter( result ); 137 return result; 138 } 139 } 140 141 | Popular Tags |