1 4 package org.javabb.lucene.analysis; 5 6 7 import java.io.IOException ; 8 import java.util.regex.Matcher ; 9 import java.util.regex.Pattern ; 10 11 import org.apache.lucene.analysis.Token; 12 import org.apache.lucene.analysis.TokenFilter; 13 import org.apache.lucene.analysis.TokenStream; 14 15 16 24 class SpecialCharFilter extends TokenFilter { 25 26 private static final String [] REPLACES; 27 private static final Pattern [] PATTERNS; 28 29 static { 30 31 REPLACES = new String []{"a", "e", "i", "o", "u", "c"}; 32 33 PATTERNS = new Pattern [REPLACES.length]; 34 35 PATTERNS[0] = Pattern.compile("[âãáàä]", Pattern.CASE_INSENSITIVE); 37 PATTERNS[1] = Pattern.compile("[éèêë]", Pattern.CASE_INSENSITIVE); 38 PATTERNS[2] = Pattern.compile("[íìîï]", Pattern.CASE_INSENSITIVE); 39 PATTERNS[3] = Pattern.compile("[óòôõö]", Pattern.CASE_INSENSITIVE); 40 PATTERNS[4] = Pattern.compile("[úùûü]", Pattern.CASE_INSENSITIVE); 41 PATTERNS[5] = Pattern.compile("ç", Pattern.CASE_INSENSITIVE); 42 43 } 44 45 48 public SpecialCharFilter ( TokenStream in ) { 49 50 super(in); 51 52 } 53 54 57 public Token next() throws IOException { 58 59 Token t = input.next(); 60 61 if (t == null) { 62 63 return null; 64 65 } 66 67 String termText = replaceSpecial(t.termText()); 68 Token token = new Token(termText, t.startOffset(), t.endOffset()); 69 70 return token; 71 72 } 73 74 private String replaceSpecial( String text ) { 75 76 String result = text; 77 78 for (int i = 0; i < PATTERNS.length; i++) { 79 80 Matcher matcher = PATTERNS[i].matcher(result); 81 result = matcher.replaceAll(REPLACES[i]); 82 83 } 84 85 return result; 86 87 } 88 } 89 | Popular Tags |