KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > contineo > core > text > lili > LanguageIdentifier


1 /*
2  * Created on 30.10.2004
3  */

4 package org.contineo.core.text.lili;
5
6
7 /**
8  * @author Michael Scholz
9  * @version 1.0
10  */

11 public class LanguageIdentifier {
12     
13     private long germanValue;
14     private long englishValue;
15     private long frenchValue;
16     
17     public LanguageIdentifier() {
18         germanValue = 0;
19         englishValue = 0;
20         frenchValue = 0;
21     }
22     
23     /**
24      * Identifies the language of a given text. This method can identify english,
25      * french and german.
26      * @param text
27      * @return Language code (de, en, fr)
28      */

29     public String JavaDoc identify(String JavaDoc text) {
30         Weighter deWeighter = new GermanWeighter();
31         Weighter enWeighter = new EnglishWeighter();
32         Weighter frWeighter = new FrenchWeighter();
33         char cbuf[] = text.toLowerCase().toCharArray();
34         for (int i = 2; i<text.length(); i++) {
35             StringBuffer JavaDoc trigram = new StringBuffer JavaDoc();
36             trigram.append(cbuf[i-2]);
37             trigram.append(cbuf[i-1]);
38             trigram.append(cbuf[i]);
39             germanValue += deWeighter.getWeight(trigram.toString().toLowerCase());
40             englishValue += enWeighter.getWeight(trigram.toString().toLowerCase());
41             frenchValue += frWeighter.getWeight(trigram.toString().toLowerCase());
42         }
43         if (germanValue > englishValue && germanValue > frenchValue)
44             return "de";
45         else
46             if (frenchValue > englishValue && frenchValue > germanValue)
47                 return "fr";
48             else
49                 return "en";
50     }
51 }
52
Popular Tags