package org.contineo.core.text.analyze.test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;

import org.contineo.core.text.analyze.Analyzer;
import org.contineo.core.text.analyze.Entry;
import org.contineo.core.text.analyze.de.GermanAnalyzer;

/**
 * Small command-line driver that reads a file, runs it through an
 * {@link Analyzer} and prints the word statistics to standard output.
 *
 * Usage: <code>AnalyzeFile &lt;language&gt; &lt;absolute file path&gt;</code>
 */
public class AnalyzeFile {

    public AnalyzeFile() {
    }

    /**
     * Analyzes the file named by <code>args[1]</code> and prints the total
     * word count, the relevant word count and the top words with their
     * numbers.
     *
     * @param args exactly two entries: a language code and the path of the
     *             file to analyze; any other argument count only prints a
     *             usage message
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.out.println("You must specify a language ('de' or 'en') and the absolute path of the file.");
            return;
        }

        // NOTE(review): args[0] (the language) is currently ignored and the
        // German analyzer is always used -- confirm whether an English
        // analyzer should be selected for 'en'.
        String filename = args[1];

        try {
            // The meaning of the constructor argument (4) is defined by
            // GermanAnalyzer and is not visible from this file.
            Analyzer analyzer = new GermanAnalyzer(4);
            analyzer.analyze(readFile(new File(filename)));

            Collection topWords = analyzer.getTopWords(10);
            long count = analyzer.getWordCount();
            int relevant = analyzer.relevantWords();

            System.out.println("words: " + count);
            System.out.println("relevant words: " + relevant);

            int hit = 1;
            for (Iterator iter = topWords.iterator(); iter.hasNext(); hit++) {
                Entry entry = (Entry) iter.next();
                System.out.print("hit " + hit + ": ");
                System.out.println(entry.getWord() + " - " + entry.getNumber());
            }
        } catch (Exception e) {
            // Report the failure instead of silently swallowing it, so a
            // missing file or analyzer error is visible to the user.
            System.err.println("Could not analyze file '" + filename + "': " + e);
        }
    }

    /**
     * Reads the complete file into a string, mapping every byte to the
     * character with the same code (i.e. bytes are interpreted as
     * ISO-8859-1, as the original byte-to-char cast did).
     *
     * @param file the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFile(File file) throws IOException {
        BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
        try {
            StringBuffer content = new StringBuffer();
            int b;
            // read() returns -1 at end of stream; 0 is a valid byte value
            // and must not terminate the loop.
            while ((b = in.read()) != -1) {
                content.append((char) b);
            }
            return content.toString();
        } finally {
            // Always release the file handle, even when reading fails.
            in.close();
        }
    }

}