package net.nutch.parse.text;

import java.util.Properties;

import net.nutch.protocol.Content;
import net.nutch.parse.*;
import net.nutch.util.*;

/**
 * {@link Parser} implementation that turns the raw bytes of a
 * {@link Content} into a single text string.
 */
public class TextParser implements Parser {

  /**
   * Builds a {@link Parse} from the given content.
   *
   * <p>The content's metadata is copied into a fresh {@link Properties}
   * object, which is handed to a {@link ParseData} together with an empty
   * string and an empty {@link Outlink} array. The content bytes are then
   * decoded to text using the character encoding that
   * {@code StringUtil.parseCharacterEncoding} returns for the content type.
   *
   * @param content the fetched content to convert
   * @return a {@link ParseImpl} wrapping the decoded text and the parse data
   * @throws ParseException if the encoding named by the content type is not
   *         supported by the running JVM
   */
  public Parse getParse(Content content) throws ParseException {
    // Work on a copy of the metadata rather than the object held by content.
    Properties copiedMetadata = new Properties();
    copiedMetadata.putAll(content.getMetadata());

    ParseData data = new ParseData("", new Outlink[0], copiedMetadata);

    String charsetName =
      StringUtil.parseCharacterEncoding(content.getContentType());

    return new ParseImpl(decode(content.getContent(), charsetName), data);
  }

  /**
   * Decodes {@code raw} using {@code charsetName}, or the platform default
   * charset when {@code charsetName} is {@code null}.
   */
  private static String decode(byte[] raw, String charsetName)
    throws ParseException {
    if (charsetName == null) {
      // No encoding was detected: new String(byte[]) falls back to the
      // JVM's platform default charset, so the result may differ by host.
      return new String(raw);
    }
    try {
      return new String(raw, charsetName);
    } catch (java.io.UnsupportedEncodingException e) {
      // Keep the original exception as the cause.
      throw new ParseException(e);
    }
  }
}