1 37 package org.webharvest.runtime.processors; 38 39 import org.webharvest.definition.HtmlToXmlDef; 40 import org.webharvest.runtime.Scraper; 41 import org.webharvest.runtime.ScraperContext; 42 import org.webharvest.runtime.html.HtmlCleanerProcessor; 43 import org.webharvest.runtime.html.IXHtmlProcessor; 44 import org.webharvest.runtime.variables.IVariable; 45 import org.webharvest.runtime.variables.NodeVariable; 46 47 50 public class HtmlToXmlProcessor extends BaseProcessor { 51 52 private HtmlToXmlDef htmlToXmlDef; 53 54 public HtmlToXmlProcessor(HtmlToXmlDef htmlToXmlDef) { 55 super(htmlToXmlDef); 56 this.htmlToXmlDef = htmlToXmlDef; 57 } 58 59 public IVariable execute(Scraper scraper, ScraperContext context) { 60 IVariable body = getBodyTextContent(htmlToXmlDef, scraper, context); 61 62 IXHtmlProcessor htmlProcessor = new HtmlCleanerProcessor(); 63 String result = htmlProcessor.execute( body.toString() ); 64 65 return new NodeVariable(result); 66 } 67 68 } | Popular Tags |