package org.outerj.daisy.htmlcleaner;

import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import org.outerj.daisy.xmlutil.SaxBuffer;

/**
 * Cleans up HTML-like text according to an {@link HtmlCleanerTemplate}.
 *
 * <p>The input is first passed through {@link GeckoCorruptTagCleaner}, then parsed
 * by a {@link NekoHtmlParser} into a {@link SaxBuffer}. The buffered events are
 * processed by an {@link HtmlRepairer} and written out by a
 * {@link StylingHtmlSerializer}; both are configured with the same template.
 *
 * <p>The constructor is package-private, so instances have to be obtained from
 * within this package.
 */
public class HtmlCleaner {
    /** Initial capacity, in bytes, of the in-memory buffers used by the convenience methods. */
    private static final int INITIAL_BUFFER_SIZE = 10000;

    /** Template handed to both the serializer and the repairer. */
    private final HtmlCleanerTemplate template;

    /**
     * Creates a cleaner that uses the given template.
     *
     * @param template the template passed to the serializer and the repairer
     */
    HtmlCleaner(HtmlCleanerTemplate template) {
        this.template = template;
    }

    /**
     * Cleans the given text and writes the result to the supplied stream.
     *
     * <p>This method does not close {@code outputStream} itself.
     *
     * @param somethingWhichLooksLikeHtml the text to clean
     * @param outputStream the stream the serializer writes the cleaned result to
     * @throws Exception if any of the parsing, repairing or serializing steps fails
     */
    public void clean(String somethingWhichLooksLikeHtml, OutputStream outputStream) throws Exception {
        NekoHtmlParser parser = new NekoHtmlParser();
        SaxBuffer buffer = parser.parse(GeckoCorruptTagCleaner.clean(somethingWhichLooksLikeHtml));

        StylingHtmlSerializer serializer = new StylingHtmlSerializer(template);
        serializer.setOutputStream(outputStream);
        HtmlRepairer repairer = new HtmlRepairer(template);

        // The serializer is wrapped in a MergeCharacterEventsHandler and receives
        // the repairer's output through it.
        repairer.clean(buffer, new MergeCharacterEventsHandler(serializer));
    }

    /**
     * Cleans the given text and returns the serialized result as bytes.
     *
     * @param somethingWhichLooksLikeHtml the text to clean
     * @return the bytes written by the serializer
     * @throws Exception if cleaning fails
     */
    public byte[] cleanToByteArray(String somethingWhichLooksLikeHtml) throws Exception {
        return cleanToBuffer(somethingWhichLooksLikeHtml).toByteArray();
    }

    /**
     * Cleans the given text and returns the serialized result as a string.
     *
     * <p>The serialized bytes are decoded as UTF-8.
     * NOTE(review): this presumes the serializer emits UTF-8 -- confirm against
     * {@link StylingHtmlSerializer}.
     *
     * @param somethingWhichLooksLikeHtml the text to clean
     * @return the serialized result, decoded as UTF-8
     * @throws Exception if cleaning fails
     */
    public String cleanToString(String somethingWhichLooksLikeHtml) throws Exception {
        return cleanToBuffer(somethingWhichLooksLikeHtml).toString("UTF-8");
    }

    /** Runs {@link #clean(String, OutputStream)} against a fresh in-memory buffer and returns that buffer. */
    private ByteArrayOutputStream cleanToBuffer(String somethingWhichLooksLikeHtml) throws Exception {
        ByteArrayOutputStream os = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE);
        clean(somethingWhichLooksLikeHtml, os);
        return os;
    }
}