1 /* 2 * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved 3 * 4 * This file is part of Resin(R) Open Source 5 * 6 * Each copy or derived work must preserve the copyright notice and this 7 * notice unmodified. 8 * 9 * Resin Open Source is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License as published by 11 * the Free Software Foundation; either version 2 of the License, or 12 * (at your option) any later version. 13 * 14 * Resin Open Source is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty 17 * of NON-INFRINGEMENT. See the GNU General Public License for more 18 * details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with Resin Open Source; if not, write to the 22 * Free SoftwareFoundation, Inc. 23 * 59 Temple Place, Suite 330 24 * Boston, MA 02111-1307 USA 25 * 26 * @author Scott Ferguson 27 */ 28 29 package com.caucho.xml; 30 31 /** 32 * A forgiving HTML parser interface. 33 * 34 * <p>The forgiving HTML parser is useful for extracting information from 35 * the web since many sites have not-quite-standard HTML. 36 * 37 * <p>To parse a file into a DOM Document use 38 * <pre><code> 39 * Document doc = new Html().parseDocument("foo.html"); 40 * </code></pre> 41 * 42 * <p>To parse a string into a DOM Document use 43 * <pre><code> 44 * String html = "<h1>small test</h1>"; 45 * Document doc = new Html().parseDocumentString(html); 46 * </code></pre> 47 * 48 * <p>To parse a file using the SAX API use 49 * <pre><code> 50 * Html html = new Html(); 51 * html.setContentHandler(myContentHandler); 52 * html.parse("foo.html"); 53 * </code></pre> 54 */ 55 public class LooseHtml extends XmlParser { 56 /** 57 * Create a new forgiving HTML parser 58 */ 59 public LooseHtml() 60 { 61 super(new HtmlPolicy(), null); 62 63 _policy.forgiving = true; 64 _forgiving = true; 65 _extraForgiving = true; 66 } 67 } 68