1 17 18 19 20 package org.apache.lenya.lucene.parser; 21 22 import java.io.File ; 23 import java.io.FileReader ; 24 import java.io.IOException ; 25 import java.io.InputStreamReader ; 26 import java.io.Reader ; 27 import java.net.MalformedURLException ; 28 import java.net.URI ; 29 import java.net.URLConnection ; 30 31 import javax.swing.text.html.parser.ParserDelegator ; 32 33 public class SwingHTMLParser extends AbstractHTMLParser { 34 35 public SwingHTMLParser() { 36 } 37 38 41 public void parse(URI uri) throws ParseException { 42 try { 43 ParserDelegator delagator = new ParserDelegator (); 44 handler = new SwingHTMLHandler(); 45 46 Reader reader = new PreParser().parse(getReader(uri)); 47 delagator.parse(reader, handler, true); 48 } catch (IOException e) { 49 throw new ParseException(e); 50 } 51 } 52 53 private SwingHTMLHandler handler; 54 55 protected SwingHTMLHandler getHandler() { 56 return handler; 57 } 58 59 64 public String getTitle() { 65 return getHandler().getTitle(); 66 } 67 68 73 public String getKeywords() { 74 return getHandler().getKeywords(); 75 } 76 77 private Reader reader; 78 79 84 public Reader getReader() { 85 return getHandler().getReader(); 86 } 87 88 protected Reader getReader(URI uri) throws IOException , MalformedURLException { 89 if (uri.toString().startsWith("http:")) { 90 URLConnection connection = uri.toURL().openConnection(); 92 93 return new InputStreamReader (connection.getInputStream()); 94 } else { 95 return new FileReader (new File (uri)); 97 } 98 } 99 } 100 | Popular Tags |