1 package org.enhydra.snapper.parsers; 2 3 import org.enhydra.snapper.api.Parser; 4 import java.io.File ; 5 import java.io.InputStream ; 6 7 import org.htmlparser.beans.StringBean; 8 import org.htmlparser.tags.TitleTag; 9 import org.htmlparser.Node; 10 11 public class HTMLParser implements org.enhydra.snapper.api.Parser { 12 13 String title, parsedText, fileName; 14 15 public String parse(InputStream is) throws java.io.IOException { return "";} 16 17 public void parse(){ 18 19 20 StringBean sb = new StringBean (); 21 sb.setLinks (false); 22 sb.setReplaceNonBreakingSpaces (false); 23 sb.setCollapse (true); 24 sb.setURL (fileName); 25 parsedText = sb.getStrings (); 26 27 try{ 28 29 org.htmlparser.Parser parser = new org.htmlparser.Parser(fileName); 30 31 Node[] allTITLETags = parser.extractAllNodesThatAre(TitleTag.class); 32 TitleTag titleTag = (TitleTag) allTITLETags[0]; 33 title = titleTag.getTitle(); 34 35 }catch(Exception e) 36 { 37 title = fileName.substring(fileName.lastIndexOf(File.separator)+1); 38 title = title.substring(0,title.lastIndexOf(".")); 39 } 40 41 42 if(parsedText==null) 43 { 44 parsedText=""; 45 } 46 if(title==null) 47 { 48 title = fileName.substring(fileName.lastIndexOf(File.separator)+1); 49 title = title.substring(0,title.lastIndexOf(".")); 50 } 51 52 } 53 54 55 public void setFileName(String fileName) { 56 this.fileName = fileName; 57 } 58 59 public String getParsedText() { 60 return parsedText; 61 } 62 63 public String getTitle() { 64 return title; 65 } 66 67 68 } | Popular Tags |