KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > snapper > parsers > HTMLParser


package org.enhydra.snapper.parsers;

import org.enhydra.snapper.api.Parser;
import java.io.File;
import java.io.InputStream;

import org.htmlparser.beans.StringBean;
import org.htmlparser.tags.TitleTag;
import org.htmlparser.Node;

11 public class HTMLParser implements org.enhydra.snapper.api.Parser {
12     
13     String JavaDoc title, parsedText, fileName;
14     
15     public String JavaDoc parse(InputStream JavaDoc is) throws java.io.IOException JavaDoc{ return "";}
16
17       public void parse(){
18    
19         
20         StringBean sb = new StringBean ();
21         sb.setLinks (false);
22         sb.setReplaceNonBreakingSpaces (false);
23         sb.setCollapse (true);
24         sb.setURL (fileName);
25         parsedText = sb.getStrings ();
26         
27         try{
28         
29         org.htmlparser.Parser parser = new org.htmlparser.Parser(fileName);
30         
31         Node[] allTITLETags = parser.extractAllNodesThatAre(TitleTag.class);
32         TitleTag titleTag = (TitleTag) allTITLETags[0];
33         title = titleTag.getTitle();
34       
35         }catch(Exception JavaDoc e)
36         {
37             title = fileName.substring(fileName.lastIndexOf(File.separator)+1);
38             title = title.substring(0,title.lastIndexOf("."));
39         }
40         
41         
42           if(parsedText==null)
43            {
44              parsedText="";
45            }
46           if(title==null)
47           {
48             title = fileName.substring(fileName.lastIndexOf(File.separator)+1);
49             title = title.substring(0,title.lastIndexOf("."));
50           }
51       
52       }
53
54       
55       public void setFileName(String JavaDoc fileName) {
56         this.fileName = fileName;
57       }
58
59       public String JavaDoc getParsedText() {
60         return parsedText;
61       }
62       
63       public String JavaDoc getTitle() {
64         return title;
65       }
66       
67     
68     }
Popular Tags