KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > opensymphony > module > sitemesh > parser > HTMLPageParser


1 package com.opensymphony.module.sitemesh.parser;
2
3 import com.opensymphony.module.sitemesh.HTMLPage;
4 import com.opensymphony.module.sitemesh.Page;
5 import com.opensymphony.module.sitemesh.PageParser;
6 import com.opensymphony.module.sitemesh.html.HTMLProcessor;
7 import com.opensymphony.module.sitemesh.html.State;
8 import com.opensymphony.module.sitemesh.html.StateTransitionRule;
9 import com.opensymphony.module.sitemesh.html.tokenizer.TagTokenizer;
10 import com.opensymphony.module.sitemesh.html.util.CharArray;
11 import com.opensymphony.module.sitemesh.parser.rules.BodyTagRule;
12 import com.opensymphony.module.sitemesh.parser.rules.ContentBlockExtractingRule;
13 import com.opensymphony.module.sitemesh.parser.rules.FramesetRule;
14 import com.opensymphony.module.sitemesh.parser.rules.HeadExtractingRule;
15 import com.opensymphony.module.sitemesh.parser.rules.HtmlAttributesRule;
16 import com.opensymphony.module.sitemesh.parser.rules.MetaTagRule;
17 import com.opensymphony.module.sitemesh.parser.rules.ParameterExtractingRule;
18 import com.opensymphony.module.sitemesh.parser.rules.TitleExtractingRule;
19 import com.opensymphony.module.sitemesh.parser.rules.MSOfficeDocumentPropertiesRule;
20
21 import java.io.IOException;
22
23 /**
24  * <b>WARNING - This is experimental - use at own risk!</b> Builds an HTMLPage object from an HTML document. This behaves
25  * similarly to the FastPageParser, however it's a complete rewrite that is simpler to add custom features to such as
26  * extraction and transformation of elements.
27  *
28  * @see TagTokenizer
29  *
30  * @author Joe Walnes
31  */

32 public class HTMLPageParser implements PageParser {
33
34     public Page parse(char[] data) throws IOException JavaDoc {
35         CharArray head = new CharArray(64);
36         CharArray body = new CharArray(4096);
37         HTMLPage page = new TokenizedHTMLPage(data, body, head);
38
39         HTMLProcessor htmlProcessor = new HTMLProcessor(data, body);
40         State defaultState = htmlProcessor.defaultState();
41         State xmlState = new State();
42
43         defaultState.addRule(new HtmlAttributesRule(page));
44         defaultState.addRule(new HeadExtractingRule(head));
45         defaultState.addRule(new MetaTagRule(page));
46         defaultState.addRule(new TitleExtractingRule(page));
47         defaultState.addRule(new BodyTagRule(page, body));
48         defaultState.addRule(new ParameterExtractingRule(page));
49         defaultState.addRule(new ContentBlockExtractingRule(page));
50         defaultState.addRule(new FramesetRule(page));
51         defaultState.addRule(new StateTransitionRule("xml", xmlState, true));
52
53         xmlState.addRule(new MSOfficeDocumentPropertiesRule(page));
54
55         htmlProcessor.process();
56
57         return page;
58     }
59
60 }
61
Popular Tags