KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > javax > swing > text > html > parser > DocumentParser


1 /*
2  * @(#)DocumentParser.java 1.28 03/12/19
3  *
4  * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
5  * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6  */

7
8 package javax.swing.text.html.parser;
9
10 import javax.swing.text.SimpleAttributeSet JavaDoc;
11 import javax.swing.text.html.HTMLEditorKit JavaDoc;
12 import javax.swing.text.html.HTML JavaDoc;
13 import javax.swing.text.ChangedCharSetException JavaDoc;
14
15 import java.util.*;
16 import java.io.*;
17 import java.net.*;
18
19 /**
20  * A Parser for HTML Documents (actually, you can specify a DTD, but
21  * you should really only use this class with the html dtd in swing).
22  * Reads an InputStream of HTML and
23  * invokes the appropriate methods in the ParserCallback class. This
24  * is the default parser used by HTMLEditorKit to parse HTML url's.
25  * <p>This will message the callback for all valid tags, as well as
26  * tags that are implied but not explicitly specified. For example, the
27  * html string (&lt;p&gt;blah) only has a p tag defined. The callback
28  * will see the following methods:
29  * <ol><li><i>handleStartTag(html, ...)</i></li>
30  * <li><i>handleStartTag(head, ...)</i></li>
31  * <li><i>handleEndTag(head)</i></li>
32  * <li><i>handleStartTag(body, ...)</i></li>
33  * <li>handleStartTag(p, ...)</li>
34  * <li>handleText(...)</li>
35  * <li><i>handleEndTag(p)</i></li>
36  * <li><i>handleEndTag(body)</i></li>
37  * <li><i>handleEndTag(html)</i></li>
38  * </ol>
39  * The items in <i>italic</i> are implied, that is, although they were not
40  * explicitly specified, to be correct html they should have been present
41  * (head isn't necessary, but it is still generated). For tags that
42  * are implied, the AttributeSet argument will have a value of
43  * <code>Boolean.TRUE</code> for the key
44  * <code>HTMLEditorKit.ParserCallback.IMPLIED</code>.
45  * <p>HTML.Attributes defines a type safe enumeration of html attributes.
46  * If an attribute key of a tag is defined in HTML.Attribute, the
47  * HTML.Attribute will be used as the key, otherwise a String will be used.
48  * For example &lt;p foo=bar class=neat&gt; has two attributes. foo is
49  * not defined in HTML.Attribute, whereas class is; therefore the
50  * AttributeSet will have two values in it, HTML.Attribute.CLASS with
51  * a String value of 'neat' and the String key 'foo' with a String value of
52  * 'bar'.
53  * <p>The position argument will indicate the start of the tag, comment
54  * or text. Similar to arrays, the first character in the stream has a
55  * position of 0. For tags that are
56  * implied the position will indicate
57  * the location of the next encountered tag. In the first example,
58  * the implied start body and html tags will have the same position as the
59  * p tag, and the implied end p, html and body tags will all have the same
60  * position.
61  * <p>As html skips whitespace the position for text will be the position
62  * of the first valid character, eg in the string '\n\n\nblah'
63  * the text 'blah' will have a position of 3, the newlines are skipped.
64  * <p>
65  * For attributes that do not have a value, eg in the html
66  * string <code>&lt;foo blah&gt;</code> the attribute <code>blah</code>
67  * does not have a value, there are two possible values that will be
68  * placed in the AttributeSet's value:
69  * <ul>
70  * <li>If the DTD does not contain a definition for the element, or the
71  * definition does not have an explicit value then the value in the
72  * AttributeSet will be <code>HTML.NULL_ATTRIBUTE_VALUE</code>.
73  * <li>If the DTD contains an explicit value, as in:
74  * <code>&lt;!ATTLIST OPTION selected (selected) #IMPLIED&gt;</code>
75  * this value from the dtd (in this case selected) will be used.
76  * </ul>
77  * <p>
78  * Once the stream has been parsed, the callback is notified of the most
79  * likely end of line string. The end of line string will be one of
80  * \n, \r or \r\n, whichever is encountered the most in parsing the
81  * stream.
82  *
83  * @version 1.28 12/19/03
84  * @author Sunita Mani
85  */

86 public class DocumentParser extends javax.swing.text.html.parser.Parser JavaDoc {
87
88     private int inbody;
89     private int intitle;
90     private int inhead;
91     private int instyle;
92     private int inscript;
93     private boolean seentitle;
94     private HTMLEditorKit.ParserCallback JavaDoc callback = null;
95     private boolean ignoreCharSet = false;
96     private static final boolean debugFlag = false;
97
98     public DocumentParser(DTD JavaDoc dtd) {
99     super(dtd);
100     }
101  
102     public void parse(Reader in, HTMLEditorKit.ParserCallback JavaDoc callback, boolean ignoreCharSet) throws IOException {
103     this.ignoreCharSet = ignoreCharSet;
104     this.callback = callback;
105     parse(in);
106     // end of line
107
callback.handleEndOfLineString(getEndOfLineString());
108     }
109
110     /**
111      * Handle Start Tag.
112      */

113     protected void handleStartTag(TagElement JavaDoc tag) {
114
115     Element JavaDoc elem = tag.getElement();
116     if (elem == dtd.body) {
117         inbody++;
118     } else if (elem == dtd.html) {
119     } else if (elem == dtd.head) {
120         inhead++;
121     } else if (elem == dtd.title) {
122         intitle++;
123     } else if (elem == dtd.style) {
124         instyle++;
125     } else if (elem == dtd.script) {
126             inscript++;
127     }
128     if (debugFlag) {
129         if (tag.fictional()) {
130         debug("Start Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
131         } else {
132         debug("Start Tag: " + tag.getHTMLTag() + " attributes: " +
133               getAttributes() + " pos: " + getCurrentPos());
134         }
135     }
136     if (tag.fictional()) {
137         SimpleAttributeSet JavaDoc attrs = new SimpleAttributeSet JavaDoc();
138         attrs.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED,
139                    Boolean.TRUE);
140         callback.handleStartTag(tag.getHTMLTag(), attrs,
141                     getBlockStartPosition());
142     } else {
143         callback.handleStartTag(tag.getHTMLTag(), getAttributes(),
144                     getBlockStartPosition());
145         flushAttributes();
146     }
147     }
148
149
150     protected void handleComment(char text[]) {
151     if (debugFlag) {
152         debug("comment: ->" + new String JavaDoc(text) + "<-"
153           + " pos: " + getCurrentPos());
154     }
155     callback.handleComment(text, getBlockStartPosition());
156     }
157
158     /**
159      * Handle Empty Tag.
160      */

161     protected void handleEmptyTag(TagElement JavaDoc tag) throws ChangedCharSetException JavaDoc {
162
163     Element JavaDoc elem = tag.getElement();
164     if (elem == dtd.meta && !ignoreCharSet) {
165         SimpleAttributeSet JavaDoc atts = getAttributes();
166         if (atts != null) {
167         String JavaDoc content = (String JavaDoc)atts.getAttribute(HTML.Attribute.CONTENT);
168         if (content != null) {
169             if ("content-type".equalsIgnoreCase((String JavaDoc)atts.getAttribute(HTML.Attribute.HTTPEQUIV))) {
170             if (!content.equalsIgnoreCase("text/html") &&
171                 !content.equalsIgnoreCase("text/plain")) {
172                 throw new ChangedCharSetException JavaDoc(content, false);
173             }
174             } else if ("charset" .equalsIgnoreCase((String JavaDoc)atts.getAttribute(HTML.Attribute.HTTPEQUIV))) {
175             throw new ChangedCharSetException JavaDoc(content, true);
176             }
177         }
178         }
179     }
180     if (inbody != 0 || elem == dtd.meta || elem == dtd.base || elem == dtd.isindex || elem == dtd.style || elem == dtd.link) {
181         if (debugFlag) {
182         if (tag.fictional()) {
183             debug("Empty Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
184         } else {
185             debug("Empty Tag: " + tag.getHTMLTag() + " attributes: "
186               + getAttributes() + " pos: " + getCurrentPos());
187         }
188         }
189         if (tag.fictional()) {
190         SimpleAttributeSet JavaDoc attrs = new SimpleAttributeSet JavaDoc();
191         attrs.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED,
192                    Boolean.TRUE);
193         callback.handleSimpleTag(tag.getHTMLTag(), attrs,
194                      getBlockStartPosition());
195         } else {
196         callback.handleSimpleTag(tag.getHTMLTag(), getAttributes(),
197                      getBlockStartPosition());
198         flushAttributes();
199         }
200     }
201     }
202
203     /**
204      * Handle End Tag.
205      */

206     protected void handleEndTag(TagElement JavaDoc tag) {
207     Element JavaDoc elem = tag.getElement();
208     if (elem == dtd.body) {
209         inbody--;
210     } else if (elem == dtd.title) {
211         intitle--;
212         seentitle = true;
213     } else if (elem == dtd.head) {
214             inhead--;
215     } else if (elem == dtd.style) {
216             instyle--;
217     } else if (elem == dtd.script) {
218             inscript--;
219     }
220     if (debugFlag) {
221         debug("End Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
222     }
223     callback.handleEndTag(tag.getHTMLTag(), getBlockStartPosition());
224
225     }
226
227     /**
228      * Handle Text.
229      */

230     protected void handleText(char data[]) {
231     if (data != null) {
232         if (inscript != 0) {
233         callback.handleComment(data, getBlockStartPosition());
234         return;
235         }
236         if (inbody != 0 || ((instyle != 0) ||
237                 ((intitle != 0) && !seentitle))) {
238         if (debugFlag) {
239             debug("text: ->" + new String JavaDoc(data) + "<-" + " pos: " + getCurrentPos());
240         }
241         callback.handleText(data, getBlockStartPosition());
242         }
243     }
244     }
245
246     /*
247      * Error handling.
248      */

249     protected void handleError(int ln, String JavaDoc errorMsg) {
250     if (debugFlag) {
251         debug("Error: ->" + errorMsg + "<-" + " pos: " + getCurrentPos());
252     }
253     /* PENDING: need to improve the error string. */
254     callback.handleError(errorMsg, getCurrentPos());
255     }
256
257
258     /*
259      * debug messages
260      */

261     private void debug(String JavaDoc msg) {
262     System.out.println(msg);
263     }
264 }
265
Popular Tags