KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > html > dom > HTMLBuilder


/*
 * Copyright 1999,2000,2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

16 package org.apache.html.dom;
17
18
19 import java.util.Vector JavaDoc;
20
21 import org.apache.xerces.dom.ElementImpl;
22 import org.apache.xerces.dom.ProcessingInstructionImpl;
23 import org.apache.xerces.dom.TextImpl;
24 import org.w3c.dom.Node JavaDoc;
25 import org.w3c.dom.html.HTMLDocument;
26 import org.xml.sax.AttributeList JavaDoc;
27 import org.xml.sax.DocumentHandler JavaDoc;
28 import org.xml.sax.Locator JavaDoc;
29 import org.xml.sax.SAXException JavaDoc;
30
31
32 /**
33  * This is a SAX document handler that is used to build an HTML document.
34  * It can build a document from any SAX parser, but is specifically tuned
35  * for working with the OpenXML HTML parser.
36  *
37  *
38  * @version $Revision: 1.7 $ $Date: 2004/02/24 23:34:00 $
39  * @author <a HREF="mailto:arkin@openxml.org">Assaf Arkin</a>
40  */

41 public class HTMLBuilder
42     implements DocumentHandler JavaDoc
43 {
44
45
46     /**
47      * The document that is being built.
48      */

49     protected HTMLDocumentImpl _document;
50     
51     
52     /**
53      * The current node in the document into which elements, text and
54      * other nodes will be inserted. This starts as the document iself
55      * and reflects each element that is currently being parsed.
56      */

57     protected ElementImpl _current;
58     
59     /**
60      * A reference to the current locator, this is generally the parser
61      * itself. The locator is used to locate errors and identify the
62      * source locations of elements.
63      */

64     private Locator JavaDoc _locator;
65
66
67     /**
68      * Applies only to whitespace appearing between element tags in element content,
69      * as per the SAX definition, and true by default.
70      */

71     private boolean _ignoreWhitespace = true;
72
73
74     /**
75      * Indicates whether finished building a document. If so, can start building
76      * another document. Must be initially true to get the first document processed.
77      */

78     private boolean _done = true;
79
80
81     /**
82      * The document is only created the same time as the document element, however, certain
83      * nodes may precede the document element (comment and PI), and they are accumulated
84      * in this vector.
85      */

86     protected Vector JavaDoc _preRootNodes;
87
88     
89     public void startDocument()
90         throws SAXException JavaDoc
91     {
92         if ( ! _done )
93         throw new SAXException JavaDoc( "HTM001 State error: startDocument fired twice on one builder." );
94     _document = null;
95     _done = false;
96     }
97
98
99     public void endDocument()
100         throws SAXException JavaDoc
101     {
102         if ( _document == null )
103             throw new SAXException JavaDoc( "HTM002 State error: document never started or missing document element." );
104     if ( _current != null )
105         throw new SAXException JavaDoc( "HTM003 State error: document ended before end of document element." );
106         _current = null;
107     _done = true;
108     }
109
110
111     public synchronized void startElement( String JavaDoc tagName, AttributeList JavaDoc attrList )
112         throws SAXException JavaDoc
113     {
114         ElementImpl elem;
115         int i;
116         
117     if ( tagName == null )
118         throw new SAXException JavaDoc( "HTM004 Argument 'tagName' is null." );
119
120     // If this is the root element, this is the time to create a new document,
121
// because only know we know the document element name and namespace URI.
122
if ( _document == null )
123     {
124         // No need to create the element explicitly.
125
_document = new HTMLDocumentImpl();
126         elem = (ElementImpl) _document.getDocumentElement();
127         _current = elem;
128         if ( _current == null )
129         throw new SAXException JavaDoc( "HTM005 State error: Document.getDocumentElement returns null." );
130
131         // Insert nodes (comment and PI) that appear before the root element.
132
if ( _preRootNodes != null )
133         {
134         for ( i = _preRootNodes.size() ; i-- > 0 ; )
135             _document.insertBefore( (Node JavaDoc) _preRootNodes.elementAt( i ), elem );
136         _preRootNodes = null;
137         }
138          
139     }
140     else
141     {
142         // This is a state error, indicates that document has been parsed in full,
143
// or that there are two root elements.
144
if ( _current == null )
145         throw new SAXException JavaDoc( "HTM006 State error: startElement called after end of document element." );
146         elem = (ElementImpl) _document.createElement( tagName );
147         _current.appendChild( elem );
148         _current = elem;
149     }
150
151     // Add the attributes (specified and not-specified) to this element.
152
if ( attrList != null )
153         {
154             for ( i = 0 ; i < attrList.getLength() ; ++ i )
155                 elem.setAttribute( attrList.getName( i ), attrList.getValue( i ) );
156         }
157     }
158
159     
160     public void endElement( String JavaDoc tagName )
161         throws SAXException JavaDoc
162     {
163         if ( _current == null )
164             throw new SAXException JavaDoc( "HTM007 State error: endElement called with no current node." );
165     if ( ! _current.getNodeName().equalsIgnoreCase( tagName ))
166         throw new SAXException JavaDoc( "HTM008 State error: mismatch in closing tag name " + tagName + "\n" + tagName);
167
168     // Move up to the parent element. When you reach the top (closing the root element).
169
// the parent is document and current is null.
170
if ( _current.getParentNode() == _current.getOwnerDocument() )
171         _current = null;
172     else
173         _current = (ElementImpl) _current.getParentNode();
174     }
175
176
177     public void characters( String JavaDoc text )
178         throws SAXException JavaDoc
179     {
180     if ( _current == null )
181             throw new SAXException JavaDoc( "HTM009 State error: character data found outside of root element." );
182     _current.appendChild( new TextImpl( _document, text ) );
183     }
184
185     
186     public void characters( char[] text, int start, int length )
187         throws SAXException JavaDoc
188     {
189     if ( _current == null )
190             throw new SAXException JavaDoc( "HTM010 State error: character data found outside of root element." );
191     _current.appendChild( new TextImpl( _document, new String JavaDoc( text, start, length ) ) );
192     }
193     
194     
195     public void ignorableWhitespace( char[] text, int start, int length )
196         throws SAXException JavaDoc
197     {
198         Node JavaDoc node;
199         
200         if ( ! _ignoreWhitespace )
201         _current.appendChild( new TextImpl( _document, new String JavaDoc( text, start, length ) ) );
202      }
203     
204     
205     public void processingInstruction( String JavaDoc target, String JavaDoc instruction )
206         throws SAXException JavaDoc
207     {
208         Node JavaDoc node;
209         
210     // Processing instruction may appear before the document element (in fact, before the
211
// document has been created, or after the document element has been closed.
212
if ( _current == null && _document == null )
213     {
214         if ( _preRootNodes == null )
215         _preRootNodes = new Vector JavaDoc();
216         _preRootNodes.addElement( new ProcessingInstructionImpl( null, target, instruction ) );
217     }
218     else
219         if ( _current == null && _document != null )
220         _document.appendChild( new ProcessingInstructionImpl( _document, target, instruction ) );
221     else
222         _current.appendChild( new ProcessingInstructionImpl( _document, target, instruction ) );
223     }
224     
225     
226     public HTMLDocument getHTMLDocument()
227     {
228         return (HTMLDocument) _document;
229     }
230
231     
232     public void setDocumentLocator( Locator JavaDoc locator )
233     {
234         _locator = locator;
235     }
236
237
238 }
239
Popular Tags