KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xml > serializer > ToHTMLStream


1 /*
2  * Copyright 2001-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 /*
17  * $Id: ToHTMLStream.java,v 1.28 2004/02/18 22:57:44 minchau Exp $
18  */

19 package org.apache.xml.serializer;
20
21 import java.io.IOException JavaDoc;
22 import java.io.OutputStream JavaDoc;
23 import java.io.UnsupportedEncodingException JavaDoc;
24 import java.util.Properties JavaDoc;
25
26 import javax.xml.transform.Result JavaDoc;
27
28 import org.apache.xml.res.XMLErrorResources;
29 import org.apache.xml.res.XMLMessages;
30 import org.apache.xml.utils.BoolStack;
31 import org.apache.xml.utils.Trie;
32 import org.xml.sax.Attributes JavaDoc;
33 import org.xml.sax.SAXException JavaDoc;
34
35 /**
36  * @author Santiago Pericas-Geertsen
37  * @author G. Todd Miller
38  */

39 public class ToHTMLStream extends ToStream
40 {
41
42     /** This flag is set while receiving events from the DTD. */
43     protected boolean m_inDTD = false;
44
45     /** Indentation state: startElement()/endElement() store the negation of the element's BLOCK flag here and read it to decide whether to start a new line. (seems like
46      * this needs to be a stack. -sb). */

47     private boolean m_inBlockElem = false;
48
49     /**
50      * Map that tells which XML characters should have special treatment, and it
51      * provides character to entity name lookup. The constructor installs it as m_charInfo.
52      */

53     protected static final CharInfo m_htmlcharInfo =
54 // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
55
CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
56
57     /** A digital search trie for fast, case insensitive lookup of ElemDesc objects; filled by the static initializer and queried by getElemDesc(). */
58     static final Trie m_elementFlags = new Trie();
59
60     static {
61
62         // HTML 4.0 loose DTD
63
m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
64         m_elementFlags.put(
65             "FRAME",
66             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
67         m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
68         m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
69         m_elementFlags.put(
70             "ISINDEX",
71             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
72         m_elementFlags.put(
73             "APPLET",
74             new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
75         m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
76         m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
77         m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
78
79         // HTML 4.0 strict DTD
80
m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
81         m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
82         m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
83         m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
84         m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
85         m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
86         m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
87         m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
88         m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
89         m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
90         m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
91         m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
92         m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
93         m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
94         m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
95         m_elementFlags.put(
96             "SUP",
97             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
98         m_elementFlags.put(
99             "SUB",
100             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
101         m_elementFlags.put(
102             "SPAN",
103             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
104         m_elementFlags.put(
105             "BDO",
106             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
107         m_elementFlags.put(
108             "BR",
109             new ElemDesc(
110                 0
111                     | ElemDesc.SPECIAL
112                     | ElemDesc.ASPECIAL
113                     | ElemDesc.EMPTY
114                     | ElemDesc.BLOCK));
115         m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
116         m_elementFlags.put(
117             "ADDRESS",
118             new ElemDesc(
119                 0
120                     | ElemDesc.BLOCK
121                     | ElemDesc.BLOCKFORM
122                     | ElemDesc.BLOCKFORMFIELDSET));
123         m_elementFlags.put(
124             "DIV",
125             new ElemDesc(
126                 0
127                     | ElemDesc.BLOCK
128                     | ElemDesc.BLOCKFORM
129                     | ElemDesc.BLOCKFORMFIELDSET));
130         m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
131         m_elementFlags.put(
132             "MAP",
133             new ElemDesc(
134                 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
135         m_elementFlags.put(
136             "AREA",
137             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
138         m_elementFlags.put(
139             "LINK",
140             new ElemDesc(
141                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
142         m_elementFlags.put(
143             "IMG",
144             new ElemDesc(
145                 0
146                     | ElemDesc.SPECIAL
147                     | ElemDesc.ASPECIAL
148                     | ElemDesc.EMPTY
149                     | ElemDesc.WHITESPACESENSITIVE));
150         m_elementFlags.put(
151             "OBJECT",
152             new ElemDesc(
153                 0
154                     | ElemDesc.SPECIAL
155                     | ElemDesc.ASPECIAL
156                     | ElemDesc.HEADMISC
157                     | ElemDesc.WHITESPACESENSITIVE));
158         m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
159         m_elementFlags.put(
160             "HR",
161             new ElemDesc(
162                 0
163                     | ElemDesc.BLOCK
164                     | ElemDesc.BLOCKFORM
165                     | ElemDesc.BLOCKFORMFIELDSET
166                     | ElemDesc.EMPTY));
167         m_elementFlags.put(
168             "P",
169             new ElemDesc(
170                 0
171                     | ElemDesc.BLOCK
172                     | ElemDesc.BLOCKFORM
173                     | ElemDesc.BLOCKFORMFIELDSET));
174         m_elementFlags.put(
175             "H1",
176             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
177         m_elementFlags.put(
178             "H2",
179             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
180         m_elementFlags.put(
181             "H3",
182             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
183         m_elementFlags.put(
184             "H4",
185             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
186         m_elementFlags.put(
187             "H5",
188             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
189         m_elementFlags.put(
190             "H6",
191             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
192         m_elementFlags.put(
193             "PRE",
194             new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
195         m_elementFlags.put(
196             "Q",
197             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
198         m_elementFlags.put(
199             "BLOCKQUOTE",
200             new ElemDesc(
201                 0
202                     | ElemDesc.BLOCK
203                     | ElemDesc.BLOCKFORM
204                     | ElemDesc.BLOCKFORMFIELDSET));
205         m_elementFlags.put("INS", new ElemDesc(0));
206         m_elementFlags.put("DEL", new ElemDesc(0));
207         m_elementFlags.put(
208             "DL",
209             new ElemDesc(
210                 0
211                     | ElemDesc.BLOCK
212                     | ElemDesc.BLOCKFORM
213                     | ElemDesc.BLOCKFORMFIELDSET));
214         m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
215         m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
216         m_elementFlags.put(
217             "OL",
218             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
219         m_elementFlags.put(
220             "UL",
221             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
222         m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
223         m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
224         m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
225         m_elementFlags.put(
226             "INPUT",
227             new ElemDesc(
228                 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
229         m_elementFlags.put(
230             "SELECT",
231             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
232         m_elementFlags.put("OPTGROUP", new ElemDesc(0));
233         m_elementFlags.put("OPTION", new ElemDesc(0));
234         m_elementFlags.put(
235             "TEXTAREA",
236             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
237         m_elementFlags.put(
238             "FIELDSET",
239             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
240         m_elementFlags.put("LEGEND", new ElemDesc(0));
241         m_elementFlags.put(
242             "BUTTON",
243             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
244         m_elementFlags.put(
245             "TABLE",
246             new ElemDesc(
247                 0
248                     | ElemDesc.BLOCK
249                     | ElemDesc.BLOCKFORM
250                     | ElemDesc.BLOCKFORMFIELDSET));
251         m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
252         m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
253         m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
254         m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
255         m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
256         m_elementFlags.put(
257             "COL",
258             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
259         m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
260         m_elementFlags.put("TH", new ElemDesc(0));
261         m_elementFlags.put("TD", new ElemDesc(0));
262         m_elementFlags.put(
263             "HEAD",
264             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
265         m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
266         m_elementFlags.put(
267             "BASE",
268             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
269         m_elementFlags.put(
270             "META",
271             new ElemDesc(
272                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
273         m_elementFlags.put(
274             "STYLE",
275             new ElemDesc(
276                 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
277         m_elementFlags.put(
278             "SCRIPT",
279             new ElemDesc(
280                 0
281                     | ElemDesc.SPECIAL
282                     | ElemDesc.ASPECIAL
283                     | ElemDesc.HEADMISC
284                     | ElemDesc.RAW));
285         m_elementFlags.put(
286             "NOSCRIPT",
287             new ElemDesc(
288                 0
289                     | ElemDesc.BLOCK
290                     | ElemDesc.BLOCKFORM
291                     | ElemDesc.BLOCKFORMFIELDSET));
292         m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
293
294         // From "John Ky" <hand@syd.speednet.com.au
295
// Transitional Document Type Definition ()
296
// file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
297
m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
298
299         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
300
m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
301         m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
302
303         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
304
m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
305
306         // From "John Ky" <hand@syd.speednet.com.au
307
m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
308
309         // HTML 4.0, section 16.5
310
m_elementFlags.put(
311             "IFRAME",
312             new ElemDesc(
313                 0
314                     | ElemDesc.BLOCK
315                     | ElemDesc.BLOCKFORM
316                     | ElemDesc.BLOCKFORMFIELDSET));
317         // NS4 extensions
318
m_elementFlags.put(
319             "LAYER",
320             new ElemDesc(
321                 0
322                     | ElemDesc.BLOCK
323                     | ElemDesc.BLOCKFORM
324                     | ElemDesc.BLOCKFORMFIELDSET));
325         m_elementFlags.put(
326             "ILAYER",
327             new ElemDesc(
328                 0
329                     | ElemDesc.BLOCK
330                     | ElemDesc.BLOCKFORM
331                     | ElemDesc.BLOCKFORMFIELDSET));
332
333         ElemDesc elemDesc;
334
335         elemDesc = (ElemDesc) m_elementFlags.get("AREA");
336
337         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
338         elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
339
340         elemDesc = (ElemDesc) m_elementFlags.get("BASE");
341
342         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
343
344         elemDesc = (ElemDesc) m_elementFlags.get("BLOCKQUOTE");
345
346         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
347
348         elemDesc = (ElemDesc) m_elementFlags.get("Q");
349
350         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
351
352         elemDesc = (ElemDesc) m_elementFlags.get("INS");
353
354         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
355
356         elemDesc = (ElemDesc) m_elementFlags.get("DEL");
357
358         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
359
360         elemDesc = (ElemDesc) m_elementFlags.get("A");
361
362         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
363         elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
364
365         elemDesc = (ElemDesc) m_elementFlags.get("LINK");
366         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
367
368         elemDesc = (ElemDesc) m_elementFlags.get("INPUT");
369
370         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
371         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
372         elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
373         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
374         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
375         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
376
377         elemDesc = (ElemDesc) m_elementFlags.get("SELECT");
378
379         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
380         elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
381
382         elemDesc = (ElemDesc) m_elementFlags.get("OPTGROUP");
383
384         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
385
386         elemDesc = (ElemDesc) m_elementFlags.get("OPTION");
387
388         elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
389         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
390
391         elemDesc = (ElemDesc) m_elementFlags.get("TEXTAREA");
392
393         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
394         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
395
396         elemDesc = (ElemDesc) m_elementFlags.get("BUTTON");
397
398         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
399
400         elemDesc = (ElemDesc) m_elementFlags.get("SCRIPT");
401
402         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
403         elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
404         elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
405
406         elemDesc = (ElemDesc) m_elementFlags.get("IMG");
407
408         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
409         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
410         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
411         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
412
413         elemDesc = (ElemDesc) m_elementFlags.get("OBJECT");
414
415         elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
416         elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
417         elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
418         elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
419         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
420         elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
421
422         elemDesc = (ElemDesc) m_elementFlags.get("FORM");
423
424         elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
425
426         elemDesc = (ElemDesc) m_elementFlags.get("HEAD");
427
428         elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
429
430         // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
431
elemDesc = (ElemDesc) m_elementFlags.get("FRAME");
432
433         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
434         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
435
436         // HTML 4.0, section 16.5
437
elemDesc = (ElemDesc) m_elementFlags.get("IFRAME");
438
439         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
440         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
441
442         // NS4 extensions
443
elemDesc = (ElemDesc) m_elementFlags.get("LAYER");
444
445         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
446
447         elemDesc = (ElemDesc) m_elementFlags.get("ILAYER");
448
449         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
450
451         elemDesc = (ElemDesc) m_elementFlags.get("DIV");
452
453         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
454     }
455
456     /**
457      * Dummy element for elements not found: the BLOCK-only descriptor that
458      * getElemDesc() returns when a name has no entry in m_elementFlags.
      */

459     static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
460
461     /** True if URLs should be specially escaped with the %xx form; set by setSpecialEscapeURLs() and setOutputFormat(). */
462     private boolean m_specialEscapeURLs = true;
463
464     /** True if the META tag should be omitted; when false, startElement() writes a Content-Type META tag right after the HEAD start tag. */
465     private boolean m_omitMetaTag = false;
466
467     /**
468      * Tells if the formatter should use special URL escaping.
469      *
470      * @param bool True if URLs should be specially escaped with the %xx form.
471      */

472     public void setSpecialEscapeURLs(boolean bool)
473     {
474         m_specialEscapeURLs = bool;
475     }
476
477     /**
478      * Tells if the formatter should omit the META tag.
479      *
480      * @param bool True if the META tag should be omitted.
481      */

482     public void setOmitMetaTag(boolean bool)
483     {
484         m_omitMetaTag = bool;
485     }
486
487     /**
488      * Specifies an output format for this serializer. It the
489      * serializer has already been associated with an output format,
490      * it will switch to the new format. This method should not be
491      * called while the serializer is in the process of serializing
492      * a document.
493      *
494      * @param format The output format to use
495      */

496     public void setOutputFormat(Properties JavaDoc format)
497     {
498  
499         m_specialEscapeURLs =
500             OutputPropertyUtils.getBooleanProperty(
501                 OutputPropertiesFactory.S_USE_URL_ESCAPING,
502                 format);
503
504         m_omitMetaTag =
505             OutputPropertyUtils.getBooleanProperty(
506                 OutputPropertiesFactory.S_OMIT_META_TAG,
507                 format);
508
509         super.setOutputFormat(format);
510     }
511
512     /**
513      * Tells if the formatter should use special URL escaping.
514      *
515      * @return True if URLs should be specially escaped with the %xx form.
516      */

517     private final boolean getSpecialEscapeURLs()
518     {
519         return m_specialEscapeURLs;
520     }
521
522     /**
523      * Tells if the formatter should omit the META tag.
524      *
525      * @return True if the META tag should be omitted.
526      */

527     private final boolean getOmitMetaTag()
528     {
529         return m_omitMetaTag;
530     }
531
532     /**
533      * Get a description of the given element.
534      *
535      * @param name non-null name of element, case insensitive.
536      *
537      * @return non-null reference to ElemDesc, which may be m_dummy if no
538      * element description matches the given name.
539      */

540     public static final ElemDesc getElemDesc(String JavaDoc name)
541     {
542         /* this method used to return m_dummy when name was null
543          * but now it doesn't check and and requires non-null name.
544          */

545         Object JavaDoc obj = m_elementFlags.get(name);
546         if (null != obj)
547             return (ElemDesc)obj;
548         return m_dummy;
549     }
550
/**
 * Default constructor. Installs the shared HTML character information
 * and a fresh set of namespace mappings.
 */
public ToHTMLStream()
{
    super();

    // Use the HTML entity table shared by all instances.
    this.m_charInfo = m_htmlcharInfo;

    // Initialize namespaces.
    this.m_prefixMap = new NamespaceMappings();
}
563
564     /** The name of the current element. */
565 // private String m_currentElementName = null;
566

567     /**
568      * Receive notification of the beginning of a document.
569      *
570      * @throws org.xml.sax.SAXException Any SAX exception, possibly
571      * wrapping another exception.
572      *
573      * @throws org.xml.sax.SAXException
574      */

575     protected void startDocumentInternal() throws org.xml.sax.SAXException JavaDoc
576     {
577         super.startDocumentInternal();
578
579         m_needToCallStartDocument = false;
580         m_needToOutputDocTypeDecl = true;
581         m_startNewLine = false;
582         setOmitXMLDeclaration(true);
583
584         if (true == m_needToOutputDocTypeDecl)
585         {
586             String JavaDoc doctypeSystem = getDoctypeSystem();
587             String JavaDoc doctypePublic = getDoctypePublic();
588             if ((null != doctypeSystem) || (null != doctypePublic))
589             {
590                 final java.io.Writer JavaDoc writer = m_writer;
591                 try
592                 {
593                 writer.write("<!DOCTYPE HTML");
594
595                 if (null != doctypePublic)
596                 {
597                     writer.write(" PUBLIC \"");
598                     writer.write(doctypePublic);
599                     writer.write('"');
600                 }
601
602                 if (null != doctypeSystem)
603                 {
604                     if (null == doctypePublic)
605                         writer.write(" SYSTEM \"");
606                     else
607                         writer.write('"');
608
609                     writer.write(doctypeSystem);
610                     writer.write('"');
611                 }
612
613                 writer.write('>');
614                 outputLineSep();
615                 }
616                 catch(IOException JavaDoc e)
617                 {
618                     throw new SAXException JavaDoc(e);
619                 }
620             }
621         }
622
623         m_needToOutputDocTypeDecl = false;
624     }
625
626     /**
627      * Receive notification of the end of a document.
628      *
629      * @throws org.xml.sax.SAXException Any SAX exception, possibly
630      * wrapping another exception.
631      *
632      * @throws org.xml.sax.SAXException
633      */

634     public final void endDocument() throws org.xml.sax.SAXException JavaDoc
635     {
636         
637         flushPending();
638         if (m_doIndent && !m_isprevtext)
639         {
640             try
641             {
642             outputLineSep();
643             }
644             catch(IOException JavaDoc e)
645             {
646                 throw new SAXException JavaDoc(e);
647             }
648         }
649
650         flushWriter();
651         if (m_tracer != null)
652             super.fireEndDoc();
653     }
654
655     /**
656      * Receive notification of the beginning of an element.
657      *
658      *
659      * @param namespaceURI
660      * @param localName
661      * @param name The element type name.
662      * @param atts The attributes attached to the element, if any.
663      * @throws org.xml.sax.SAXException Any SAX exception, possibly
664      * wrapping another exception.
665      * @see #endElement
666      * @see org.xml.sax.AttributeList
667      */

668     public void startElement(
669         String JavaDoc namespaceURI,
670         String JavaDoc localName,
671         String JavaDoc name,
672         Attributes JavaDoc atts)
673         throws org.xml.sax.SAXException JavaDoc
674     {
675
676         ElemContext elemContext = m_elemContext;
677
678         // clean up any pending things first
679
if (elemContext.m_startTagOpen)
680         {
681             closeStartTag();
682             elemContext.m_startTagOpen = false;
683         }
684         else if (m_cdataTagOpen)
685         {
686             closeCDATA();
687             m_cdataTagOpen = false;
688         }
689         else if (m_needToCallStartDocument)
690         {
691             startDocumentInternal();
692             m_needToCallStartDocument = false;
693         }
694
695
696         // if this element has a namespace then treat it like XML
697
if (null != namespaceURI && namespaceURI.length() > 0)
698         {
699             super.startElement(namespaceURI, localName, name, atts);
700
701             return;
702         }
703         
704         try
705         {
706             ElemDesc elemDesc = getElemDesc(name);
707             int elemFlags = elemDesc.getFlags();
708
709             // deal with indentation issues first
710
if (m_doIndent)
711             {
712
713                 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
714                 if (m_ispreserve)
715                     m_ispreserve = false;
716                 else if (
717                     (null != elemContext.m_elementName)
718                     && (!m_inBlockElem
719                         || isBlockElement) /* && !isWhiteSpaceSensitive */
720                     )
721                 {
722                     m_startNewLine = true;
723
724                     indent();
725
726                 }
727                 m_inBlockElem = !isBlockElement;
728             }
729
730             // save any attributes for later processing
731
if (atts != null)
732                 addAttributes(atts);
733
734             m_isprevtext = false;
735             final java.io.Writer JavaDoc writer = m_writer;
736             writer.write('<');
737             writer.write(name);
738
739
740
741             if (m_tracer != null)
742                 firePseudoAttributes();
743             
744             if ((elemFlags & ElemDesc.EMPTY) != 0)
745             {
746                 // an optimization for elements which are expected
747
// to be empty.
748
m_elemContext = elemContext.push();
749                 /* XSLTC sometimes calls namespaceAfterStartElement()
750                  * so we need to remember the name
751                  */

752                 m_elemContext.m_elementName = name;
753                 m_elemContext.m_elementDesc = elemDesc;
754                 return;
755             }
756             else
757             {
758                 elemContext = elemContext.push(namespaceURI,localName,name);
759                 m_elemContext = elemContext;
760                 elemContext.m_elementDesc = elemDesc;
761                 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
762             }
763             
764
765             if ((elemFlags & ElemDesc.HEADELEM) != 0)
766             {
767                 // This is the <HEAD> element, do some special processing
768
closeStartTag();
769                 elemContext.m_startTagOpen = false;
770                 if (!m_omitMetaTag)
771                 {
772                     if (m_doIndent)
773                         indent();
774                     writer.write(
775                         "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
776                     String JavaDoc encoding = getEncoding();
777                     String JavaDoc encode = Encodings.getMimeEncoding(encoding);
778                     writer.write(encode);
779                     writer.write("\">");
780                 }
781             }
782         }
783         catch (IOException JavaDoc e)
784         {
785             throw new SAXException JavaDoc(e);
786         }
787     }
788
789     /**
790      * Receive notification of the end of an element.
791      *
792      *
793      * @param namespaceURI
794      * @param localName
795      * @param name The element type name
796      * @throws org.xml.sax.SAXException Any SAX exception, possibly
797      * wrapping another exception.
798      */

799     public final void endElement(
800         final String JavaDoc namespaceURI,
801         final String JavaDoc localName,
802         final String JavaDoc name)
803         throws org.xml.sax.SAXException JavaDoc
804     {
805         // deal with any pending issues
806
if (m_cdataTagOpen)
807             closeCDATA();
808
809         // if the element has a namespace, treat it like XML, not HTML
810
if (null != namespaceURI && namespaceURI.length() > 0)
811         {
812             super.endElement(namespaceURI, localName, name);
813
814             return;
815         }
816
817         try
818         {
819
820             ElemContext elemContext = m_elemContext;
821             final ElemDesc elemDesc = elemContext.m_elementDesc;
822             final int elemFlags = elemDesc.getFlags();
823             final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
824
825             // deal with any indentation issues
826
if (m_doIndent)
827             {
828                 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
829                 boolean shouldIndent = false;
830
831                 if (m_ispreserve)
832                 {
833                     m_ispreserve = false;
834                 }
835                 else if (m_doIndent && (!m_inBlockElem || isBlockElement))
836                 {
837                     m_startNewLine = true;
838                     shouldIndent = true;
839                 }
840                 if (!elemContext.m_startTagOpen && shouldIndent)
841                     indent(elemContext.m_currentElemDepth - 1);
842                 m_inBlockElem = !isBlockElement;
843             }
844
845             final java.io.Writer JavaDoc writer = m_writer;
846             if (!elemContext.m_startTagOpen)
847             {
848                 writer.write("</");
849                 writer.write(name);
850                 writer.write('>');
851             }
852             else
853             {
854                 // the start-tag open when this method was called,
855
// so we need to process it now.
856

857                 if (m_tracer != null)
858                     super.fireStartElem(name);
859
860                 // the starting tag was still open when we received this endElement() call
861
// so we need to process any gathered attributes NOW, before they go away.
862
int nAttrs = m_attributes.getLength();
863                 if (nAttrs > 0)
864                 {
865                     processAttributes(m_writer, nAttrs);
866                     // clear attributes object for re-use with next element
867
m_attributes.clear();
868                 }
869                 if (!elemEmpty)
870                 {
871                     // As per Dave/Paul recommendation 12/06/2000
872
// if (shouldIndent)
873
// writer.write('>');
874
// indent(m_currentIndent);
875

876                     writer.write("></");
877                     writer.write(name);
878                     writer.write('>');
879                 }
880                 else
881                 {
882                     writer.write('>');
883                 }
884             }
885             
886             // clean up because the element has ended
887
if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
888                 m_ispreserve = true;
889             m_isprevtext = false;
890
891             // fire off the end element event
892
if (m_tracer != null)
893                 super.fireEndElem(name);
894                            
895             // OPTIMIZE-EMPTY
896
if (elemEmpty)
897             {
898                 // a quick exit if the HTML element had no children.
899
// This block of code can be removed if the corresponding block of code
900
// in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
901
m_elemContext = elemContext.m_prev;
902                 return;
903             }
904
905             // some more clean because the element has ended.
906
if (!elemContext.m_startTagOpen)
907             {
908                 if (m_doIndent && !m_preserves.isEmpty())
909                     m_preserves.pop();
910             }
911             m_elemContext = elemContext.m_prev;
912 // m_isRawStack.pop();
913
}
914         catch (IOException JavaDoc e)
915         {
916             throw new SAXException JavaDoc(e);
917         }
918     }
919
920     /**
921      * Process an attribute.
922      * @param writer The writer to write the processed output to.
923      * @param name The name of the attribute.
924      * @param value The value of the attribute.
925      * @param elemDesc The description of the HTML element
926      * that has this attribute.
927      *
928      * @throws org.xml.sax.SAXException
929      */

930     protected void processAttribute(
931         java.io.Writer JavaDoc writer,
932         String JavaDoc name,
933         String JavaDoc value,
934         ElemDesc elemDesc)
935         throws IOException JavaDoc
936     {
937         writer.write(' ');
938
939         if ( ((value.length() == 0) || value.equalsIgnoreCase(name))
940             && elemDesc != null
941             && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
942         {
943             writer.write(name);
944         }
945         else
946         {
947             // %REVIEW% %OPT%
948
// Two calls to single-char write may NOT
949
// be more efficient than one to string-write...
950
writer.write(name);
951             writer.write("=\"");
952             if ( elemDesc != null
953                 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
954                 writeAttrURI(writer, value, m_specialEscapeURLs);
955             else
956                 writeAttrString(writer, value, this.getEncoding());
957             writer.write('"');
958
959         }
960     }
961
962     /**
963      * Tell if a character is an ASCII digit.
964      */

965     private boolean isASCIIDigit(char c)
966     {
967         return (c >= '0' && c <= '9');
968     }
969
970     /**
971      * Make an integer into an HH hex value.
972      * Does no checking on the size of the input, since this
973      * is only meant to be used locally by writeAttrURI.
974      *
975      * @param i must be a value less than 255.
976      *
977      * @return should be a two character string.
978      */

979     private static String JavaDoc makeHHString(int i)
980     {
981         String JavaDoc s = Integer.toHexString(i).toUpperCase();
982         if (s.length() == 1)
983         {
984             s = "0" + s;
985         }
986         return s;
987     }
988
989     /**
990     * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
991     * @param str must be 2 characters long
992     *
993     * @return true or false
994     */

995     private boolean isHHSign(String JavaDoc str)
996     {
997         boolean sign = true;
998         try
999         {
1000            char r = (char) Integer.parseInt(str, 16);
1001        }
1002        catch (NumberFormatException JavaDoc e)
1003        {
1004            sign = false;
1005        }
1006        return sign;
1007    }
1008
1009    /**
1010     * Write the specified <var>string</var> after substituting non ASCII characters,
1011     * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1012     *
1013     * @param string String to convert to XML format.
1014     * @param doURLEscaping True if we should try to encode as
1015     * per http://www.ietf.org/rfc/rfc2396.txt.
1016     *
1017     * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1018     */

1019    public void writeAttrURI(
1020        final java.io.Writer JavaDoc writer, String JavaDoc string, boolean doURLEscaping)
1021        throws IOException JavaDoc
1022    {
1023        // http://www.ietf.org/rfc/rfc2396.txt says:
1024
// A URI is always in an "escaped" form, since escaping or unescaping a
1025
// completed URI might change its semantics. Normally, the only time
1026
// escape encodings can safely be made is when the URI is being created
1027
// from its component parts; each component may have its own set of
1028
// characters that are reserved, so only the mechanism responsible for
1029
// generating or interpreting that component can determine whether or
1030
// not escaping a character will change its semantics. Likewise, a URI
1031
// must be separated into its components before the escaped characters
1032
// within those components can be safely decoded.
1033
//
1034
// ...So we do our best to do limited escaping of the URL, without
1035
// causing damage. If the URL is already properly escaped, in theory, this
1036
// function should not change the string value.
1037

1038        final int end = string.length();
1039        if (end > m_attrBuff.length)
1040        {
1041           m_attrBuff = new char[end*2 + 1];
1042        }
1043        string.getChars(0,end, m_attrBuff, 0);
1044        final char[] chars = m_attrBuff;
1045
1046        int cleanStart = 0;
1047        int cleanLength = 0;
1048        
1049        
1050        char ch = 0;
1051        for (int i = 0; i < end; i++)
1052        {
1053            ch = chars[i];
1054
1055            if ((ch < 32) || (ch > 126))
1056            {
1057                if (cleanLength > 0)
1058                {
1059                    writer.write(chars, cleanStart, cleanLength);
1060                    cleanLength = 0;
1061                }
1062                if (doURLEscaping)
1063                {
1064                    // Encode UTF16 to UTF8.
1065
// Reference is Unicode, A Primer, by Tony Graham.
1066
// Page 92.
1067

1068                    // Note that Kay doesn't escape 0x20...
1069
// if(ch == 0x20) // Not sure about this... -sb
1070
// {
1071
// writer.write(ch);
1072
// }
1073
// else
1074
if (ch <= 0x7F)
1075                    {
1076                        writer.write('%');
1077                        writer.write(makeHHString(ch));
1078                    }
1079                    else if (ch <= 0x7FF)
1080                    {
1081                        // Clear low 6 bits before rotate, put high 4 bits in low byte,
1082
// and set two high bits.
1083
int high = (ch >> 6) | 0xC0;
1084                        int low = (ch & 0x3F) | 0x80;
1085                        // First 6 bits, + high bit
1086
writer.write('%');
1087                        writer.write(makeHHString(high));
1088                        writer.write('%');
1089                        writer.write(makeHHString(low));
1090                    }
1091                    else if (isUTF16Surrogate(ch)) // high surrogate
1092
{
1093                        // I'm sure this can be done in 3 instructions, but I choose
1094
// to try and do it exactly like it is done in the book, at least
1095
// until we are sure this is totally clean. I don't think performance
1096
// is a big issue with this particular function, though I could be
1097
// wrong. Also, the stuff below clearly does more masking than
1098
// it needs to do.
1099

1100                        // Clear high 6 bits.
1101
int highSurrogate = ((int) ch) & 0x03FF;
1102
1103                        // Middle 4 bits (wwww) + 1
1104
// "Note that the value of wwww from the high surrogate bit pattern
1105
// is incremented to make the uuuuu bit pattern in the scalar value
1106
// so the surrogate pair don't address the BMP."
1107
int wwww = ((highSurrogate & 0x03C0) >> 6);
1108                        int uuuuu = wwww + 1;
1109
1110                        // next 4 bits
1111
int zzzz = (highSurrogate & 0x003C) >> 2;
1112
1113                        // low 2 bits
1114
int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1115
1116                        // Get low surrogate character.
1117
ch = chars[++i];
1118
1119                        // Clear high 6 bits.
1120
int lowSurrogate = ((int) ch) & 0x03FF;
1121
1122                        // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1123
yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1124
1125                        // bottom 6 bits.
1126
int xxxxxx = (lowSurrogate & 0x003F);
1127
1128                        int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1129
int byte2 =
1130                            0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1131                        int byte3 = 0x80 | yyyyyy;
1132                        int byte4 = 0x80 | xxxxxx;
1133
1134                        writer.write('%');
1135                        writer.write(makeHHString(byte1));
1136                        writer.write('%');
1137                        writer.write(makeHHString(byte2));
1138                        writer.write('%');
1139                        writer.write(makeHHString(byte3));
1140                        writer.write('%');
1141                        writer.write(makeHHString(byte4));
1142                    }
1143                    else
1144                    {
1145                        int high = (ch >> 12) | 0xE0; // top 4 bits
1146
int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1147                        // middle 6 bits
1148
int low = (ch & 0x3F) | 0x80;
1149                        // First 6 bits, + high bit
1150
writer.write('%');
1151                        writer.write(makeHHString(high));
1152                        writer.write('%');
1153                        writer.write(makeHHString(middle));
1154                        writer.write('%');
1155                        writer.write(makeHHString(low));
1156                    }
1157
1158                }
1159                else if (escapingNotNeeded(ch))
1160                {
1161                    writer.write(ch);
1162                }
1163                else
1164                {
1165                    writer.write("&#");
1166                    writer.write(Integer.toString(ch));
1167                    writer.write(';');
1168                }
1169                // In this character range we have first written out any previously accumulated
1170
// "clean" characters, then processed the current more complicated character,
1171
// which may have incremented "i".
1172
// We now we reset the next possible clean character.
1173
cleanStart = i + 1;
1174            }
1175            // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1176
// not allowing quotes in the URI proper syntax, nor in the fragment
1177
// identifier, we believe that it's OK to double escape quotes.
1178
else if (ch == '"')
1179            {
1180                // If the character is a '%' number number, try to avoid double-escaping.
1181
// There is a question if this is legal behavior.
1182

1183                // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1184
// The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1185

1186                // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1187

1188                // We are no longer escaping '%'
1189

1190                if (cleanLength > 0)
1191                {
1192                    writer.write(chars, cleanStart, cleanLength);
1193                    cleanLength = 0;
1194                }
1195                
1196                
1197                // Mike Kay encodes this as &#34;, so he may know something I don't?
1198
if (doURLEscaping)
1199                    writer.write("%22");
1200                else
1201                    writer.write("&quot;"); // we have to escape this, I guess.
1202

1203                // We have written out any clean characters, then the escaped '%' and now we
1204
// We now we reset the next possible clean character.
1205
cleanStart = i + 1;
1206            }
1207            else
1208            {
1209                // no processing for this character, just count how
1210
// many characters in a row that we have that need no processing
1211
cleanLength++;
1212            }
1213        }
1214        
1215        // are there any clean characters at the end of the array
1216
// that we haven't processed yet?
1217
if (cleanLength > 1)
1218        {
1219            // if the whole string can be written out as-is do so
1220
// otherwise write out the clean chars at the end of the
1221
// array
1222
if (cleanStart == 0)
1223                writer.write(string);
1224            else
1225                writer.write(chars, cleanStart, cleanLength);
1226        }
1227        else if (cleanLength == 1)
1228        {
1229            // a little optimization for 1 clean character
1230
// (we could have let the previous if(...) handle them all)
1231
writer.write(ch);
1232        }
1233    }
1234
1235    /**
1236     * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1237     * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1238     *
1239     * @param string String to convert to XML format.
1240     * @param encoding CURRENTLY NOT IMPLEMENTED.
1241     *
1242     * @throws org.xml.sax.SAXException
1243     */

1244    public void writeAttrString(
1245        final java.io.Writer JavaDoc writer, String JavaDoc string, String JavaDoc encoding)
1246        throws IOException JavaDoc
1247    {
1248        final int end = string.length();
1249        if (end > m_attrBuff.length)
1250        {
1251            m_attrBuff = new char[end * 2 + 1];
1252        }
1253        string.getChars(0, end, m_attrBuff, 0);
1254        final char[] chars = m_attrBuff;
1255
1256        
1257
1258        int cleanStart = 0;
1259        int cleanLength = 0;
1260
1261        char ch = 0;
1262        for (int i = 0; i < end; i++)
1263        {
1264            ch = chars[i];
1265
1266            // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1267
// System.out.println("ch: "+(int)ch);
1268
// System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1269
// System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1270
if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
1271            {
1272                cleanLength++;
1273            }
1274            else if ('<' == ch || '>' == ch)
1275            {
1276                cleanLength++; // no escaping in this case, as specified in 15.2
1277
}
1278            else if (
1279                ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1280            {
1281                cleanLength++; // no escaping in this case, as specified in 15.2
1282
}
1283            else
1284            {
1285                if (cleanLength > 0)
1286                {
1287                    writer.write(chars,cleanStart,cleanLength);
1288                    cleanLength = 0;
1289                }
1290                int pos = accumDefaultEntity(writer, ch, i, chars, end, false, false);
1291
1292                if (i != pos)
1293                {
1294                    i = pos - 1;
1295                }
1296                else
1297                {
1298                    if (isUTF16Surrogate(ch))
1299                    {
1300 
1301                            writeUTF16Surrogate(ch, chars, i, end);
1302                            i++; // two input characters processed
1303
// this increments by one and the for()
1304
// loop itself increments by another one.
1305
}
1306
1307                    // The next is kind of a hack to keep from escaping in the case
1308
// of Shift_JIS and the like.
1309

1310                    /*
1311                    else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1312                    && (ch != 160))
1313                    {
1314                    writer.write(ch); // no escaping in this case
1315                    }
1316                    else
1317                    */

1318                    String JavaDoc entityName = m_charInfo.getEntityNameForChar(ch);
1319                    if (null != entityName)
1320                    {
1321                        writer.write('&');
1322                        writer.write(entityName);
1323                        writer.write(';');
1324                    }
1325                    else if (escapingNotNeeded(ch))
1326                    {
1327                        writer.write(ch); // no escaping in this case
1328
}
1329                    else
1330                    {
1331                        writer.write("&#");
1332                        writer.write(Integer.toString(ch));
1333                        writer.write(';');
1334                    }
1335                }
1336                cleanStart = i + 1;
1337            }
1338        } // end of for()
1339

1340        // are there any clean characters at the end of the array
1341
// that we haven't processed yet?
1342
if (cleanLength > 1)
1343        {
1344            // if the whole string can be written out as-is do so
1345
// otherwise write out the clean chars at the end of the
1346
// array
1347
if (cleanStart == 0)
1348                writer.write(string);
1349            else
1350                writer.write(chars, cleanStart, cleanLength);
1351        }
1352        else if (cleanLength == 1)
1353        {
1354            // a little optimization for 1 clean character
1355
// (we could have let the previous if(...) handle them all)
1356
writer.write(ch);
1357        }
1358    }
1359
1360
1361
1362    /**
1363     * Receive notification of character data.
1364     *
1365     * <p>The Parser will call this method to report each chunk of
1366     * character data. SAX parsers may return all contiguous character
1367     * data in a single chunk, or they may split it into several
1368     * chunks; however, all of the characters in any single event
1369     * must come from the same external entity, so that the Locator
1370     * provides useful information.</p>
1371     *
1372     * <p>The application must not attempt to read from the array
1373     * outside of the specified range.</p>
1374     *
1375     * <p>Note that some parsers will report whitespace using the
1376     * ignorableWhitespace() method rather than this one (validating
1377     * parsers must do so).</p>
1378     *
1379     * @param chars The characters from the XML document.
1380     * @param start The start position in the array.
1381     * @param length The number of characters to read from the array.
1382     * @throws org.xml.sax.SAXException Any SAX exception, possibly
1383     * wrapping another exception.
1384     * @see #ignorableWhitespace
1385     * @see org.xml.sax.Locator
1386     *
1387     * @throws org.xml.sax.SAXException
1388     */

1389    public final void characters(char chars[], int start, int length)
1390        throws org.xml.sax.SAXException JavaDoc
1391    {
1392
1393        if (m_elemContext.m_isRaw)
1394        {
1395            try
1396            {
1397                if (m_elemContext.m_startTagOpen)
1398                {
1399                    closeStartTag();
1400                    m_elemContext.m_startTagOpen = false;
1401                }
1402                m_ispreserve = true;
1403                
1404// With m_ispreserve just set true it looks like shouldIndent()
1405
// will always return false, so drop any possible indentation.
1406
// if (shouldIndent())
1407
// indent();
1408

1409                // writer.write("<![CDATA[");
1410
// writer.write(chars, start, length);
1411
writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1412
1413                // writer.write("]]>");
1414

1415                // time to generate characters event
1416
if (m_tracer != null)
1417                    super.fireCharEvent(chars, start, length);
1418                
1419                return;
1420            }
1421            catch (IOException JavaDoc ioe)
1422            {
1423                throw new org.xml.sax.SAXException JavaDoc(
1424                    XMLMessages.createXMLMessage(
1425                        XMLErrorResources.ER_OIERROR,
1426                        null),
1427                    ioe);
1428                //"IO error", ioe);
1429
}
1430        }
1431        else
1432        {
1433            super.characters(chars, start, length);
1434        }
1435    }
1436
1437    /**
1438     * Receive notification of cdata.
1439     *
1440     * <p>The Parser will call this method to report each chunk of
1441     * character data. SAX parsers may return all contiguous character
1442     * data in a single chunk, or they may split it into several
1443     * chunks; however, all of the characters in any single event
1444     * must come from the same external entity, so that the Locator
1445     * provides useful information.</p>
1446     *
1447     * <p>The application must not attempt to read from the array
1448     * outside of the specified range.</p>
1449     *
1450     * <p>Note that some parsers will report whitespace using the
1451     * ignorableWhitespace() method rather than this one (validating
1452     * parsers must do so).</p>
1453     *
1454     * @param ch The characters from the XML document.
1455     * @param start The start position in the array.
1456     * @param length The number of characters to read from the array.
1457     * @throws org.xml.sax.SAXException Any SAX exception, possibly
1458     * wrapping another exception.
1459     * @see #ignorableWhitespace
1460     * @see org.xml.sax.Locator
1461     *
1462     * @throws org.xml.sax.SAXException
1463     */

1464    public final void cdata(char ch[], int start, int length)
1465        throws org.xml.sax.SAXException JavaDoc
1466    {
1467
1468        if ((null != m_elemContext.m_elementName)
1469            && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1470                || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1471        {
1472            try
1473            {
1474                if (m_elemContext.m_startTagOpen)
1475                {
1476                    closeStartTag();
1477                    m_elemContext.m_startTagOpen = false;
1478                }
1479
1480                m_ispreserve = true;
1481
1482                if (shouldIndent())
1483                    indent();
1484
1485                // writer.write(ch, start, length);
1486
writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1487            }
1488            catch (IOException JavaDoc ioe)
1489            {
1490                throw new org.xml.sax.SAXException JavaDoc(
1491                    XMLMessages.createXMLMessage(
1492                        XMLErrorResources.ER_OIERROR,
1493                        null),
1494                    ioe);
1495                //"IO error", ioe);
1496
}
1497        }
1498        else
1499        {
1500            super.cdata(ch, start, length);
1501        }
1502    }
1503
1504    /**
1505     * Receive notification of a processing instruction.
1506     *
1507     * @param target The processing instruction target.
1508     * @param data The processing instruction data, or null if
1509     * none was supplied.
1510     * @throws org.xml.sax.SAXException Any SAX exception, possibly
1511     * wrapping another exception.
1512     *
1513     * @throws org.xml.sax.SAXException
1514     */

1515    public void processingInstruction(String JavaDoc target, String JavaDoc data)
1516        throws org.xml.sax.SAXException JavaDoc
1517    {
1518
1519        // Process any pending starDocument and startElement first.
1520
flushPending();
1521        
1522        // Use a fairly nasty hack to tell if the next node is supposed to be
1523
// unescaped text.
1524
if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1525        {
1526            startNonEscaping();
1527        }
1528        else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1529        {
1530            endNonEscaping();
1531        }
1532        else
1533        {
1534            try
1535            {
1536            if (m_elemContext.m_startTagOpen)
1537            {
1538                closeStartTag();
1539                m_elemContext.m_startTagOpen = false;
1540            }
1541            else if (m_needToCallStartDocument)
1542                startDocumentInternal();
1543
1544            if (shouldIndent())
1545                indent();
1546
1547            final java.io.Writer JavaDoc writer = m_writer;
1548            //writer.write("<?" + target);
1549
writer.write("<?");
1550            writer.write(target);
1551
1552            if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1553                writer.write(' ');
1554
1555            //writer.write(data + ">"); // different from XML
1556
writer.write(data); // different from XML
1557
writer.write('>'); // different from XML
1558

1559            // Always output a newline char if not inside of an
1560
// element. The whitespace is not significant in that
1561
// case.
1562
if (m_elemContext.m_currentElemDepth <= 0)
1563                outputLineSep();
1564
1565            m_startNewLine = true;
1566            }
1567            catch(IOException JavaDoc e)
1568            {
1569                throw new SAXException JavaDoc(e);
1570            }
1571        }
1572               
1573        // now generate the PI event
1574
if (m_tracer != null)
1575            super.fireEscapingEvent(target, data);
1576     }
1577
1578    /**
1579     * Receive notivication of a entityReference.
1580     *
1581     * @param name non-null reference to entity name string.
1582     *
1583     * @throws org.xml.sax.SAXException
1584     */

1585    public final void entityReference(String JavaDoc name)
1586        throws org.xml.sax.SAXException JavaDoc
1587    {
1588        try
1589        {
1590
1591        final java.io.Writer JavaDoc writer = m_writer;
1592        writer.write('&');
1593        writer.write(name);
1594        writer.write(';');
1595        
1596        } catch(IOException JavaDoc e)
1597        {
1598            throw new SAXException JavaDoc(e);
1599        }
1600    }
1601    /**
1602     * @see org.apache.xml.serializer.ExtendedContentHandler#endElement(String)
1603     */

1604    public final void endElement(String JavaDoc elemName) throws SAXException JavaDoc
1605    {
1606        endElement(null, null, elemName);
1607    }
1608
1609    /**
1610     * Process the attributes, which means to write out the currently
1611     * collected attributes to the writer. The attributes are not
1612     * cleared by this method
1613     *
1614     * @param writer the writer to write processed attributes to.
1615     * @param nAttrs the number of attributes in m_attributes
1616     * to be processed
1617     *
1618     * @throws org.xml.sax.SAXException
1619     */

1620    public void processAttributes(java.io.Writer JavaDoc writer, int nAttrs)
1621        throws IOException JavaDoc,SAXException JavaDoc
1622    {
1623            /*
1624             * process the collected attributes
1625             */

1626            for (int i = 0; i < nAttrs; i++)
1627            {
1628                processAttribute(
1629                    writer,
1630                    m_attributes.getQName(i),
1631                    m_attributes.getValue(i),
1632                    m_elemContext.m_elementDesc);
1633            }
1634    }
1635
1636    /**
1637     * For the enclosing elements starting tag write out out any attributes
1638     * followed by ">"
1639     *
1640     *@throws org.xml.sax.SAXException
1641     */

1642    protected void closeStartTag() throws SAXException JavaDoc
1643    {
1644            try
1645            {
1646
1647            // finish processing attributes, time to fire off the start element event
1648
if (m_tracer != null)
1649                super.fireStartElem(m_elemContext.m_elementName);
1650            
1651            int nAttrs = m_attributes.getLength();
1652            if (nAttrs>0)
1653            {
1654                processAttributes(m_writer, nAttrs);
1655                // clear attributes object for re-use with next element
1656
m_attributes.clear();
1657            }
1658
1659            m_writer.write('>');
1660
1661            /* whether Xalan or XSLTC, we have the prefix mappings now, so
1662             * lets determine if the current element is specified in the cdata-
1663             * section-elements list.
1664             */

1665            if (m_cdataSectionElements != null)
1666                m_elemContext.m_isCdataSection = isCdataSection();
1667            if (m_doIndent)
1668            {
1669                m_isprevtext = false;
1670                m_preserves.push(m_ispreserve);
1671            }
1672            
1673            }
1674            catch(IOException JavaDoc e)
1675            {
1676                throw new SAXException JavaDoc(e);
1677            }
1678    }
1679    /**
1680     * Initialize the serializer with the specified output stream and output
1681     * format. Must be called before calling any of the serialize methods.
1682     *
1683     * @param output The output stream to use
1684     * @param format The output format
1685     * @throws UnsupportedEncodingException The encoding specified in the
1686     * output format is not supported
1687     */

1688    protected synchronized void init(OutputStream JavaDoc output, Properties JavaDoc format)
1689        throws UnsupportedEncodingException JavaDoc
1690    {
1691        if (null == format)
1692        {
1693            format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML);
1694         }
1695        super.init(output,format, false);
1696    }
1697    
1698        /**
1699         * Specifies an output stream to which the document should be
1700         * serialized. This method should not be called while the
1701         * serializer is in the process of serializing a document.
1702         * <p>
1703         * The encoding specified in the output properties is used, or
1704         * if no encoding was specified, the default for the selected
1705         * output method.
1706         *
1707         * @param output The output stream
1708         */

1709        public void setOutputStream(OutputStream JavaDoc output)
1710        {
1711
1712            try
1713            {
1714                Properties JavaDoc format;
1715                if (null == m_format)
1716                    format = OutputPropertiesFactory.getDefaultMethodProperties(Method.HTML);
1717                else
1718                    format = m_format;
1719                init(output, format, true);
1720            }
1721            catch (UnsupportedEncodingException JavaDoc uee)
1722            {
1723
1724                // Should have been warned in init, I guess...
1725
}
1726        }
1727        /**
1728         * This method is used when a prefix/uri namespace mapping
1729         * is indicated after the element was started with a
1730         * startElement() and before and endElement().
1731         * startPrefixMapping(prefix,uri) would be used before the
1732         * startElement() call.
1733         * @param uri the URI of the namespace
1734         * @param prefix the prefix associated with the given URI.
1735         *
1736         * @see org.apache.xml.serializer.ExtendedContentHandler#namespaceAfterStartElement(String, String)
1737         */

1738        public void namespaceAfterStartElement(String JavaDoc prefix, String JavaDoc uri)
1739            throws SAXException JavaDoc
1740        {
1741            // hack for XSLTC with finding URI for default namespace
1742
if (m_elemContext.m_elementURI == null)
1743            {
1744                String JavaDoc prefix1 = getPrefixPart(m_elemContext.m_elementName);
1745                if (prefix1 == null && EMPTYSTRING.equals(prefix))
1746                {
1747                    // the elements URI is not known yet, and it
1748
// doesn't have a prefix, and we are currently
1749
// setting the uri for prefix "", so we have
1750
// the uri for the element... lets remember it
1751
m_elemContext.m_elementURI = uri;
1752                }
1753            }
1754            startPrefixMapping(prefix,uri,false);
1755        }
1756
1757    public void startDTD(String JavaDoc name, String JavaDoc publicId, String JavaDoc systemId)
1758        throws SAXException JavaDoc
1759    {
1760        m_inDTD = true;
1761        super.startDTD(name, publicId, systemId);
1762    }
1763
1764    /**
1765     * Report the end of DTD declarations.
1766     * @throws org.xml.sax.SAXException The application may raise an exception.
1767     * @see #startDTD
1768     */

1769    public void endDTD() throws org.xml.sax.SAXException JavaDoc
1770    {
1771        m_inDTD = false;
1772        /* for ToHTMLStream the DOCTYPE is entirely output in the
1773         * startDocumentInternal() method, so don't do anything here
1774         */

1775    }
1776    /**
1777     * This method does nothing.
1778     */

1779    public void attributeDecl(
1780        String JavaDoc eName,
1781        String JavaDoc aName,
1782        String JavaDoc type,
1783        String JavaDoc valueDefault,
1784        String JavaDoc value)
1785        throws SAXException JavaDoc
1786    {
1787        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1788
}
1789
1790    /**
1791     * This method does nothing.
1792     */

1793    public void elementDecl(String JavaDoc name, String JavaDoc model) throws SAXException JavaDoc
1794    {
1795        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1796
}
1797    /**
1798     * This method does nothing.
1799     */

1800    public void internalEntityDecl(String JavaDoc name, String JavaDoc value)
1801        throws SAXException JavaDoc
1802    {
1803        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1804
}
1805    /**
1806     * This method does nothing.
1807     */

1808    public void externalEntityDecl(
1809        String JavaDoc name,
1810        String JavaDoc publicId,
1811        String JavaDoc systemId)
1812        throws SAXException JavaDoc
1813    {
1814        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1815
}
1816
1817    /**
1818     * This method is used to add an attribute to the currently open element.
1819     * The caller has guaranted that this attribute is unique, which means that it
1820     * not been seen before and will not be seen again.
1821     *
1822     * @param name the qualified name of the attribute
1823     * @param value the value of the attribute which can contain only
1824     * ASCII printable characters characters in the range 32 to 127 inclusive.
1825     * @param flags the bit values of this integer give optimization information.
1826     */

1827    public void addUniqueAttribute(String JavaDoc name, String JavaDoc value, int flags)
1828        throws SAXException JavaDoc
1829    {
1830        try
1831        {
1832            final java.io.Writer JavaDoc writer = m_writer;
1833            if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1834            {
1835                // "flags" has indicated that the characters
1836
// '>' '<' '&' and '"' are not in the value and
1837
// m_htmlcharInfo has recorded that there are no other
1838
// entities in the range 0 to 127 so we write out the
1839
// value directly
1840
writer.write(' ');
1841                writer.write(name);
1842                writer.write("=\"");
1843                writer.write(value);
1844                writer.write('"');
1845            }
1846            else if (
1847                (flags & HTML_ATTREMPTY) > 0
1848                    && (value.length() == 0 || value.equalsIgnoreCase(name)))
1849            {
1850                writer.write(' ');
1851                writer.write(name);
1852            }
1853            else
1854            {
1855                writer.write(' ');
1856                writer.write(name);
1857                writer.write("=\"");
1858                if ((flags & HTML_ATTRURL) > 0)
1859                {
1860                    writeAttrURI(writer, value, m_specialEscapeURLs);
1861                }
1862                else
1863                {
1864                    writeAttrString(writer, value, this.getEncoding());
1865                }
1866                writer.write('"');
1867            }
1868        } catch (IOException JavaDoc e) {
1869            throw new SAXException JavaDoc(e);
1870        }
1871    }
1872
1873    public void comment(char ch[], int start, int length)
1874            throws SAXException JavaDoc
1875    {
1876        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1877
if (m_inDTD)
1878            return;
1879        super.comment(ch, start, length);
1880    }
1881    
1882    public boolean reset()
1883    {
1884        boolean ret = super.reset();
1885        if (!ret)
1886            return false;
1887        initToHTMLStream();
1888        return true;
1889    }
1890    
1891    private void initToHTMLStream()
1892    {
1893// m_elementDesc = null;
1894
m_inBlockElem = false;
1895        m_inDTD = false;
1896// m_isRawStack.clear();
1897
m_omitMetaTag = false;
1898        m_specialEscapeURLs = true;
1899    }
1900}
1901
Popular Tags