KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xml > internal > serialize > HTMLdtd


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58
// Aug 21, 2000:
// Fixed bug in isElement and made HTMLdtd public.
// Contributed by Eric SCHAEFFER <eschaeffer@posterconseil.com>

63
64 package com.sun.org.apache.xml.internal.serialize;
65
import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Hashtable;
import java.util.Locale;
73
74
75 /**
76  * Utility class for accessing information specific to HTML documents.
77  * The HTML DTD is expressed as three utility function groups. Two methods
78  * allow for checking whether an element requires an open tag on printing
79  * ({@link #isEmptyTag}) or on parsing ({@link #isOptionalClosing}).
80  * <P>
81  * Two other methods translate character references from name to value and
82  * from value to name. A small entities resource is loaded into memory the
83  * first time any of these methods is called for fast and efficient access.
84  *
85  *
86  * @version $Revision: 1.17 $ $Date: 2004/02/10 17:25:26 $
87  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
88  */

89 public final class HTMLdtd
90 {
91
92     /**
93      * Public identifier for HTML 4.01 (Strict) document type.
94      */

95     public static final String JavaDoc HTMLPublicId = "-//W3C//DTD HTML 4.01//EN";
96
97     /**
98      * System identifier for HTML 4.01 (Strict) document type.
99      */

100     public static final String JavaDoc HTMLSystemId =
101         "http://www.w3.org/TR/html4/strict.dtd";
102
103     /**
104      * Public identifier for XHTML 1.0 (Strict) document type.
105      */

106     public static final String JavaDoc XHTMLPublicId =
107         "-//W3C//DTD XHTML 1.0 Strict//EN";
108
109     /**
110      * System identifier for XHTML 1.0 (Strict) document type.
111      */

112     public static final String JavaDoc XHTMLSystemId =
113         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
114
115     /**
116      * Table of reverse character reference mapping. Character codes are held
117      * as single-character strings, mapped to their reference name.
118      */

119     private static Hashtable JavaDoc _byChar;
120
121
122     /**
123      * Table of entity name to value mapping. Entities are held as strings,
124      * character references as <TT>Character</TT> objects.
125      */

126     private static Hashtable JavaDoc _byName;
127
128
129     private static Hashtable JavaDoc _boolAttrs;
130
131
132     /**
133      * Holds element definitions.
134      */

135     private static Hashtable JavaDoc _elemDefs;
136
137
138     /**
139      * Locates the HTML entities file that is loaded upon initialization.
140      * This file is a resource loaded with the default class loader.
141      */

142     private static final String JavaDoc ENTITIES_RESOURCE = "HTMLEntities.res";
143
144
145     /**
146      * Only opening tag should be printed.
147      */

148     private static final int ONLY_OPENING = 0x0001;
149
150     /**
151      * Element contains element content only.
152      */

153     private static final int ELEM_CONTENT = 0x0002;
154
155
156     /**
157      * Element preserve spaces.
158      */

159     private static final int PRESERVE = 0x0004;
160
161
162     /**
163      * Optional closing tag.
164      */

165     private static final int OPT_CLOSING = 0x0008;
166
167
168     /**
169      * Element is empty (also means only opening tag)
170      */

171     private static final int EMPTY = 0x0010 | ONLY_OPENING;
172
173
174     /**
175      * Allowed to appear in head.
176      */

177     private static final int ALLOWED_HEAD = 0x0020;
178
179
180     /**
181      * When opened, closes P.
182      */

183     private static final int CLOSE_P = 0x0040;
184
185
186     /**
187      * When opened, closes DD or DT.
188      */

189     private static final int CLOSE_DD_DT = 0x0080;
190
191
192     /**
193      * When opened, closes itself.
194      */

195     private static final int CLOSE_SELF = 0x0100;
196
197
198     /**
199      * When opened, closes another table section.
200      */

201     private static final int CLOSE_TABLE = 0x0200;
202
203
204     /**
205      * When opened, closes TH or TD.
206      */

207     private static final int CLOSE_TH_TD = 0x04000;
208
209
210     /**
211      * Returns true if element is declared to be empty. HTML elements are
212      * defines as empty in the DTD, not by the document syntax.
213      *
214      * @param tagName The element tag name (upper case)
215      * @return True if element is empty
216      */

217     public static boolean isEmptyTag( String JavaDoc tagName )
218     {
219         return isElement( tagName, EMPTY );
220     }
221
222
223     /**
224      * Returns true if element is declared to have element content.
225      * Whitespaces appearing inside element content will be ignored,
226      * other text will simply report an error.
227      *
228      * @param tagName The element tag name (upper case)
229      * @return True if element content
230      */

231     public static boolean isElementContent( String JavaDoc tagName )
232     {
233         return isElement( tagName, ELEM_CONTENT );
234     }
235
236
237     /**
238      * Returns true if element's textual contents preserves spaces.
239      * This only applies to PRE and TEXTAREA, all other HTML elements
240      * do not preserve space.
241      *
242      * @param tagName The element tag name (upper case)
243      * @return True if element's text content preserves spaces
244      */

245     public static boolean isPreserveSpace( String JavaDoc tagName )
246     {
247         return isElement( tagName, PRESERVE );
248     }
249
250
251     /**
252      * Returns true if element's closing tag is optional and need not
253      * exist. An error will not be reported for such elements if they
254      * are not closed. For example, <tt>LI</tt> is most often not closed.
255      *
256      * @param tagName The element tag name (upper case)
257      * @return True if closing tag implied
258      */

259     public static boolean isOptionalClosing( String JavaDoc tagName )
260     {
261         return isElement( tagName, OPT_CLOSING );
262     }
263
264
265     /**
266      * Returns true if element's closing tag is generally not printed.
267      * For example, <tt>LI</tt> should not print the closing tag.
268      *
269      * @param tagName The element tag name (upper case)
270      * @return True if only opening tag should be printed
271      */

272     public static boolean isOnlyOpening( String JavaDoc tagName )
273     {
274         return isElement( tagName, ONLY_OPENING );
275     }
276
277
278     /**
279      * Returns true if the opening of one element (<tt>tagName</tt>) implies
280      * the closing of another open element (<tt>openTag</tt>). For example,
281      * every opening <tt>LI</tt> will close the previously open <tt>LI</tt>,
282      * and every opening <tt>BODY</tt> will close the previously open <tt>HEAD</tt>.
283      *
284      * @param tagName The newly opened element
285      * @param openTag The already opened element
286      * @return True if closing tag closes opening tag
287      */

288     public static boolean isClosing( String JavaDoc tagName, String JavaDoc openTag )
289     {
290         // Several elements are defined as closing the HEAD
291
if ( openTag.equalsIgnoreCase( "HEAD" ) )
292             return ! isElement( tagName, ALLOWED_HEAD );
293         // P closes iteself
294
if ( openTag.equalsIgnoreCase( "P" ) )
295             return isElement( tagName, CLOSE_P );
296         // DT closes DD, DD closes DT
297
if ( openTag.equalsIgnoreCase( "DT" ) || openTag.equalsIgnoreCase( "DD" ) )
298             return isElement( tagName, CLOSE_DD_DT );
299         // LI and OPTION close themselves
300
if ( openTag.equalsIgnoreCase( "LI" ) || openTag.equalsIgnoreCase( "OPTION" ) )
301             return isElement( tagName, CLOSE_SELF );
302         // Each of these table sections closes all the others
303
if ( openTag.equalsIgnoreCase( "THEAD" ) || openTag.equalsIgnoreCase( "TFOOT" ) ||
304              openTag.equalsIgnoreCase( "TBODY" ) || openTag.equalsIgnoreCase( "TR" ) ||
305              openTag.equalsIgnoreCase( "COLGROUP" ) )
306             return isElement( tagName, CLOSE_TABLE );
307         // TD closes TH and TH closes TD
308
if ( openTag.equalsIgnoreCase( "TH" ) || openTag.equalsIgnoreCase( "TD" ) )
309             return isElement( tagName, CLOSE_TH_TD );
310         return false;
311     }
312
313
314     /**
315      * Returns true if the specified attribute it a URI and should be
316      * escaped appropriately. In HTML URIs are escaped differently
317      * than normal attributes.
318      *
319      * @param tagName The element's tag name
320      * @param attrName The attribute's name
321      */

322     public static boolean isURI( String JavaDoc tagName, String JavaDoc attrName )
323     {
324         // Stupid checks.
325
return ( attrName.equalsIgnoreCase( "href" ) || attrName.equalsIgnoreCase( "src" ) );
326     }
327
328
329     /**
330      * Returns true if the specified attribute is a boolean and should be
331      * printed without the value. This applies to attributes that are true
332      * if they exist, such as selected (OPTION/INPUT).
333      *
334      * @param tagName The element's tag name
335      * @param attrName The attribute's name
336      */

337     public static boolean isBoolean( String JavaDoc tagName, String JavaDoc attrName )
338     {
339         String JavaDoc[] attrNames;
340
341         attrNames = (String JavaDoc[]) _boolAttrs.get( tagName.toUpperCase(Locale.ENGLISH) );
342         if ( attrNames == null )
343             return false;
344         for ( int i = 0 ; i < attrNames.length ; ++i )
345             if ( attrNames[ i ].equalsIgnoreCase( attrName ) )
346                 return true;
347         return false;
348     }
349
350
351     /**
352      * Returns the value of an HTML character reference by its name. If the
353      * reference is not found or was not defined as a character reference,
354      * returns EOF (-1).
355      *
356      * @param name Name of character reference
357      * @return Character code or EOF (-1)
358      */

359     public static int charFromName( String JavaDoc name )
360     {
361         Object JavaDoc value;
362
363         initialize();
364         value = _byName.get( name );
365         if ( value != null && value instanceof Integer JavaDoc )
366             return ( (Integer JavaDoc) value ).intValue();
367         else
368             return -1;
369     }
370
371
372     /**
373      * Returns the name of an HTML character reference based on its character
374      * value. Only valid for entities defined from character references. If no
375      * such character value was defined, return null.
376      *
377      * @param value Character value of entity
378      * @return Entity's name or null
379      */

380     public static String JavaDoc fromChar(int value )
381     {
382        if (value > 0xffff)
383             return null;
384
385         String JavaDoc name;
386
387         initialize();
388         name = (String JavaDoc) _byChar.get( new Integer JavaDoc( value ) );
389         return name;
390     }
391
392
393     /**
394      * Initialize upon first access. Will load all the HTML character references
395      * into a list that is accessible by name or character value and is optimized
396      * for character substitution. This method may be called any number of times
397      * but will execute only once.
398      */

399     private static void initialize()
400     {
401         InputStream JavaDoc is = null;
402         BufferedReader JavaDoc reader = null;
403         int index;
404         String JavaDoc name;
405         String JavaDoc value;
406         int code;
407         String JavaDoc line;
408
409         // Make sure not to initialize twice.
410
if ( _byName != null )
411             return;
412         try {
413             _byName = new Hashtable JavaDoc();
414             _byChar = new Hashtable JavaDoc();
415             is = HTMLdtd.class.getResourceAsStream( ENTITIES_RESOURCE );
416             if ( is == null ) {
417                 throw new RuntimeException JavaDoc(
418                     DOMMessageFormatter.formatMessage(
419                     DOMMessageFormatter.SERIALIZER_DOMAIN,
420                     "ResourceNotFound", new Object JavaDoc[] {ENTITIES_RESOURCE}));
421             }
422             reader = new BufferedReader JavaDoc( new InputStreamReader JavaDoc( is, "ASCII" ) );
423             line = reader.readLine();
424             while ( line != null ) {
425                 if ( line.length() == 0 || line.charAt( 0 ) == '#' ) {
426                     line = reader.readLine();
427                     continue;
428                 }
429                 index = line.indexOf( ' ' );
430                 if ( index > 1 ) {
431                     name = line.substring( 0, index );
432                     ++index;
433                     if ( index < line.length() ) {
434                         value = line.substring( index );
435                         index = value.indexOf( ' ' );
436                         if ( index > 0 )
437                             value = value.substring( 0, index );
438                         code = Integer.parseInt( value );
439                                         defineEntity( name, (char) code );
440                     }
441                 }
442                 line = reader.readLine();
443             }
444             is.close();
445         } catch ( Exception JavaDoc except ) {
446             throw new RuntimeException JavaDoc(
447                 DOMMessageFormatter.formatMessage(
448                 DOMMessageFormatter.SERIALIZER_DOMAIN,
449                 "ResourceNotLoaded", new Object JavaDoc[] {ENTITIES_RESOURCE, except.toString()}));
450         } finally {
451             if ( is != null ) {
452                 try {
453                     is.close();
454                 } catch ( Exception JavaDoc except ) { }
455             }
456         }
457     }
458
459
460     /**
461      * Defines a new character reference. The reference's name and value are
462      * supplied. Nothing happens if the character reference is already defined.
463      * <P>
464      * Unlike internal entities, character references are a string to single
465      * character mapping. They are used to map non-ASCII characters both on
466      * parsing and printing, primarily for HTML documents. '&lt;amp;' is an
467      * example of a character reference.
468      *
469      * @param name The entity's name
470      * @param value The entity's value
471      */

472     private static void defineEntity( String JavaDoc name, char value )
473     {
474         if ( _byName.get( name ) == null ) {
475             _byName.put( name, new Integer JavaDoc( value ) );
476             _byChar.put( new Integer JavaDoc( value ), name );
477         }
478     }
479
480
481     private static void defineElement( String JavaDoc name, int flags )
482     {
483         _elemDefs.put( name, new Integer JavaDoc( flags ) );
484     }
485
486
487     private static void defineBoolean( String JavaDoc tagName, String JavaDoc attrName )
488     {
489         defineBoolean( tagName, new String JavaDoc[] { attrName } );
490     }
491
492
493     private static void defineBoolean( String JavaDoc tagName, String JavaDoc[] attrNames )
494     {
495         _boolAttrs.put( tagName, attrNames );
496     }
497
498
499     private static boolean isElement( String JavaDoc name, int flag )
500     {
501         Integer JavaDoc flags;
502
503         flags = (Integer JavaDoc) _elemDefs.get( name.toUpperCase(Locale.ENGLISH) );
504         if ( flags == null )
505             return false;
506         else
507             return ( ( flags.intValue() & flag ) == flag );
508     }
509
510
511     static
512     {
513         _elemDefs = new Hashtable JavaDoc();
514         defineElement( "ADDRESS", CLOSE_P );
515         defineElement( "AREA", EMPTY );
516         defineElement( "BASE", EMPTY | ALLOWED_HEAD );
517         defineElement( "BASEFONT", EMPTY );
518         defineElement( "BLOCKQUOTE", CLOSE_P );
519         defineElement( "BODY", OPT_CLOSING );
520         defineElement( "BR", EMPTY );
521         defineElement( "COL", EMPTY );
522         defineElement( "COLGROUP", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
523         defineElement( "DD", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
524         defineElement( "DIV", CLOSE_P );
525         defineElement( "DL", ELEM_CONTENT | CLOSE_P );
526         defineElement( "DT", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
527         defineElement( "FIELDSET", CLOSE_P );
528         defineElement( "FORM", CLOSE_P );
529         defineElement( "FRAME", EMPTY | OPT_CLOSING );
530         defineElement( "H1", CLOSE_P );
531         defineElement( "H2", CLOSE_P );
532         defineElement( "H3", CLOSE_P );
533         defineElement( "H4", CLOSE_P );
534         defineElement( "H5", CLOSE_P );
535         defineElement( "H6", CLOSE_P );
536         defineElement( "HEAD", ELEM_CONTENT | OPT_CLOSING );
537         defineElement( "HR", EMPTY | CLOSE_P );
538         defineElement( "HTML", ELEM_CONTENT | OPT_CLOSING );
539         defineElement( "IMG", EMPTY );
540         defineElement( "INPUT", EMPTY );
541         defineElement( "ISINDEX", EMPTY | ALLOWED_HEAD );
542         defineElement( "LI", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
543         defineElement( "LINK", EMPTY | ALLOWED_HEAD );
544         defineElement( "MAP", ALLOWED_HEAD );
545         defineElement( "META", EMPTY | ALLOWED_HEAD );
546         defineElement( "OL", ELEM_CONTENT | CLOSE_P );
547         defineElement( "OPTGROUP", ELEM_CONTENT );
548         defineElement( "OPTION", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
549         defineElement( "P", OPT_CLOSING | CLOSE_P | CLOSE_SELF );
550         defineElement( "PARAM", EMPTY );
551         defineElement( "PRE", PRESERVE | CLOSE_P );
552         defineElement( "SCRIPT", ALLOWED_HEAD | PRESERVE );
553         defineElement( "NOSCRIPT", ALLOWED_HEAD | PRESERVE );
554         defineElement( "SELECT", ELEM_CONTENT );
555         defineElement( "STYLE", ALLOWED_HEAD | PRESERVE );
556         defineElement( "TABLE", ELEM_CONTENT | CLOSE_P );
557         defineElement( "TBODY", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
558         defineElement( "TD", OPT_CLOSING | CLOSE_TH_TD );
559         defineElement( "TEXTAREA", PRESERVE );
560         defineElement( "TFOOT", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
561         defineElement( "TH", OPT_CLOSING | CLOSE_TH_TD );
562         defineElement( "THEAD", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
563         defineElement( "TITLE", ALLOWED_HEAD );
564         defineElement( "TR", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
565         defineElement( "UL", ELEM_CONTENT | CLOSE_P );
566
567         _boolAttrs = new Hashtable JavaDoc();
568         defineBoolean( "AREA", "href" );
569         defineBoolean( "BUTTON", "disabled" );
570         defineBoolean( "DIR", "compact" );
571         defineBoolean( "DL", "compact" );
572         defineBoolean( "FRAME", "noresize" );
573         defineBoolean( "HR", "noshade" );
574         defineBoolean( "IMAGE", "ismap" );
575         defineBoolean( "INPUT", new String JavaDoc[] { "defaultchecked", "checked", "readonly", "disabled" } );
576         defineBoolean( "LINK", "link" );
577         defineBoolean( "MENU", "compact" );
578         defineBoolean( "OBJECT", "declare" );
579         defineBoolean( "OL", "compact" );
580         defineBoolean( "OPTGROUP", "disabled" );
581         defineBoolean( "OPTION", new String JavaDoc[] { "default-selected", "selected", "disabled" } );
582         defineBoolean( "SCRIPT", "defer" );
583         defineBoolean( "SELECT", new String JavaDoc[] { "multiple", "disabled" } );
584         defineBoolean( "STYLE", "disabled" );
585         defineBoolean( "TD", "nowrap" );
586         defineBoolean( "TH", "nowrap" );
587         defineBoolean( "TEXTAREA", new String JavaDoc[] { "disabled", "readonly" } );
588         defineBoolean( "UL", "compact" );
589
590         initialize();
591     }
592
593
594
595 }
596
597
Popular Tags