KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xml > serialize > HTMLdtd


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58
// Aug 21, 2000:
// Fixed bug in isElement and made HTMLdtd public.
// Contributed by Eric SCHAEFFER <eschaeffer@posterconseil.com>
62

63
64 package org.enhydra.apache.xml.serialize;
65
66
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Hashtable;
import java.util.Locale;
71
72
/**
 * Utility class for accessing information specific to HTML documents.
 * The HTML DTD is expressed as three groups of static utility methods:
 * <UL>
 * <LI>Element queries ({@link #isEmptyTag}, {@link #isElementContent},
 *     {@link #isPreserveSpace}, {@link #isOptionalClosing},
 *     {@link #isOnlyOpening}, {@link #isClosing}) backed by a table of
 *     per-element flags.
 * <LI>Attribute queries ({@link #isURI}, {@link #isBoolean}).
 * <LI>Character reference translation from name to value
 *     ({@link #charFromName}) and from value to name ({@link #fromChar}).
 * </UL>
 * <P>
 * All tables, including the character references read from the
 * <TT>HTMLEntities.res</TT> resource, are populated by the static
 * initializer of this class. If the resource cannot be read the static
 * initializer throws a <TT>RuntimeException</TT>.
 * <P>
 * Element and attribute names are matched without regard to case. Tag
 * names are upper-cased with {@link Locale#ENGLISH} so that lookups do not
 * depend on the default locale of the JVM.
 *
 *
 * @version $Revision: 1.2 $ $Date: 2005/01/26 08:28:45 $
 * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
 */
public final class HTMLdtd
{

    /**
     * Public identifier for HTML document type.
     */
    public static final String HTMLPublicId = "-//W3C//DTD HTML 4.0//EN";

    /**
     * System identifier for HTML document type.
     */
    public static final String HTMLSystemId =
        "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";

    /**
     * Public identifier for XHTML document type.
     */
    public static final String XHTMLPublicId =
        "-//W3C//DTD XHTML 1.0 Strict//EN";

    /**
     * System identifier for XHTML document type.
     */
    public static final String XHTMLSystemId =
        "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";


    /**
     * Table of reverse character reference mapping. Keys are character
     * codes held as <TT>Integer</TT> objects, values are the reference
     * names (<TT>String</TT>).
     */
    private static Hashtable _byChar;


    /**
     * Table of character reference name to value mapping. Keys are the
     * reference names (<TT>String</TT>), values are character codes held
     * as <TT>Integer</TT> objects.
     */
    private static Hashtable _byName;


    /**
     * Table of boolean attributes. Keys are upper case element names,
     * values are <TT>String[]</TT> arrays of attribute names.
     */
    private static Hashtable _boolAttrs;


    /**
     * Holds element definitions. Keys are upper case element names,
     * values are <TT>Integer</TT> objects holding a combination of the
     * flag constants defined below.
     */
    private static Hashtable _elemDefs;


    /**
     * Locates the HTML entities file that is loaded upon initialization.
     * The name is resolved relative to this class by
     * <TT>Class.getResourceAsStream</TT>.
     */
    private static final String ENTITIES_RESOURCE = "HTMLEntities.res";


    /**
     * Only opening tag should be printed.
     */
    private static final int ONLY_OPENING = 0x0001;

    /**
     * Element contains element content only.
     */
    private static final int ELEM_CONTENT = 0x0002;

    /**
     * Element preserve spaces.
     */
    private static final int PRESERVE     = 0x0004;

    /**
     * Optional closing tag.
     */
    private static final int OPT_CLOSING  = 0x0008;

    /**
     * Element is empty (also means only opening tag).
     */
    private static final int EMPTY        = 0x0010 | ONLY_OPENING;

    /**
     * Allowed to appear in head.
     */
    private static final int ALLOWED_HEAD = 0x0020;

    /**
     * When opened, closes P.
     */
    private static final int CLOSE_P      = 0x0040;

    /**
     * When opened, closes DD or DT.
     */
    private static final int CLOSE_DD_DT  = 0x0080;

    /**
     * When opened, closes itself.
     */
    private static final int CLOSE_SELF   = 0x0100;

    /**
     * When opened, closes another table section.
     */
    private static final int CLOSE_TABLE  = 0x0200;

    /**
     * When opened, closes TH or TD. (Previously written as 0x04000; the
     * flag is private and any unused single bit behaves the same.)
     */
    private static final int CLOSE_TH_TD  = 0x0400;


    /**
     * Returns true if element is declared to be empty. HTML elements are
     * defined as empty in the DTD, not by the document syntax.
     *
     * @param tagName The element tag name (any case)
     * @return True if element is empty
     */
    public static boolean isEmptyTag( String tagName )
    {
        return isElement( tagName, EMPTY );
    }


    /**
     * Returns true if element is declared to have element content.
     * Whitespaces appearing inside element content will be ignored,
     * other text will simply report an error.
     *
     * @param tagName The element tag name (any case)
     * @return True if element content
     */
    public static boolean isElementContent( String tagName )
    {
        return isElement( tagName, ELEM_CONTENT );
    }


    /**
     * Returns true if element's textual contents preserves spaces.
     * The elements flagged this way are listed in the static initializer
     * of this class; all other elements do not preserve space.
     *
     * @param tagName The element tag name (any case)
     * @return True if element's text content preserves spaces
     */
    public static boolean isPreserveSpace( String tagName )
    {
        return isElement( tagName, PRESERVE );
    }


    /**
     * Returns true if element's closing tag is optional and need not
     * exist. An error will not be reported for such elements if they
     * are not closed. For example, <tt>LI</tt> is most often not closed.
     *
     * @param tagName The element tag name (any case)
     * @return True if closing tag implied
     */
    public static boolean isOptionalClosing( String tagName )
    {
        return isElement( tagName, OPT_CLOSING );
    }


    /**
     * Returns true if element's closing tag is generally not printed.
     * For example, <tt>LI</tt> should not print the closing tag. Empty
     * elements also report true, since the empty flag includes this one.
     *
     * @param tagName The element tag name (any case)
     * @return True if only opening tag should be printed
     */
    public static boolean isOnlyOpening( String tagName )
    {
        return isElement( tagName, ONLY_OPENING );
    }


    /**
     * Returns true if the opening of one element (<tt>tagName</tt>) implies
     * the closing of another open element (<tt>openTag</tt>). For example,
     * every opening <tt>LI</tt> will close the previously open <tt>LI</tt>,
     * and every opening <tt>BODY</tt> will close the previously open <tt>HEAD</tt>.
     *
     * @param tagName The newly opened element
     * @param openTag The already opened element
     * @return True if opening <tt>tagName</tt> closes <tt>openTag</tt>
     */
    public static boolean isClosing( String tagName, String openTag )
    {
        // Anything that is not allowed inside HEAD closes the HEAD
        if ( openTag.equalsIgnoreCase( "HEAD" ) )
            return ! isElement( tagName, ALLOWED_HEAD );
        // P is closed by itself and by the block elements flagged CLOSE_P
        if ( openTag.equalsIgnoreCase( "P" ) )
            return isElement( tagName, CLOSE_P );
        // DT closes DD, DD closes DT
        if ( openTag.equalsIgnoreCase( "DT" ) || openTag.equalsIgnoreCase( "DD" ) )
            return isElement( tagName, CLOSE_DD_DT );
        // LI and OPTION close themselves
        if ( openTag.equalsIgnoreCase( "LI" ) || openTag.equalsIgnoreCase( "OPTION" ) )
            return isElement( tagName, CLOSE_SELF );
        // Each of these table sections closes all the others
        if ( openTag.equalsIgnoreCase( "THEAD" ) || openTag.equalsIgnoreCase( "TFOOT" ) ||
             openTag.equalsIgnoreCase( "TBODY" ) || openTag.equalsIgnoreCase( "TR" ) ||
             openTag.equalsIgnoreCase( "COLGROUP" ) )
            return isElement( tagName, CLOSE_TABLE );
        // TD closes TH and TH closes TD
        if ( openTag.equalsIgnoreCase( "TH" ) || openTag.equalsIgnoreCase( "TD" ) )
            return isElement( tagName, CLOSE_TH_TD );
        return false;
    }


    /**
     * Returns true if the specified attribute is a URI and should be
     * escaped appropriately. In HTML URIs are escaped differently
     * than normal attributes.
     * <P>
     * The check is deliberately simplistic: only the attribute name is
     * examined (<tt>href</tt> or <tt>src</tt>, any case); the element
     * name is ignored.
     *
     * @param tagName The element's tag name (currently unused)
     * @param attrName The attribute's name
     * @return True if the attribute is named <tt>href</tt> or <tt>src</tt>
     */
    public static boolean isURI( String tagName, String attrName )
    {
        return ( attrName.equalsIgnoreCase( "href" ) || attrName.equalsIgnoreCase( "src" ) );
    }


    /**
     * Returns true if the specified attribute is a boolean and should be
     * printed without the value. This applies to attributes that are true
     * if they exist, such as selected (OPTION/INPUT).
     *
     * @param tagName The element's tag name (any case)
     * @param attrName The attribute's name (any case)
     * @return True if the attribute is registered as boolean for the element
     */
    public static boolean isBoolean( String tagName, String attrName )
    {
        String[] attrNames;

        // Locale.ENGLISH keeps the lookup key stable regardless of the JVM
        // default locale (e.g. Turkish maps 'i' to a dotted capital I).
        attrNames = (String[]) _boolAttrs.get( tagName.toUpperCase( Locale.ENGLISH ) );
        if ( attrNames == null )
            return false;
        for ( int i = 0 ; i < attrNames.length ; ++i )
            if ( attrNames[ i ].equalsIgnoreCase( attrName ) )
                return true;
        return false;
    }


    /**
     * Returns the value of an HTML character reference by its name. If the
     * reference is not found or was not defined as a character reference,
     * returns EOF (-1). Names are matched exactly (case sensitive).
     *
     * @param name Name of character reference
     * @return Character code or EOF (-1)
     */
    public static int charFromName( String name )
    {
        Object value;

        initialize();
        value = _byName.get( name );
        // instanceof is false for null, so no separate null check is needed
        if ( value instanceof Integer )
            return ( (Integer) value ).intValue();
        else
            return -1;
    }


    /**
     * Returns the name of an HTML character reference based on its character
     * value. Only valid for entities defined from character references. If no
     * such character value was defined, or the value is above 0xFFFF,
     * returns null.
     *
     * @param value Character value of entity
     * @return Entity's name or null
     */
    public static String fromChar( int value )
    {
        // Entities are stored from char values, so nothing above the
        // 16 bit range can ever be present in the table.
        if ( value > 0xffff )
            return null;

        initialize();
        return (String) _byChar.get( new Integer( value ) );
    }


    /**
     * Initialize upon first access. Will load all the HTML character references
     * into tables that are accessible by name or character value. This method
     * may be called any number of times but will load the resource only once
     * after a successful load.
     * <P>
     * Each non-empty line of the resource that does not start with '#' is
     * expected to hold a name, a space, and a decimal character code;
     * anything following a second space is ignored.
     *
     * @throws RuntimeException If the resource is missing, cannot be read,
     *   or holds a character code that is not a decimal number
     */
    private static void initialize()
    {
        InputStream     is = null;
        BufferedReader  reader = null;
        int             index;
        String          name;
        String          value;
        int             code;
        String          line;

        // Make sure not to initialize twice.
        if ( _byName != null )
            return;
        try {
            _byName = new Hashtable();
            _byChar = new Hashtable();
            is = HTMLdtd.class.getResourceAsStream( ENTITIES_RESOURCE );
            if ( is == null )
                throw new RuntimeException( "SER003 The resource [" + ENTITIES_RESOURCE + "] could not be found.\n" + ENTITIES_RESOURCE);
            // Decode with a fixed charset instead of the platform default.
            // NOTE(review): assumes the names and codes in the resource are
            // plain ASCII - confirm against HTMLEntities.res.
            reader = new BufferedReader( new InputStreamReader( is, "US-ASCII" ) );
            while ( ( line = reader.readLine() ) != null ) {
                // Skip blank lines and comments
                if ( line.length() == 0 || line.charAt( 0 ) == '#' )
                    continue;
                index = line.indexOf( ' ' );
                if ( index > 1 ) {
                    name = line.substring( 0, index );
                    ++index;
                    if ( index < line.length() ) {
                        value = line.substring( index );
                        // Keep only the first token after the name
                        index = value.indexOf( ' ' );
                        if ( index > 0 )
                            value = value.substring( 0, index );
                        code = Integer.parseInt( value );
                        defineEntity( name, (char) code );
                    }
                }
            }
        } catch ( Exception except ) {
            // Drop the partially filled tables so that the guard above
            // does not mistake a failed load for a completed one.
            _byName = null;
            _byChar = null;
            throw new RuntimeException( "SER003 The resource [" + ENTITIES_RESOURCE + "] could not load: " +
                                        except.toString() + "\n" + ENTITIES_RESOURCE + "\t" + except.toString(),
                                        except );
        } finally {
            // Best effort close; closing the reader also closes the stream.
            try {
                if ( reader != null )
                    reader.close();
                else if ( is != null )
                    is.close();
            } catch ( Exception ignored ) {
                // Nothing useful can be done if close fails.
            }
        }
    }


    /**
     * Defines a new character reference. The reference's name and value are
     * supplied. Nothing happens if the character reference is already defined.
     * <P>
     * Unlike internal entities, character references are a string to single
     * character mapping. They are used to map non-ASCII characters both on
     * parsing and printing, primarily for HTML documents. {@code &amp;} is an
     * example of a character reference.
     *
     * @param name The entity's name
     * @param value The entity's value
     */
    private static void defineEntity( String name, char value )
    {
        if ( _byName.get( name ) == null ) {
            _byName.put( name, new Integer( value ) );
            _byChar.put( new Integer( value ), name );
        }
    }


    /**
     * Registers the flags of an element, replacing any earlier definition.
     *
     * @param name The element name (upper case)
     * @param flags Combination of the flag constants of this class
     */
    private static void defineElement( String name, int flags )
    {
        _elemDefs.put( name, new Integer( flags ) );
    }


    /**
     * Registers a single boolean attribute for an element, replacing any
     * earlier registration for that element.
     *
     * @param tagName The element name (upper case)
     * @param attrName The boolean attribute name
     */
    private static void defineBoolean( String tagName, String attrName )
    {
        defineBoolean( tagName, new String[] { attrName } );
    }


    /**
     * Registers the boolean attributes of an element, replacing any
     * earlier registration for that element.
     *
     * @param tagName The element name (upper case)
     * @param attrNames The boolean attribute names
     */
    private static void defineBoolean( String tagName, String[] attrNames )
    {
        _boolAttrs.put( tagName, attrNames );
    }


    /**
     * Returns true if the named element is defined and has all the bits
     * of the given flag set.
     *
     * @param name The element name (any case)
     * @param flag The flag, or combination of flags, to test for
     * @return True if the element is known and carries the flag
     */
    private static boolean isElement( String name, int flag )
    {
        Integer flags;

        // Locale.ENGLISH keeps the lookup key stable regardless of the JVM
        // default locale (e.g. Turkish maps 'i' to a dotted capital I).
        flags = (Integer) _elemDefs.get( name.toUpperCase( Locale.ENGLISH ) );
        if ( flags == null )
            return false;
        else
            return ( ( flags.intValue() & flag ) == flag );
    }


    static
    {
        _elemDefs = new Hashtable();
        defineElement( "ADDRESS", CLOSE_P );
        defineElement( "AREA", EMPTY );
        defineElement( "BASE",  EMPTY | ALLOWED_HEAD );
        defineElement( "BASEFONT", EMPTY );
        defineElement( "BLOCKQUOTE", CLOSE_P );
        defineElement( "BODY", OPT_CLOSING );
        defineElement( "BR", EMPTY );
        defineElement( "COL", EMPTY );
        defineElement( "COLGROUP", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
        defineElement( "DD", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
        defineElement( "DIV", CLOSE_P );
        defineElement( "DL", ELEM_CONTENT | CLOSE_P );
        defineElement( "DT", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
        defineElement( "FIELDSET", CLOSE_P );
        defineElement( "FORM", CLOSE_P );
        defineElement( "FRAME", EMPTY | OPT_CLOSING );
        defineElement( "H1", CLOSE_P );
        defineElement( "H2", CLOSE_P );
        defineElement( "H3", CLOSE_P );
        defineElement( "H4", CLOSE_P );
        defineElement( "H5", CLOSE_P );
        defineElement( "H6", CLOSE_P );
        defineElement( "HEAD", ELEM_CONTENT | OPT_CLOSING );
        defineElement( "HR", EMPTY | CLOSE_P );
        defineElement( "HTML", ELEM_CONTENT | OPT_CLOSING );
        defineElement( "IMG", EMPTY );
        defineElement( "INPUT", EMPTY );
        defineElement( "ISINDEX", EMPTY | ALLOWED_HEAD );
        defineElement( "LI", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
        defineElement( "LINK", EMPTY | ALLOWED_HEAD );
        defineElement( "MAP", ALLOWED_HEAD );
        defineElement( "META", EMPTY | ALLOWED_HEAD );
        defineElement( "OL", ELEM_CONTENT | CLOSE_P );
        defineElement( "OPTGROUP", ELEM_CONTENT );
        defineElement( "OPTION", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
        defineElement( "P", OPT_CLOSING | CLOSE_P | CLOSE_SELF );
        defineElement( "PARAM", EMPTY );
        defineElement( "PRE", PRESERVE | CLOSE_P );
        defineElement( "SCRIPT", ALLOWED_HEAD | PRESERVE );
        defineElement( "NOSCRIPT", ALLOWED_HEAD | PRESERVE );
        defineElement( "SELECT", ELEM_CONTENT );
        defineElement( "STYLE", ALLOWED_HEAD | PRESERVE );
        defineElement( "TABLE", ELEM_CONTENT | CLOSE_P );
        defineElement( "TBODY", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
        defineElement( "TD", OPT_CLOSING | CLOSE_TH_TD );
        defineElement( "TEXTAREA", PRESERVE );
        defineElement( "TFOOT", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
        defineElement( "TH", OPT_CLOSING | CLOSE_TH_TD );
        defineElement( "THEAD", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
        defineElement( "TITLE", ALLOWED_HEAD );
        defineElement( "TR", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
        defineElement( "UL", ELEM_CONTENT | CLOSE_P );

        _boolAttrs = new Hashtable();
        // The boolean attribute HTML 4.01 declares for AREA is "nohref".
        // NOTE(review): "href" was the only entry before and is kept so that
        // existing callers see no change - confirm whether it can be dropped.
        defineBoolean( "AREA", new String[] { "href", "nohref" } );
        defineBoolean( "BUTTON", "disabled" );
        defineBoolean( "DIR", "compact" );
        defineBoolean( "DL", "compact" );
        defineBoolean( "FRAME", "noresize" );
        defineBoolean( "HR", "noshade" );
        // The element carrying "ismap" is IMG. The old "IMAGE" key is not an
        // HTML 4 element name and is retained for backward compatibility only.
        defineBoolean( "IMAGE", "ismap" );
        defineBoolean( "IMG", "ismap" );
        defineBoolean( "INPUT", new String[] { "defaultchecked", "checked", "readonly", "disabled" } );
        defineBoolean( "LINK", "link" );
        defineBoolean( "MENU", "compact" );
        defineBoolean( "OBJECT", "declare" );
        defineBoolean( "OL", "compact" );
        defineBoolean( "OPTGROUP", "disabled" );
        defineBoolean( "OPTION", new String[] { "default-selected", "selected", "disabled" } );
        defineBoolean( "SCRIPT", "defer" );
        defineBoolean( "SELECT", new String[] { "multiple", "disabled" } );
        defineBoolean( "STYLE", "disabled" );
        defineBoolean( "TD", "nowrap" );
        defineBoolean( "TH", "nowrap" );
        defineBoolean( "TEXTAREA", new String[] { "disabled", "readonly" } );
        defineBoolean( "UL", "compact" );

        // Load the character references eagerly so that a missing resource
        // is reported when the class is first used.
        initialize();
    }

}
587
588
Popular Tags