KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > wings > template > parser > SGMLTag


1 /*
2  * Copyright (c) 1997-1999 The Java Apache Project. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in
13  * the documentation and/or other materials provided with the
14  * distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  * software must display the following acknowledgment:
18  * "This product includes software developed by the Java Apache
19  * Project for use in the Apache JServ servlet engine project
20  * (http://java.apache.org/)."
21  *
22  * 4. The names "Apache JServ", "Apache JServ Servlet Engine" and
23  * "Java Apache Project" must not be used to endorse or promote products
24  * derived from this software without prior written permission.
25  *
26  * 5. Products derived from this software may not be called "Apache JServ"
27  * nor may "Apache" nor "Apache JServ" appear in their names without
28  * prior written permission of the Java Apache Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  * acknowledgment:
32  * "This product includes software developed by the Java Apache
33  * Project for use in the Apache JServ servlet engine project
34  * (http://java.apache.org/)."
35  *
36  * THIS SOFTWARE IS PROVIDED BY THE JAVA APACHE PROJECT "AS IS" AND ANY
37  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE JAVA APACHE PROJECT OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47  * OF THE POSSIBILITY OF SUCH DAMAGE.
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Java Apache Group. For more information
51  * on the Java Apache Project and the Apache JServ Servlet Engine project,
52  * please see <http://java.apache.org/>.
53  */

54
55 /*
56  * $Id: SGMLTag.java,v 1.5 2004/12/01 07:54:28 hengels Exp $
57  * Copyright 2000,2005 wingS development team.
58  *
59  * This file is part of wingS (http://www.j-wings.org).
60  *
61  * wingS is free software; you can redistribute it and/or modify
62  * it under the terms of the GNU Lesser General Public License
63  * as published by the Free Software Foundation; either version 2.1
64  * of the License, or (at your option) any later version.
65  *
66  * Please see COPYING for the complete licence.
67  */

68 package org.wings.template.parser;
69
70 import java.io.IOException JavaDoc;
71 import java.io.Reader JavaDoc;
72 import java.io.StringReader JavaDoc;
73 import java.util.HashMap JavaDoc;
74 import java.util.Iterator JavaDoc;
75 import java.util.LinkedHashMap JavaDoc;
76 import java.util.LinkedList JavaDoc;
77
/**
 * Convenience class for parsing SGML tokens from a page.
 * <p/>
 * <p>This class is optimized for speed, not ease of use.
 * (Though I'd contend it's fairly easy to use anyway!)
 * <p/>
 * <p>Unlike earlier versions of this class, this one reads
 * its content from a <code>Reader</code> to avoid reading
 * the whole file into a String before parsing it.
 * The Reader is required to support the <code>mark()</code>
 * operation.
 * <p/>
 * <p>When constructed with <code>parseIt == false</code>, a tag is only
 * read far enough to find out what the tag name is; if you want to read
 * the full tag, call <code>parse(inputReader)</code> afterwards.
 * This is done so that applications don't spend time processing
 * tags about which they care little.
 * <p/>
 * <p>Here's a sample piece of code which uses this class to read
 * all SGML tags on a page:
 * <p/>
 * <pre>
 * void showTags(PrintWriter out, Reader input)
 * {
 *     SGMLTag tag = new SGMLTag(input);
 *     while (!tag.finished()) {
 *         out.println ("tag: " + tag.toString());
 *         tag = new SGMLTag (input);
 *     }
 * }
 * </pre>
 *
 * @author <a HREF="mailto:williams@ugsolutions.com">Tim Williams</a>
 * @author <a HREF="mailto:zeller@think.de">Henner Zeller</a>
 * @version $Revision: 1.5 $ $Date: 2004/12/01 07:54:28 $
 */

113
114 /*
115  * TODO: (hen)
116  * - read incomplete TAGs <input type="checkbox" checked> => checked=1
117  */

118
119 public class SGMLTag {
120     public final static char singleQuote = '\'';
121     public final static char doubleQuote = '\"';
122
123     /**
124      * Name of this SGML tag, in uppercase format.
125      * This is only public for compatibility reasons.
126      */

127     private String JavaDoc name = null;
128
129     /**
130      * The token that closes this tag.
131      * Different for SSI and SGML tags.
132      */

133     private String JavaDoc closeTag = null;
134
135     /**
136      * Number of characters skipped <
137      */

138     private int offset = 0;
139
140     /* These attributes are to be compatible with the 'old'
141      * SGMLTag using Strings
142      */

143     private int start = 0;
144     private int end = 0;
145
146     // private stuff
147
private LinkedList JavaDoc attrs = null; // tag attributes (mixed)
148
private LinkedHashMap JavaDoc values = null; // tag attribute values (uc)
149
private boolean wellFormed = true; // looks good?
150
private boolean attr_ready = false;
151
152     // comment delimitation
153
static final String JavaDoc COMMENT_START = "!--", COMMENT_END = "-->";
154     static final String JavaDoc SSI_START = COMMENT_START + "#", SSI_END = COMMENT_END;
155
156     /**
157      * for historical reasons only; behaves like the
158      * old SGMLTag().
159      */

160     private SGMLTag(String JavaDoc textContent, int begin) {
161         PositionReader r = new PositionReader(new StringReader JavaDoc(textContent));
162         try {
163             r.skip(begin);
164             offset = begin;
165             searchStart(r);
166             start = offset;
167             // do a full parse here; since the usage of the
168
// String based SGMLTag() is deprecated this
169
// performance penalty doesn't matter
170
parse(r);
171         } catch (IOException JavaDoc reading_from_string_should_never_fail) {
172             offset = -1;
173         }
174         end = (int) r.getPosition();
175     }
176
177     /**
178      * Create new SGML tag reference, starting at current location
179      * of the Reader.
180      * At first, only the type of tag (first argument) is read if
181      * <code>parseIt</code> is false.
182      * Tag may not be well-formed: if interested, call "parse(input)"
183      * directly afterwards (without reading any characters
184      * from the Reader) to get the attributes.
185      * <p/>
186      * <p>Note that this constructor skips over any HTML-style comments,
187      * as denoted by matched <tt>&lt;--</tt> ... <tt>--&gt;</tt> pairs.
188      *
189      * @param input the Reader being parsed for SGML tags
190      * @param parseIt boolean which denotes if SGMLTag should be
191      * parsed fully
192      * @see #attributes
193      */

194     public SGMLTag(Reader JavaDoc input, boolean parseIt)
195             throws IOException JavaDoc {
196         searchStart(input);
197         if (parseIt) readAttributes(input);
198     }
199
200     /**
201      * Create new SGML tag reference, starting at current location
202      * of the Reader. Read all attributes.
203      * <p/>
204      * <p>Note that this constructor skips over any HTML-style comments,
205      * as denoted by matched <tt>&lt;--</tt> ... <tt>--&gt;</tt> pairs.
206      *
207      * @param input the Reader being parsed for SGML tags
208      * @see #attributes
209      */

210     public SGMLTag(Reader JavaDoc input)
211             throws IOException JavaDoc {
212         this(input, true);
213     }
214
215     public void parse(Reader JavaDoc input)
216             throws IOException JavaDoc {
217         readAttributes(input);
218     }
219
220     /**
221      * Skip over any HTML-style comments,
222      * as denoted by matched <tt>&lt;--</tt> ... <tt>--&gt;</tt> pairs.
223      *
224      * @param input the reader being parsed for SGMLtags
225      */

226     protected void searchStart(Reader JavaDoc input)
227             throws IOException JavaDoc {
228         int c = 0, num;
229         char buff[] = new char[8]; // must at least hold the length of COMMENT_(START|END)
230
String JavaDoc cmpStr;
231
232         // skipping over comments, find first tag
233
while (true) {
234             // find starting character of SGML tag
235
while (c >= 0 && c != '<') {
236                 c = input.read();
237                 offset++;
238             }
239             if (c == -1) {
240                 offset = -1;
241                 return;
242             } // EOF
243
offset--;
244
245             /* -- check if we just found a comment
246              * <!--# - SSI Commands start just like
247              * ordinary comments, so we've to make sure
248              * that exclude these (<!--) but not those (<!--#)
249              */

250             input.mark(SSI_START.length());
251             int pos;
252             num = 0;
253             for (pos = 0; pos >= 0 && num < SSI_START.length(); num += pos)
254                 pos = input.read(buff, pos, SSI_START.length() - pos);
255             if (pos == -1) {
256                 offset = -1;
257                 return;
258             } // EOF
259

260             cmpStr = new String JavaDoc(buff, 0, num);
261             if (SSI_START.equals(cmpStr) ||
262                     !(cmpStr.startsWith(COMMENT_START))) {
263                 input.reset();
264                 break; // No comment .. real start of a SGML / SSI Tag
265
}
266
267             /*
268              * ok, we got an comment; but since we read SSI_START length
269              * characters, we've to reset and just read COMMENT_START so
270              * we're in a defined state ..
271              */

272             input.reset();
273             num = 0;
274             for (pos = 0; pos >= 0 && num < COMMENT_START.length(); num += pos)
275                 pos = input.read(buff, pos, COMMENT_START.length() - pos);
276             // since length(COMMENT_START) < length(SSI_START) (which we
277
// already successfully read), we don't have to check for EOF here
278

279             offset += COMMENT_START.length() + 1; // +1 for the starting '<'
280
// otherwise skip extent of commented area
281
boolean endOfComment = false;
282             int len = 0, ringHead = 0;
283             int checkpos, p;
284             while (!endOfComment) {
285                 c = input.read();
286                 if (c == -1) {
287                     offset = -1;
288                     return;
289                 } // EOF
290
len++;
291                 offset++;
292                 // since we don't have '-1' here anymore, cast is save:
293
buff[ringHead] = (char) c; // buffer is a ringbuffer
294
if (len >= COMMENT_END.length()) {
295                     // compare, beginning from the last position backward
296
for (checkpos = ringHead + buff.length, p = COMMENT_END.length() - 1;
297                          p >= 0; --checkpos, --p) {
298                         if (COMMENT_END.charAt(p) != buff[checkpos % buff.length])
299                             break;
300                     }
301                     endOfComment = (p == -1);
302                 }
303                 ringHead = (++ringHead) % buff.length;
304             }
305
306         }
307
308         // get the name
309
// do not skip Whitespaces, since the Tagname must
310
// start just after the '<'
311
name = nextToken(input, false);
312         if (name != null)
313             name = name.toUpperCase();
314
315         // set the token that closes this tag
316
if (name != null && name.startsWith(SSI_START)) {
317             closeTag = SSI_END; // SSI tag
318
} else {
319             closeTag = ">"; // SGML tag
320
}
321     }
322
323
324     /**
325      * Checked whether this tag indicates we're at the end of the list.
326      * Note: The end tag is not usuable as an SGML tag.
327      *
328      * @return true if this tag represents end of tags, and is not usuable
329      */

330     public boolean finished() {
331         return offset == -1 && name == null;
332     }
333
334     /**
335      * Check name of tag.
336      * (Comparision is case-insensitive.)
337      *
338      * @return true if passed tag matches this one.
339      */

340     public boolean isNamed(String JavaDoc name) {
341         return this.name != null && this.name.equals(name.toUpperCase());
342     }
343
344     /**
345      * Check for well-formedness of this tag.
346      * Note that calling this method causes rest of tag to be parsed.
347      *
348      * @return true if tag is a well-formed SGML tag, false otherwise
349      */

350     public boolean isWellFormed() {
351         if (name == null) return false;
352         if (!attr_ready || values == null) return false;
353         return wellFormed;
354     }
355
356     /**
357      * returns the number of chars skipped before the
358      * starting '&lt'
359      */

360     public int getOffset() {
361         return offset;
362     }
363
364     /**
365      * get the Name of this SGML tag, in uppercase format.
366      * For example, P for paragraph, B for bold, etc.
367      * This value is set to null when whitespace or another
368      * problem was encountered where the tag would be.
369      */

370     public String JavaDoc getName() {
371         return name;
372     }
373
374     /**
375      * Get list of attribute names.
376      *
377      * @param upperCase true returns names in all uppercase (good for
378      * case-insensitive applications), false returns attribute names
379      * with same case as in original text
380      * @return enumeration of attribute names specified as strings,
381      * or null if this tag is poorly formed
382      */

383     public Iterator JavaDoc attributes(boolean upperCase) {
384         // check to make sure attributes have been read
385
if (!isWellFormed())
386             return null;
387
388         // or return uppercase names?
389
if (upperCase) {
390             return values.keySet().iterator();
391         } else {
392             return attrs.iterator();
393         }
394     }
395
396     /**
397      * Get attribute value, or default if not set.
398      * Case is ignored, <tt>value("a")</tt> will return the same
399      * result as <tt>value("A")</tt>. Note also that if wish to
400      * check whether value was set, you can pass <tt>null</tt>
401      * as the defaultValue.
402      *
403      * @param attributeName attribute for which to check
404      * @param defaultValue value if attribute unset
405      * @return value of attribute, or defaultValue if not available
406      */

407     public String JavaDoc value(String JavaDoc attributeName, String JavaDoc defaultValue) {
408         if (!isWellFormed())
409             return null;
410         String JavaDoc value = (String JavaDoc) values.get(attributeName.toUpperCase());
411         return value == null ? defaultValue : value;
412     }
413
414     /**
415      * Attempt to read attributes from tag if not already read.
416      *
417      * @return true if everything was read fine, false otherwise
418      */

419     private boolean readAttributes(Reader JavaDoc input)
420             throws IOException JavaDoc {
421         // just try to read Attributes once
422

423         if (attr_ready)
424             return wellFormed && values != null;
425         attr_ready = true;
426
427         if (values == null && wellFormed) {
428             String JavaDoc key = null, token;
429             wellFormed = false;
430             attrs = new LinkedList JavaDoc();
431             values = new LinkedHashMap JavaDoc();
432
433             while (true) {
434                 // check for valid value tag (or end delimiter)
435
if (key == null)
436                     key = nextToken(input);
437
438                 // close-Tag
439
if (key != null && key.equals(closeTag)) {
440                     wellFormed = true;
441                     break;
442                 }
443
444                 // close-Tag
445
if (key != null && key.equals("/>")) {
446                     wellFormed = true;
447                     break;
448                 }
449
450                 // 'key'-part
451
if (key == null
452                         || isDelimiter(key.charAt(0))
453                         || key.charAt(0) == doubleQuote
454                         || key.charAt(0) == singleQuote)
455                     break;
456
457                 // ok, we have a key. Now insure that we have an equals sign
458
token = nextToken(input);
459                 if (token == null || token.charAt(0) != '=') {
460                     attrs.add(key);
461                     if (token == null)
462                         break;
463                     key = token; // this token is the next key
464
continue;
465                 }
466
467                 // read value of tag
468
token = nextToken(input);
469                 if (token == null || isDelimiter(token.charAt(0)))
470                     break;
471
472                 // strip quotes
473
if (token.charAt(0) == doubleQuote || token.charAt(0) == singleQuote)
474                     token = token.substring(1, token.length() - 1);
475
476                 // store attribute name with original case
477
String JavaDoc upperCase = key.toUpperCase();
478                 if (!values.containsKey(upperCase))
479                     attrs.add(key);
480
481                 // store assignment in case-insensitive manner
482
values.put(upperCase, token);
483                 key = null; // clear this key; next token is our next key.
484
}
485         }
486         return wellFormed && values != null;
487     }
488
489     /**
490      * Read next token from string.
491      * A token is a space-delimited word, a string in quotes
492      * (returned with quotes), a delimiter such as a greater-than,
493      * less-than, or equals sign.
494      * Quotes marks inside quoted strings may be escaped with a
495      * backslash (\) character.
496      *
497      * @return next token, or null if whitespace was encountered
498      */

499     public String JavaDoc nextToken(Reader JavaDoc input)
500             throws IOException JavaDoc {
501         return nextToken(input, true);
502     }
503
504     /**
505      * Read next token from string.
506      * A token is a space-delimited word, a string in quotes
507      * (returned with quotes), a delimiter such as a greater-than,
508      * less-than, or equals sign.
509      * Quotes marks inside quoted strings may be escaped with a
510      * backslash (\) character.
511      *
512      * @return next token, or null if whitespace was encountered
513      */

514     public String JavaDoc nextToken(Reader JavaDoc input, boolean skipWhitespaces)
515             throws IOException JavaDoc {
516         StringBuffer JavaDoc token = new StringBuffer JavaDoc();
517
518         if (skipWhitespaces)
519             skipWhiteSpace(input);
520
521         input.mark(1);
522         int c = input.read();
523
524         if (c == -1) {
525             offset = -1;
526             return null;
527         }
528
529         // quoted string? (handle both single and double)
530
if (c == doubleQuote || c == singleQuote) {
531             boolean inSingle = false;
532             boolean inDouble = false;
533             if (c == singleQuote) inSingle = true; else inDouble = true;
534             token.append((char) c);
535             do {
536                 c = input.read();
537                 if (c == -1) {
538                     offset = -1;
539                     String JavaDoc reportString = token.toString();
540                     if (reportString.length() > 30) {
541                         reportString = reportString.substring(0, 30) +
542                                 " (truncated, length is " + reportString.length() + ")";
543                     }
544                     throw new IOException JavaDoc("EOF in String: " + reportString);
545                 }
546                 if (c == '\\') {
547                     int quoted = input.read();
548                     if (quoted >= 0) token.append((char) quoted);
549                 } else
550                     token.append((char) c);
551             } while ((inDouble && c != doubleQuote) || (inSingle && c != singleQuote));
552         }
553
554         // parameter delimiter? read just one
555
else if (isDelimiter((char) c)) {
556             token.append((char) c);
557         }
558
559         // Inserted for token "-->".
560
// Like a word token, but includes the delimiter ">".
561
else if (c == '-') {
562             do {
563                 token.append((char) c);
564                 input.mark(1);
565                 c = input.read();
566             } while (c >= 0 &&
567                     !Character.isWhitespace((char) c) &&
568                     !isDelimiter((char) c));
569             input.reset();
570             token.append((char) input.read());
571         }
572
573         // If we did not skip Whitespaces but actually got one
574
// this token is empty.
575
else if (!skipWhitespaces &&
576                 Character.isWhitespace((char) c)) {
577             input.reset();
578             return null;
579         }
580
581         // word token or />
582
else {
583             do {
584                 token.append((char) c);
585                 input.mark(1);
586                 c = input.read();
587             } while (c >= 0 &&
588                     !Character.isWhitespace((char) c) &&
589                     !isDelimiter((char) c));
590             if (token.length() == 1 && token.charAt(0) == '/')
591                 token.append((char) c);
592             else
593                 input.reset();
594         }
595         return token.toString();
596     }
597
598     /**
599      * could be overwritten
600      */

601     public static int skipWhiteSpace(Reader JavaDoc r)
602             throws IOException JavaDoc {
603         int c, len = 0;
604         do {
605             r.mark(1);
606             c = r.read();
607             len++;
608         } while (c >= 0 && Character.isWhitespace((char) c));
609         r.reset();
610         return len - 1;
611     }
612
613     /**
614      * Return value of attribute (parameter) setting in SGML tag.
615      * @param key name (uppercase) of attribute for which to check
616      * @param defaultValue value if attribute unset
617      * @deprecated use <tt>attributes()</tt> and <tt>value()</tt> instead
618      * @see #attributes
619      * @see #value
620      * @return value of that attribute, or default if not defined
621      */

622     public String JavaDoc getAttribute(String JavaDoc key, String JavaDoc defaultValue) {
623         return value(key, defaultValue);
624     }
625
626     /**
627      * Return tag attributes and values.
628      * @return parameter key / value pairs
629      * @deprecated use <tt>attributes()</tt> and <tt>value()</tt> instead
630      * @see #attributes
631      * @see #value
632      */

633     public HashMap JavaDoc getAttributes() {
634         return isWellFormed() ? values : null;
635     }
636
637     /**
638      * Decide whether character is SGML delimiter or equals.
639      *
640      * @param c character in question
641      * @return true if character is an SGML delimiter
642      */

643     private static boolean isDelimiter(char c) {
644         return c == '<' || c == '=' || c == '>';
645     }
646
647     /**
648      * Render this tag as a string.
649      *
650      * @return SGML tag as string, showing range and values
651      */

652     public String JavaDoc toString() {
653         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
654         str.append("[SGMLTag ").append(name).append(": (").append(getOffset()).append(",---)");
655         if (attrs != null && wellFormed) {
656             Iterator JavaDoc iter = attributes(true);
657             while (iter.hasNext()) {
658                 String JavaDoc key = (String JavaDoc) iter.next();
659                 str.append(" ").append(key).append("=\"").append(value(key, null)).append("\"");
660             }
661         } else {
662             str.append(" *MALFORMED TAG*");
663         }
664         str.append(" ]");
665         return str.toString();
666     }
667 }
668
669
670
671
Popular Tags