KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > w3c > tidy > Lexer


1 /*
2  * @(#)Lexer.java 1.11 2000/08/16
3  *
4  */

5
6 package org.w3c.tidy;
7
8 /**
9  *
10  * Lexer for html parser
11  *
12  * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13  * See Tidy.java for the copyright notice.
14  * Derived from <a HREF="http://www.w3.org/People/Raggett/tidy">
15  * HTML Tidy Release 4 Aug 2000</a>
16  *
17  * @author Dave Raggett <dsr@w3.org>
18  * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19  * @version 1.0, 1999/05/22
20  * @version 1.0.1, 1999/05/29
21  * @version 1.1, 1999/06/18 Java Bean
22  * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23  * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24  * @version 1.4, 1999/09/04 DOM support
25  * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26  * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27  * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28  * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29  * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30  * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31  * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
32  */

33
34 /*
35   Given a file stream fp it returns a sequence of tokens.
36
37      GetToken(fp) gets the next token
38      UngetToken(fp) provides one level undo
39
40   The tags include an attribute list:
41
42     - linked list of attribute/value nodes
43     - each node has 2 null-terminated strings.
44     - entities are replaced in attribute values
45
46   white space is compacted if not in preformatted mode
47   If not in preformatted mode then leading white space
48   is discarded and subsequent white space sequences
49   compacted to single space chars.
50
51   If XmlTags is no then Tag names are folded to upper
52   case and attribute names to lower case.
53
54  Not yet done:
55     - Doctype subset and marked sections
56 */

57
58 import java.io.PrintWriter JavaDoc;
59 import java.util.Stack JavaDoc;
60 import java.util.Vector JavaDoc;
61
62 public class Lexer {
63
64
65     public StreamIn in; /* file stream */
66     public PrintWriter JavaDoc errout; /* error output stream */
67     public short badAccess; /* for accessibility errors */
68     public short badLayout; /* for bad style errors */
69     public short badChars; /* for bad char encodings */
70     public short badForm; /* for mismatched/mispositioned form tags */
71     public short warnings; /* count of warnings in this document */
72     public short errors; /* count of errors */
73     public int lines; /* lines seen */
74     public int columns; /* at start of current token */
75     public boolean waswhite; /* used to collapse contiguous white space */
76     public boolean pushed; /* true after token has been pushed back */
77     public boolean insertspace; /* when space is moved after end tag */
78     public boolean excludeBlocks; /* Netscape compatibility */
79     public boolean exiled; /* true if moved out of table */
80     public boolean isvoyager; /* true if xmlns attribute on html element */
81     public short versions; /* bit vector of HTML versions */
82     public int doctype; /* version as given by doctype (if any) */
83     public boolean badDoctype; /* e.g. if html or PUBLIC is missing */
84     public int txtstart; /* start of current node */
85     public int txtend; /* end of current node */
86     public short state; /* state of lexer's finite state machine */
87     public Node token;
88
89     /*
90       lexer character buffer
91
92       parse tree nodes span onto this buffer
93       which contains the concatenated text
94       contents of all of the elements.
95
96      lexsize must be reset for each file.
97     */

98     public byte[] lexbuf; /* byte buffer of UTF-8 chars */
99     public int lexlength; /* allocated */
100     public int lexsize; /* used */
101
102     /* Inline stack for compatibility with Mosaic */
103     public Node inode; /* for deferring text node */
104     public int insert; /* for inferring inline tags */
105     public Stack JavaDoc istack;
106     public int istackbase; /* start of frame */
107
108     public Style styles; /* used for cleaning up presentation markup */
109
110     public Configuration configuration;
111     protected int seenBodyEndTag; /* used by parser */
112     private Vector JavaDoc nodeList;
113
114     public Lexer(StreamIn in, Configuration configuration)
115     {
116         this.in = in;
117         this.lines = 1;
118         this.columns = 1;
119         this.state = LEX_CONTENT;
120         this.badAccess = 0;
121         this.badLayout = 0;
122         this.badChars = 0;
123         this.badForm = 0;
124         this.warnings = 0;
125         this.errors = 0;
126         this.waswhite = false;
127         this.pushed = false;
128         this.insertspace = false;
129         this.exiled = false;
130         this.isvoyager = false;
131         this.versions = Dict.VERS_EVERYTHING;
132         this.doctype = Dict.VERS_UNKNOWN;
133         this.badDoctype = false;
134         this.txtstart = 0;
135         this.txtend = 0;
136         this.token = null;
137         this.lexbuf = null;
138         this.lexlength = 0;
139         this.lexsize = 0;
140         this.inode = null;
141         this.insert = -1;
142         this.istack = new Stack JavaDoc();
143         this.istackbase = 0;
144         this.styles = null;
145         this.configuration = configuration;
146         this.seenBodyEndTag = 0;
147         this.nodeList = new Vector JavaDoc();
148     }
149
150     public Node newNode()
151     {
152         Node node = new Node();
153         nodeList.addElement(node);
154         return node;
155     }
156
157     public Node newNode(short type, byte[] textarray, int start, int end)
158     {
159         Node node = new Node(type, textarray, start, end);
160         nodeList.addElement(node);
161         return node;
162     }
163
164     public Node newNode(short type, byte[] textarray, int start, int end, String JavaDoc element)
165     {
166         Node node = new Node(type, textarray, start, end, element, configuration.tt);
167         nodeList.addElement(node);
168         return node;
169     }
170
171     public Node cloneNode(Node node)
172     {
173         Node cnode = (Node)node.clone();
174         nodeList.addElement(cnode);
175         for (AttVal att = cnode.attributes; att != null; att = att.next) {
176             if (att.asp != null)
177                 nodeList.addElement(att.asp);
178             if (att.php != null)
179                 nodeList.addElement(att.php);
180         }
181         return cnode;
182     }
183
184     public AttVal cloneAttributes(AttVal attrs)
185     {
186         AttVal cattrs = (AttVal)attrs.clone();
187         for (AttVal att = cattrs; att != null; att = att.next) {
188             if (att.asp != null)
189                 nodeList.addElement(att.asp);
190             if (att.php != null)
191                 nodeList.addElement(att.php);
192         }
193         return cattrs;
194     }
195
196     protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray)
197     {
198         Node node;
199         for (int i = 0; i < nodeList.size(); i++) {
200             node = (Node)(nodeList.elementAt(i));
201             if (node.textarray == oldtextarray)
202                 node.textarray = newtextarray;
203         }
204     }
205
206     /* used for creating preformatted text from Word2000 */
207     public Node newLineNode()
208     {
209         Node node = newNode();
210
211         node.textarray = this.lexbuf;
212         node.start = this.lexsize;
213         addCharToLexer((int)'\n');
214         node.end = this.lexsize;
215         return node;
216     }
217
    // Should always be able to convert to/from UTF-8, so encoding exceptions are
    // converted to an Error to avoid adding throws declarations in
    // lots of methods.

222     public static byte[] getBytes(String JavaDoc str) {
223         try {
224             return str.getBytes("UTF8");
225         } catch (java.io.UnsupportedEncodingException JavaDoc e) {
226             throw new Error JavaDoc("string to UTF-8 conversion failed: " + e.getMessage());
227         }
228     }
229
230     public static String JavaDoc getString(byte[] bytes, int offset, int length) {
231         try {
232             return new String JavaDoc(bytes, offset, length, "UTF8");
233         } catch (java.io.UnsupportedEncodingException JavaDoc e) {
234             throw new Error JavaDoc("UTF-8 to string conversion failed: " + e.getMessage());
235         }
236     }
237
238     public boolean endOfInput()
239     {
240         return this.in.isEndOfStream();
241     }
242
243     public void addByte(int c)
244     {
245         if (this.lexsize + 1 >= this.lexlength)
246         {
247             while (this.lexsize + 1 >= this.lexlength)
248             {
249                 if (this.lexlength == 0)
250                     this.lexlength = 8192;
251                 else
252                     this.lexlength = this.lexlength * 2;
253             }
254
255             byte[] temp = this.lexbuf;
256             this.lexbuf = new byte[ this.lexlength ];
257             if (temp != null)
258             {
259                 System.arraycopy( temp, 0, this.lexbuf, 0, temp.length );
260                 updateNodeTextArrays(temp, this.lexbuf);
261             }
262         }
263
264         this.lexbuf[this.lexsize++] = (byte)c;
265         this.lexbuf[this.lexsize] = (byte)'\0'; /* debug */
266     }
267
268     public void changeChar(byte c)
269     {
270         if (this.lexsize > 0)
271         {
272             this.lexbuf[this.lexsize-1] = c;
273         }
274     }
275
276     /* store char c as UTF-8 encoded byte stream */
277     public void addCharToLexer(int c)
278     {
279         if (c < 128)
280             addByte(c);
281         else if (c <= 0x7FF)
282         {
283             addByte(0xC0 | (c >> 6));
284             addByte(0x80 | (c & 0x3F));
285         }
286         else if (c <= 0xFFFF)
287         {
288             addByte(0xE0 | (c >> 12));
289             addByte(0x80 | ((c >> 6) & 0x3F));
290             addByte(0x80 | (c & 0x3F));
291         }
292         else if (c <= 0x1FFFFF)
293         {
294             addByte(0xF0 | (c >> 18));
295             addByte(0x80 | ((c >> 12) & 0x3F));
296             addByte(0x80 | ((c >> 6) & 0x3F));
297             addByte(0x80 | (c & 0x3F));
298         }
299         else
300         {
301             addByte(0xF8 | (c >> 24));
302             addByte(0x80 | ((c >> 18) & 0x3F));
303             addByte(0x80 | ((c >> 12) & 0x3F));
304             addByte(0x80 | ((c >> 6) & 0x3F));
305             addByte(0x80 | (c & 0x3F));
306         }
307     }
308
309     public void addStringToLexer(String JavaDoc str)
310     {
311         for ( int i = 0; i < str.length(); i++ ) {
312             addCharToLexer( (int)str.charAt(i) );
313         }
314     }
315
    /*
      No longer attempts to insert missing ';' for unknown
      entities unless one was present already, since this
      gives unexpected results.

      For example: <a HREF="something.htm?foo&bar&fred">
      was tidied to: <a HREF="something.htm?foo&amp;bar;&amp;fred;">
      rather than: <a HREF="something.htm?foo&amp;bar&amp;fred">

      My thanks to Maurice Buxton for spotting this.
    */

327     public void parseEntity(short mode)
328     {
329         short map;
330         int start;
331         boolean first = true;
332         boolean semicolon = false;
333         boolean numeric = false;
334         int c, ch, startcol;
335         String JavaDoc str;
336
337         start = this.lexsize - 1; /* to start at "&" */
338         startcol = this.in.curcol - 1;
339
340         while (true)
341         {
342             c = this.in.readChar();
343             if (c == StreamIn.EndOfStream) break;
344             if (c == ';')
345             {
346                 semicolon = true;
347                 break;
348             }
349
350             if (first && c == '#')
351             {
352                 addCharToLexer(c);
353                 first = false;
354                 numeric = true;
355                 continue;
356             }
357
358             first = false;
359             map = MAP((char)c);
360
361             /* AQ: Added flag for numeric entities so that numeric entities
362                with missing semi-colons are recognized.
363                Eg. "&#114e&#112;..." is recognized as "rep"
364             */

365             if (numeric && ((c == 'x') || ((map & DIGIT) != 0)))
366             {
367                 addCharToLexer(c);
368                 continue;
369             }
370             if (!numeric && ((map & NAMECHAR) != 0))
371             {
372                 addCharToLexer(c);
373                 continue;
374             }
375
376             /* otherwise put it back */
377
378             this.in.ungetChar(c);
379             break;
380         }
381
382         str = getString( this.lexbuf, start, this.lexsize - start );
383         ch = EntityTable.getDefaultEntityTable().entityCode( str );
384
385         /* deal with unrecognized entities */
386         if (ch <= 0)
387         {
388             /* set error position just before offending chararcter */
389             this.lines = this.in.curline;
390             this.columns = startcol;
391
392             if (this.lexsize > start +1 )
393             {
394                 Report.entityError(this, Report.UNKNOWN_ENTITY, str, ch);
395
396                 if (semicolon)
397                     addCharToLexer(';');
398             }
399             else /* naked & */
400             {
401                 Report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch);
402             }
403         }
404         else
405         {
406             if (c != ';') /* issue warning if not terminated by ';' */
407             {
408                 /* set error position just before offending chararcter */
409                 this.lines = this.in.curline;
410                 this.columns = startcol;
411                 Report.entityError(this, Report.MISSING_SEMICOLON, str, c);
412             }
413
414             this.lexsize = start;
415
416             if (ch == 160 && (mode & Preformatted) != 0)
417                 ch = ' ';
418
419             addCharToLexer(ch);
420
421             if (ch == '&' && !this.configuration.QuoteAmpersand)
422             {
423                 addCharToLexer('a');
424                 addCharToLexer('m');
425                 addCharToLexer('p');
426                 addCharToLexer(';');
427             }
428         }
429     }
430
431     public char parseTagName()
432     {
433         short map;
434         int c;
435
436         /* fold case of first char in buffer */
437
438         c = this.lexbuf[this.txtstart];
439         map = MAP((char)c);
440
441         if (!this.configuration.XmlTags && (map & UPPERCASE) != 0)
442         {
443             c += (int)((int)'a' - (int)'A');
444             this.lexbuf[this.txtstart] = (byte)c;
445         }
446
447         while (true)
448         {
449             c = this.in.readChar();
450             if (c == StreamIn.EndOfStream) break;
451             map = MAP((char)c);
452
453             if ((map & NAMECHAR) == 0)
454                 break;
455
456             /* fold case of subsequent chars */
457
458             if (!this.configuration.XmlTags && (map & UPPERCASE) != 0)
459                 c += (int)((int)'a' - (int)'A');
460
461             addCharToLexer(c);
462         }
463
464         this.txtend = this.lexsize;
465         return (char)c;
466     }
467
468     public void addStringLiteral(String JavaDoc str)
469     {
470         for ( int i = 0; i < str.length(); i++ ) {
471             addCharToLexer( (int)str.charAt(i) );
472         }
473     }
474
475     /* choose what version to use for new doctype */
476     public short HTMLVersion()
477     {
478         short versions;
479
480         versions = this.versions;
481
482         if ((versions & Dict.VERS_HTML20) != 0)
483             return Dict.VERS_HTML20;
484
485         if ((versions & Dict.VERS_HTML32) != 0)
486             return Dict.VERS_HTML32;
487
488         if ((versions & Dict.VERS_HTML40_STRICT) != 0)
489             return Dict.VERS_HTML40_STRICT;
490
491         if ((versions & Dict.VERS_HTML40_LOOSE) != 0)
492             return Dict.VERS_HTML40_LOOSE;
493
494         if ((versions & Dict.VERS_FRAMES) != 0)
495             return Dict.VERS_FRAMES;
496
497         return Dict.VERS_UNKNOWN;
498     }
499
500     public String JavaDoc HTMLVersionName()
501     {
502         short guessed;
503         int j;
504
505         guessed = apparentVersion();
506
507         for (j = 0; j < W3CVersion.length; ++j)
508         {
509             if (guessed == W3CVersion[j].code)
510             {
511                 if (this.isvoyager)
512                     return W3CVersion[j].voyagerName;
513
514                 return W3CVersion[j].name;
515             }
516         }
517
518         return null;
519     }
520
521     /* add meta element for Tidy */
522     public boolean addGenerator(Node root)
523     {
524         AttVal attval;
525         Node node;
526         Node head = root.findHEAD(configuration.tt);
527
528         if (head != null)
529         {
530             for (node = head.content; node != null; node = node.next)
531             {
532                 if (node.tag == configuration.tt.tagMeta)
533                 {
534                     attval = node.getAttrByName("name");
535
536                     if (attval != null && attval.value != null &&
537                         Lexer.wstrcasecmp(attval.value, "generator") == 0)
538                     {
539                         attval = node.getAttrByName("content");
540
541                         if (attval != null && attval.value != null &&
542                             attval.value.length() >= 9 &&
543                             Lexer.wstrcasecmp(attval.value.substring(0, 9), "HTML Tidy") == 0)
544                         {
545                             return false;
546                         }
547                     }
548                 }
549             }
550
551             node = this.inferredTag("meta");
552             node.addAttribute("content", "HTML Tidy, see www.w3.org");
553             node.addAttribute("name", "generator");
554             Node.insertNodeAtStart(head, node);
555             return true;
556         }
557
558         return false;
559     }
560
561     /* return true if substring s is in p and isn't all in upper case */
562     /* this is used to check the case of SYSTEM, PUBLIC, DTD and EN */
563     /* len is how many chars to check in p */
564     private static boolean findBadSubString(String JavaDoc s, String JavaDoc p, int len)
565     {
566         int n = s.length();
567         int i = 0;
568         String JavaDoc ps;
569
570         while (n < len)
571         {
572             ps = p.substring(i, i + n);
573             if (wstrcasecmp(s, ps) == 0)
574                 return (!ps.equals(s.substring(0, n)));
575
576             ++i;
577             --len;
578         }
579
580         return false;
581     }
582
583     public boolean checkDocTypeKeyWords(Node doctype)
584     {
585         int len = doctype.end - doctype.start;
586         String JavaDoc s = getString(this.lexbuf, doctype.start, len);
587
588         return !(
589             findBadSubString("SYSTEM", s, len) ||
590             findBadSubString("PUBLIC", s, len) ||
591             findBadSubString("//DTD", s, len) ||
592             findBadSubString("//W3C", s, len) ||
593             findBadSubString("//EN", s, len)
594             );
595     }
596
597     /* examine <!DOCTYPE> to identify version */
598     public short findGivenVersion(Node doctype)
599     {
600         String JavaDoc p, s;
601         int i, j;
602         int len;
603         String JavaDoc str1;
604         String JavaDoc str2;
605
606         /* if root tag for doctype isn't html give up now */
607         str1 = getString(this.lexbuf, doctype.start, 5);
608         if (wstrcasecmp(str1, "html ") != 0)
609             return 0;
610
611         if (!checkDocTypeKeyWords(doctype))
612             Report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
613
614         /* give up if all we are given is the system id for the doctype */
615         str1 = getString(this.lexbuf, doctype.start + 5, 7);
616         if (wstrcasecmp(str1, "SYSTEM ") == 0)
617         {
618             /* but at least ensure the case is correct */
619             if (!str1.substring(0, 6).equals("SYSTEM"))
620                 System.arraycopy( getBytes("SYSTEM"), 0,
621                                   this.lexbuf, doctype.start + 5, 6 );
622             return 0; /* unrecognized */
623         }
624
625         if (wstrcasecmp(str1, "PUBLIC ") == 0)
626         {
627             if (!str1.substring(0, 6).equals("PUBLIC"))
628                 System.arraycopy( getBytes("PUBLIC "), 0,
629                                   this.lexbuf, doctype.start + 5, 6 );
630         }
631         else
632             this.badDoctype = true;
633
634         for (i = doctype.start; i < doctype.end; ++i)
635         {
636             if (this.lexbuf[i] == (byte)'"')
637             {
638                 str1 = getString( this.lexbuf, i + 1, 12 );
639                 str2 = getString( this.lexbuf, i + 1, 13 );
640                 if (str1.equals("-//W3C//DTD "))
641                 {
642                     /* compute length of identifier e.g. "HTML 4.0 Transitional" */
643                     for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j);
644                     len = j - i - 13;
645                     p = getString( this.lexbuf, i + 13, len );
646
647                     for (j = 1; j < W3CVersion.length; ++j)
648                     {
649                         s = W3CVersion[j].name;
650                         if (len == s.length() && s.equals(p))
651                             return W3CVersion[j].code;
652                     }
653
654                     /* else unrecognized version */
655                 }
656                 else if (str2.equals("-//IETF//DTD "))
657                 {
658                     /* compute length of identifier e.g. "HTML 2.0" */
659                     for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte)'/'; ++j);
660                     len = j - i - 14;
661
662                     p = getString( this.lexbuf, i + 14, len );
663                     s = W3CVersion[0].name;
664                     if (len == s.length() && s.equals(p))
665                         return W3CVersion[0].code;
666
667                     /* else unrecognized version */
668                 }
669                 break;
670             }
671         }
672
673         return 0;
674     }
675
676     public void fixHTMLNameSpace(Node root, String JavaDoc profile)
677     {
678         Node node;
679         AttVal prev, attr;
680
681         for (node = root.content;
682                 node != null && node.tag != configuration.tt.tagHtml; node = node.next);
683
684         if (node != null)
685         {
686             prev = null;
687
688             for (attr = node.attributes; attr != null; attr = attr.next)
689             {
690                 if (attr.attribute.equals("xmlns"))
691                     break;
692
693                 prev = attr;
694             }
695
696             if (attr != null)
697             {
698                 if (!attr.value.equals(profile))
699                 {
700                     Report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE);
701                     attr.value = profile;
702                 }
703             }
704             else
705             {
706                 attr = new AttVal( node.attributes, null, (int)'"',
707                                    "xmlns", profile );
708                 attr.dict =
709                     AttributeTable.getDefaultAttributeTable().findAttribute( attr );
710                 node.attributes = attr;
711             }
712         }
713     }
714
715     public boolean setXHTMLDocType(Node root)
716     {
717         String JavaDoc fpi = " ";
718         String JavaDoc sysid = "";
719         String JavaDoc namespace = XHTML_NAMESPACE;
720         Node doctype;
721
722         doctype = root.findDocType();
723
724         if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT)
725         {
726             if (doctype != null)
727                 Node.discardElement(doctype);
728             return true;
729         }
730
731         if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO)
732         {
733             /* see what flavor of XHTML this document matches */
734             if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
735             { /* use XHTML strict */
736                 fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
737                 sysid = voyager_strict;
738             }
739             else if ((this.versions & Dict.VERS_LOOSE) != 0)
740             {
741                 fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
742                 sysid = voyager_loose;
743             }
744             else if ((this.versions & Dict.VERS_FRAMES) != 0)
745             { /* use XHTML frames */
746                 fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
747                 sysid = voyager_frameset;
748             }
749             else /* lets assume XHTML transitional */
750             {
751                 fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
752                 sysid = voyager_loose;
753             }
754         }
755         else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT)
756         {
757             fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
758             sysid = voyager_strict;
759         }
760         else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE)
761         {
762             fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
763             sysid = voyager_loose;
764         }
765
766         fixHTMLNameSpace(root, namespace);
767
768         if (doctype == null)
769         {
770             doctype = newNode(Node.DocTypeTag, this.lexbuf, 0, 0);
771             doctype.next = root.content;
772             doctype.parent = root;
773             doctype.prev = null;
774             root.content = doctype;
775         }
776
777         if (configuration.docTypeMode == Configuration.DOCTYPE_USER &&
778             configuration.docTypeStr != null)
779         {
780             fpi = configuration.docTypeStr;
781             sysid = "";
782         }
783
784         this.txtstart = this.lexsize;
785         this.txtend = this.lexsize;
786
787         /* add public identifier */
788         addStringLiteral("html PUBLIC ");
789
790         /* check if the fpi is quoted or not */
791         if (fpi.charAt(0) == '"')
792             addStringLiteral(fpi);
793         else
794         {
795             addStringLiteral("\"");
796             addStringLiteral(fpi);
797             addStringLiteral("\"");
798         }
799
800         if (sysid.length() + 6 >= this.configuration.wraplen)
801             addStringLiteral("\n\"");
802         else
803             addStringLiteral("\n \"");
804
805         /* add system identifier */
806         addStringLiteral(sysid);
807         addStringLiteral("\"");
808
809         this.txtend = this.lexsize;
810
811         doctype.start = this.txtstart;
812         doctype.end = this.txtend;
813
814         return false;
815     }
816
817     public short apparentVersion()
818     {
819         switch (this.doctype)
820         {
821         case Dict.VERS_UNKNOWN:
822             return HTMLVersion();
823
824         case Dict.VERS_HTML20:
825             if ((this.versions & Dict.VERS_HTML20) != 0)
826                 return Dict.VERS_HTML20;
827
828             break;
829
830         case Dict.VERS_HTML32:
831             if ((this.versions & Dict.VERS_HTML32) != 0)
832                 return Dict.VERS_HTML32;
833
834             break; /* to replace old version by new */
835
836         case Dict.VERS_HTML40_STRICT:
837             if ((this.versions & Dict.VERS_HTML40_STRICT) != 0)
838                 return Dict.VERS_HTML40_STRICT;
839
840             break;
841
842         case Dict.VERS_HTML40_LOOSE:
843             if ((this.versions & Dict.VERS_HTML40_LOOSE) != 0)
844                 return Dict.VERS_HTML40_LOOSE;
845
846             break; /* to replace old version by new */
847
848         case Dict.VERS_FRAMES:
849             if ((