KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > w3c > tidy > ParserImpl


1 /*
2  * @(#)ParserImpl.java 1.11 2000/08/16
3  *
4  */

5
6 package org.w3c.tidy;
7
8 /**
9  *
10  * HTML Parser implementation
11  *
12  * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13  * See Tidy.java for the copyright notice.
14  * Derived from <a HREF="http://www.w3.org/People/Raggett/tidy">
15  * HTML Tidy Release 4 Aug 2000</a>
16  *
17  * @author Dave Raggett <dsr@w3.org>
18  * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19  * @version 1.0, 1999/05/22
20  * @version 1.0.1, 1999/05/29
21  * @version 1.1, 1999/06/18 Java Bean
22  * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23  * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24  * @version 1.4, 1999/09/04 DOM support
25  * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26  * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27  * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28  * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29  * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30  * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31  * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
32  */

33
34 public class ParserImpl {
35
36     //private static int SeenBodyEndTag; /* AQ: moved into lexer structure */
37

38     private static void parseTag(Lexer lexer, Node node, short mode)
39     {
40         // Local fix by GLP 2000-12-21. Need to reset insertspace if this
41
// is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
42
// Remove this code once the fix is made in Tidy.
43

44 /****** (Original code follows)
45         if ((node.tag.model & Dict.CM_EMPTY) != 0)
46         {
47             lexer.waswhite = false;
48             return;
49         }
50         else if (!((node.tag.model & Dict.CM_INLINE) != 0))
51             lexer.insertspace = false;
52 *******/

53
54         if (!((node.tag.model & Dict.CM_INLINE) != 0))
55             lexer.insertspace = false;
56
57         if ((node.tag.model & Dict.CM_EMPTY) != 0)
58         {
59             lexer.waswhite = false;
60             return;
61         }
62
63         if (node.tag.parser == null || node.type == Node.StartEndTag)
64             return;
65
66         node.tag.parser.parse(lexer, node, mode);
67     }
68
69     private static void moveToHead(Lexer lexer, Node element, Node node)
70     {
71         Node head;
72         TagTable tt = lexer.configuration.tt;
73
74
75         if (node.type == Node.StartTag || node.type == Node.StartEndTag)
76         {
77             Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
78
79             while (element.tag != tt.tagHtml)
80                 element = element.parent;
81
82             for (head = element.content; head != null; head = head.next)
83             {
84                 if (head.tag == tt.tagHead)
85                 {
86                     Node.insertNodeAtEnd(head, node);
87                     break;
88                 }
89             }
90
91             if (node.tag.parser != null)
92                 parseTag(lexer, node, Lexer.IgnoreWhitespace);
93         }
94         else
95         {
96             Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
97         }
98     }
99
100     public static class ParseHTML implements Parser {
101
102         public void parse( Lexer lexer, Node html, short mode )
103         {
104             Node node, head;
105             Node frameset = null;
106             Node noframes = null;
107
108             lexer.configuration.XmlTags = false;
109             lexer.seenBodyEndTag = 0;
110             TagTable tt = lexer.configuration.tt;
111
112             for (;;)
113             {
114                 node = lexer.getToken(Lexer.IgnoreWhitespace);
115
116                 if (node == null)
117                 {
118                     node = lexer.inferredTag("head");
119                     break;
120                 }
121
122                 if (node.tag == tt.tagHead)
123                     break;
124
125                 if (node.tag == html.tag && node.type == Node.EndTag)
126                 {
127                     Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
128                     continue;
129                 }
130
131                 /* deal with comments etc. */
132                 if (Node.insertMisc(html, node))
133                     continue;
134
135                 lexer.ungetToken();
136                 node = lexer.inferredTag("head");
137                 break;
138             }
139
140             head = node;
141             Node.insertNodeAtEnd(html, head);
142             getParseHead().parse(lexer, head, mode);
143
144             for (;;)
145             {
146                 node = lexer.getToken(Lexer.IgnoreWhitespace);
147
148                 if (node == null)
149                 {
150                     if (frameset == null) /* create an empty body */
151                         node = lexer.inferredTag("body");
152
153                     return;
154                 }
155
156                 /* robustly handle html tags */
157                 if (node.tag == html.tag)
158                 {
159                     if (node.type != Node.StartTag && frameset == null)
160                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
161
162                     continue;
163                 }
164
165                 /* deal with comments etc. */
166                 if (Node.insertMisc(html, node))
167                     continue;
168
169                 /* if frameset document coerce <body> to <noframes> */
170                 if (node.tag == tt.tagBody)
171                 {
172                     if (node.type != Node.StartTag)
173                     {
174                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
175                         continue;
176                     }
177
178                     if (frameset != null)
179                     {
180                         lexer.ungetToken();
181
182                         if (noframes == null)
183                         {
184                             noframes = lexer.inferredTag("noframes");
185                             Node.insertNodeAtEnd(frameset, noframes);
186                             Report.warning(lexer, html, noframes, Report.INSERTING_TAG);
187                         }
188
189                         parseTag(lexer, noframes, mode);
190                         continue;
191                     }
192
193                     break; /* to parse body */
194                 }
195
196                 /* flag an error if we see more than one frameset */
197                 if (node.tag == tt.tagFrameset)
198                 {
199                     if (node.type != Node.StartTag)
200                     {
201                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
202                         continue;
203                     }
204
205                     if (frameset != null)
206                         Report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
207                     else
208                         frameset = node;
209
210                     Node.insertNodeAtEnd(html, node);
211                     parseTag(lexer, node, mode);
212
213                     /*
214                       see if it includes a noframes element so
215                       that we can merge subsequent noframes elements
216                     */

217
218                     for (node = frameset.content; node != null; node = node.next)
219                     {
220                         if (node.tag == tt.tagNoframes)
221                             noframes = node;
222                     }
223                     continue;
224                 }
225
226                 /* if not a frameset document coerce <noframes> to <body> */
227                 if (node.tag == tt.tagNoframes)
228                 {
229                     if (node.type != Node.StartTag)
230                     {
231                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
232                         continue;
233                     }
234
235                     if (frameset == null)
236                     {
237                         Report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
238                         node = lexer.inferredTag("body");
239                         break;
240                     }
241
242                     if (noframes == null)
243                     {
244                         noframes = node;
245                         Node.insertNodeAtEnd(frameset, noframes);
246                     }
247
248                     parseTag(lexer, noframes, mode);
249                     continue;
250                 }
251
252                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
253                 {
254                     if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
255                     {
256                         moveToHead(lexer, html, node);
257                         continue;
258                     }
259                 }
260
261                 lexer.ungetToken();
262
263                 /* insert other content into noframes element */
264
265                 if (frameset != null)
266                 {
267                     if (noframes == null)
268                     {
269                         noframes = lexer.inferredTag("noframes");
270                         Node.insertNodeAtEnd(frameset, noframes);
271                     }
272                     else
273                         Report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
274
275                     parseTag(lexer, noframes, mode);
276                     continue;
277                 }
278
279                 node = lexer.inferredTag("body");
280                 break;
281             }
282
283             /* node must be body */
284
285             Node.insertNodeAtEnd(html, node);
286             parseTag(lexer, node, mode);
287         }
288
289     };
290
291     public static class ParseHead implements Parser {
292
293         public void parse( Lexer lexer, Node head, short mode )
294         {
295             Node node;
296             int HasTitle = 0;
297             int HasBase = 0;
298             TagTable tt = lexer.configuration.tt;
299
300             while (true)
301             {
302                 node = lexer.getToken(Lexer.IgnoreWhitespace);
303                 if (node == null) break;
304                 if (node.tag == head.tag && node.type == Node.EndTag)
305                 {
306                     head.closed = true;
307                     break;
308                 }
309
310                 if (node.type == Node.TextNode)
311                 {
312                     lexer.ungetToken();
313                     break;
314                 }
315
316                 /* deal with comments etc. */
317                 if (Node.insertMisc(head, node))
318                     continue;
319
320                 if (node.type == Node.DocTypeTag)
321                 {
322                     Node.insertDocType(lexer, head, node);
323                     continue;
324                 }
325
326                 /* discard unknown tags */
327                 if (node.tag == null)
328                 {
329                     Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
330                     continue;
331                 }
332         
333                 if (!((node.tag.model & Dict.CM_HEAD) != 0))
334                 {
335                     lexer.ungetToken();
336                     break;
337                 }
338
339                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
340                 {
341                     if (node.tag == tt.tagTitle)
342                     {
343                         ++HasTitle;
344
345                         if (HasTitle > 1)
346                             Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
347                     }
348                     else if (node.tag == tt.tagBase)
349                     {
350                         ++HasBase;
351
352                         if (HasBase > 1)
353                             Report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
354                     }
355                     else if (node.tag == tt.tagNoscript)
356                         Report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
357
358                     Node.insertNodeAtEnd(head, node);
359                     parseTag(lexer, node, Lexer.IgnoreWhitespace);
360                     continue;
361                 }
362
363                 /* discard unexpected text nodes and end tags */
364                 Report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
365             }
366
367             if (HasTitle == 0)
368             {
369                 Report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
370                 Node.insertNodeAtEnd(head, lexer.inferredTag( "title"));
371             }
372         }
373
374     };
375
376     public static class ParseTitle implements Parser {
377
378         public void parse( Lexer lexer, Node title, short mode )
379         {
380             Node node;
381
382             while (true)
383             {
384                 node = lexer.getToken(Lexer.MixedContent);
385                 if (node == null) break;
386                 if (node.tag == title.tag && node.type == Node.EndTag)
387                 {
388                     title.closed = true;
389                     Node.trimSpaces(lexer, title);
390                     return;
391                 }
392
393                 if (node.type == Node.TextNode)
394                 {
395                     /* only called for 1st child */
396                     if (title.content == null)
397                         Node.trimInitialSpace(lexer, title, node);
398
399                     if (node.start >= node.end)
400                     {
401                         continue;
402                     }
403
404                     Node.insertNodeAtEnd(title, node);
405                     continue;
406                 }
407
408                 /* deal with comments etc. */
409                 if (Node.insertMisc(title, node))
410                     continue;
411
412                 /* discard unknown tags */
413                 if (node.tag == null)
414                 {
415                     Report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
416                     continue;
417                 }
418
419                 /* pushback unexpected tokens */
420                 Report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
421                 lexer.ungetToken();
422                 Node.trimSpaces(lexer, title);
423                 return;
424             }
425
426             Report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
427         }
428
429     };
430
431     public static class ParseScript implements Parser {
432
433         public void parse( Lexer lexer, Node script, short mode )
434         {
435         /*
436           This isn't quite right for CDATA content as it recognises
437           tags within the content and parses them accordingly.
438           This will unfortunately screw up scripts which include
439           < + letter, < + !, < + ? or < + / + letter
440         */

441
442             Node node;
443
444             node = lexer.getCDATA( script);
445
446             if (node != null)
447                 Node.insertNodeAtEnd(script, node);
448         }
449
450     };
451
452     public static class ParseBody implements Parser {
453
454         public void parse( Lexer lexer, Node body, short mode )
455         {
456             Node node;
457             boolean checkstack, iswhitenode;
458
459             mode = Lexer.IgnoreWhitespace;
460             checkstack = true;
461             TagTable tt = lexer.configuration.tt;
462
463             while (true)
464             {
465                 node = lexer.getToken(mode);
466                 if (node == null) break;
467                 if (node.tag == body.tag && node.type == Node.EndTag)
468                 {
469                     body.closed = true;
470                     Node.trimSpaces(lexer, body);
471                     lexer.seenBodyEndTag = 1;
472                     mode = Lexer.IgnoreWhitespace;
473
474                     if (body.parent.tag == tt.tagNoframes)
475                         break;
476
477                     continue;
478                 }
479         
480                 if (node.tag == tt.tagNoframes)
481                 {
482                     if (node.type == Node.StartTag)
483                     {
484                         Node.insertNodeAtEnd(body, node);
485                         getParseBlock().parse(lexer, node, mode);
486                         continue;
487                     }
488
489                     if (node.type == Node.EndTag &&
490                         body.parent.tag == tt.tagNoframes)
491                     {
492                         Node.trimSpaces(lexer, body);
493                         lexer.ungetToken();
494                         break;
495                     }
496                 }
497
498                 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
499                     && body.parent.tag == tt.tagNoframes)
500                 {
501                     Node.trimSpaces(lexer, body);
502                     lexer.ungetToken();
503                     break;
504                 }
505         
506                 if (node.tag == tt.tagHtml)
507                 {
508                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
509                         Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
510
511                     continue;
512                 }
513
514                 iswhitenode = false;
515
516                 if (node.type == Node.TextNode &&
517                        node.end <= node.start + 1 &&
518                        node.textarray[node.start] == (byte)' ')
519                     iswhitenode = true;
520
521                 /* deal with comments etc. */
522                 if (Node.insertMisc(body, node))
523                     continue;
524
525                 if (lexer.seenBodyEndTag == 1 && !iswhitenode)
526                 {
527                     ++lexer.seenBodyEndTag;
528                     Report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
529                 }
530
531                 /* mixed content model permits text */
532                 if (node.type == Node.TextNode)
533                 {
534                     if (iswhitenode && mode == Lexer.IgnoreWhitespace)
535                     {
536                         continue;
537                     }
538
539                     if (lexer.configuration.EncloseBodyText && !iswhitenode)
540                     {
541                         Node para;
542                 
543                         lexer.ungetToken();
544                         para = lexer.inferredTag("p");
545                         Node.insertNodeAtEnd(body, para);
546                         parseTag(lexer, para, mode);
547                         mode = Lexer.MixedContent;
548                         continue;
549                     }
550                     else /* strict doesn't allow text here */
551                         lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
552
553                     if (checkstack)
554                     {
555                         checkstack = false;
556
557                         if (lexer.inlineDup( node) > 0)
558                             continue;
559                     }
560
561                     Node.insertNodeAtEnd(body, node);
562                     mode = Lexer.MixedContent;
563                     continue;
564                 }
565
566                 if (node.type == Node.DocTypeTag)
567                 {
568                     Node.insertDocType(lexer, body, node);
569                     continue;
570                 }
571                 /* discard unknown and PARAM tags */
572                 if (node.tag == null || node.tag == tt.tagParam)
573                 {
574                     Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
575                     continue;
576                 }
577
578                 /*
579                   Netscape allows LI and DD directly in BODY
580                   We infer UL or DL respectively and use this
581                   boolean to exclude block-level elements so as
582                   to match Netscape's observed behaviour.
583                 */

584                 lexer.excludeBlocks = false;
585         
586                 if (!((node.tag.model & Dict.CM_BLOCK) != 0) &&
587                     !((node.tag.model & Dict.CM_INLINE) != 0))
588                 {
589                     /* avoid this error message being issued twice */
590                     if (!((node.tag.model & Dict.CM_HEAD) != 0))
591                         Report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
592
593                     if ((node.tag.model & Dict.CM_HTML) != 0)
594                     {
595                         /* copy body attributes if current body was inferred */
596                         if (node.tag == tt.tagBody && body.implicit
597                                             && body.attributes == null)
598                         {
599                             body.attributes = node.attributes;
600                             node.attributes = null;
601                         }
602
603                         continue;
604                     }
605
606                     if ((node.tag.model & Dict.CM_HEAD) != 0)
607                     {
608                         moveToHead(lexer, body, node);
609                         continue;
610                     }
611
612                     if ((node.tag.model & Dict.CM_LIST) != 0)
613                     {
614                         lexer.ungetToken();
615                         node = lexer.inferredTag( "ul");
616                         Node.addClass(node, "noindent");
617                         lexer.excludeBlocks = true;
618                     }
619                     else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
620                     {
621                         lexer.ungetToken();
622                         node = lexer.inferredTag( "dl");
623                         lexer.excludeBlocks = true;
624                     }
625                     else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
626                     {
627                         lexer.ungetToken();
628                         node = lexer.inferredTag( "table");
629                         lexer.excludeBlocks = true;
630                     }
631                     else
632                     {
633                         /* AQ: The following line is from the official C
634                            version of tidy. It doesn't make sense to me
635                            because the '!' operator has higher precedence
636                            than the '&' operator. It seems to me that the
637                            expression always evaluates to 0.
638
639                            if (!node->tag->model & (CM_ROW | CM_FIELD))
640
641                            AQ: 13Jan2000 fixed in C tidy
642                         */

643                         if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
644                         {
645                             lexer.ungetToken();
646                             return;
647                         }
648
649                         /* ignore </td> </th> <option> etc. */
650                         continue;
651                     }
652                 }
653
654                 if (node.type == Node.EndTag)
655                 {
656                     if (node.tag == tt.tagBr)
657                         node.type = Node.StartTag;
658                     else if (node.tag == tt.tagP)
659                     {
660                         Node.coerceNode(lexer, node, tt.tagBr);
661                         Node.insertNodeAtEnd(body, node);
662                         node = lexer.inferredTag("br");
663                     }
664                     else if ((node.tag.model & Dict.CM_INLINE) != 0)
665                         lexer.popInline(node);
666                 }
667
668                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
669                 {
670                     if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
671                     {
672                         /* HTML4 strict doesn't allow inline content here */
673                         /* but HTML2 does allow img elements as children of body */
674                         if (node.tag == tt.tagImg)
675                             lexer.versions &= ~Dict.VERS_HTML40_STRICT;
676                         else
677                             lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
678
679                         if (checkstack && !node.implicit)
680                         {
681                             checkstack = false;
682
683                             if (lexer.inlineDup( node) > 0)
684                                 continue;
685                         }
686
687                         mode = Lexer.MixedContent;
688                     }
689                     else
690                     {
691                         checkstack = true;
692                         mode = Lexer.IgnoreWhitespace;
693                     }
694
695                     if (node.implicit)
696                         Report.warning(lexer, body, node, Report.INSERTING_TAG);
697
698                     Node.insertNodeAtEnd(body, node);
699                     parseTag(lexer, node, mode);
700                     continue;
701                 }
702
703                 /* discard unexpected tags */
704                 Report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
705             }
706         }
707
708     };
709
710     public static class ParseFrameSet implements Parser {
711
712         public void parse( Lexer lexer, Node frameset, short mode )
713         {
714             Node node;
715             TagTable tt = lexer.configuration.tt;
716
717             lexer.badAccess |= Report.USING_FRAMES;
718
719             while (true)
720             {
721                 node = lexer.getToken(Lexer.IgnoreWhitespace);
722                 if (node == null) break;
723                 if (node.tag == frameset.tag && node.type == Node.EndTag)
724                 {
725                     frameset.closed = true;
726                     Node.trimSpaces(lexer, frameset);
727                     return;
728                 }
729
730                 /* deal with comments etc. */
731                 if (Node.insertMisc(frameset, node))
732                     continue;
733
734                 if (node.tag == null)
735                 {
736                     Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
737                     continue;
738                 }
739
740                 if (node.type == Node.StartTag || node.type == Node.StartEndTag)
741                 {
742                     if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
743                     {
744                         moveToHead(lexer, frameset, node);
745                         continue;
746                     }
747                 }
748
749                 if (node.tag == tt.tagBody)
750                 {
751                     lexer.ungetToken();
752                     node = lexer.inferredTag("noframes");
753                     Report.warning(lexer, frameset, node, Report.INSERTING_TAG);
754                 }
755
756                 if (node.type == Node.StartTag && (node.tag.model & Dict.CM_FRAMES) != 0)
757                 {
758                     Node.insertNodeAtEnd(frameset, node);
759                     lexer.excludeBlocks = false;
760                     parseTag(lexer, node, Lexer.MixedContent);
761                     continue;
762                 }
763                 else if (node.type == Node.StartEndTag && (node.tag.model & Dict.CM_FRAMES) != 0)
764                 {
765                     Node.insertNodeAtEnd(frameset, node);
766                     continue;
767                 }
768
769                 /* discard unexpected tags */
770                 Report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
771             }
772
773             Report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
774         }
775
776     };
777
778     public static class ParseInline implements Parser {
779
780         public void parse( Lexer lexer, Node element, short mode )
781         {
782             Node node, parent;
783             TagTable tt = lexer.configuration.tt;
784
785             if ((element.tag.model & Dict.CM_EMPTY) != 0)
786                 return;
787
788             if (element.tag == tt.tagA)
789             {
790                 if (element.attributes == null)
791                 {
792                     Report.warning(lexer, element.parent, element, Report.DISCARDING_UNEXPECTED);
793                     Node.discardElement(element);
794                     return;
795                 }
796             }
797
798             /*
799              ParseInline is used for some block level elements like H1 to H6
800              For such elements we need to insert inline emphasis tags currently
801              on the inline stack. For Inline elements, we normally push them
802              onto the inline stack provided they aren't implicit or OBJECT/APPLET.
803              This test is carried out in PushInline and PopInline, see istack.c
804              We don't push A or SPAN to replicate current browser behavior
805             */

806             if (((element.tag.model & Dict.CM_BLOCK) != 0) || (element.tag == tt.tagDt))
807                 lexer.inlineDup( null);
808             else if ((element.tag.model & Dict.CM_INLINE) != 0 &&
809                         element.tag != tt.tagA && element.tag != tt.tagSpan)
810                 lexer.pushInline( element);
811
812             if (element.tag == tt.tagNobr)
813                 lexer.badLayout |= Report.USING_NOBR;
814             else if (element.tag == tt.tagFont)
815                 lexer.badLayout |= Report.USING_FONT;
816
817             /* Inline elements may or may not be within a preformatted element */
818             if (mode != Lexer.Preformatted)
819                 mode = Lexer.MixedContent;
820
821             while (true)
822             {
823                 node = lexer.getToken(mode);
824                 if (node == null) break;
825                 /* end tag for current element */
826                 if (node.tag == element.tag && node.type == Node.EndTag)
827                 {
828                     if ((element.tag.model & Dict.CM_INLINE) != 0 &&
829                         element.tag != tt.tagA)
830                         lexer.popInline( node);
831
832                     if (!((mode & Lexer.Preformatted) != 0))
833                         Node.trimSpaces(lexer, element);
834                     /*
835                      if a font element wraps an anchor and nothing else
836                      then move the font element inside the anchor since
837                      otherwise it won't alter the anchor text color
838                     */

839                     if (element.tag == tt.tagFont &&
840                         element.content != null &&
841                         element.content == element.last)
842                     {
843                         Node child = element.content;
844
845                         if (child.tag == tt.tagA)
846                         {
847                             child.parent = element.parent;
848                             child.next = element.next;
849                             child.prev = element.prev;
850
851                             if (child.prev != null)
852                                 child.prev.next = child;
853                             else
854                                 child.parent.content = child;
855
856                             if (child.next != null)
857                                 child.next.prev = child;
858                             else
859                                 child.parent.last = child;
860
861                             element.next = null;
862                             element.prev = null;
863                             element.parent = child;
864                             element.content = child.content;
865                             element.last = child.last;
866                             child.content = element;
867                             child.last = element;
868                             for (child = element.content; child != null; child = child.next)
869                                 child.parent = element;
870                         }
871                     }
872                     element.closed = true;
873                     Node.trimSpaces(lexer, element);
874                     Node.trimEmptyElement(lexer, element);
875                     return;
876                 }
877
878                 /* <u>...<u> map 2nd <u> to </u> if 1st is explicit */
879                 /* otherwise emphasis nesting is probably unintentional */
880                 /* big and small have cumulative effect so leave them alone */
881                 if (node.type == Node.StartTag
882                         && node.tag == element.tag
883                         && lexer.isPushed(node)
884                         && !node.implicit
885                         && !element.implicit
886                         && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0)
887                         && node.tag != tt.tagA
888                         && node.tag != tt.tagFont
889                         && node.tag != tt.tagBig
890                         && node.tag != tt.tagSmall)
891                 {
892                     if (element.content != null && node.attributes == null)
893                     {
894                         Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
895                         node.type = Node.EndTag;
896                         lexer.ungetToken();
897                         continue;
898                     }
899
900                     Report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
901                 }
902
903                 if (node.type == Node.TextNode)
904                 {
905                     /* only called for 1st child */
906                     if (element.content == null &&
907                         !((mode & Lexer.Preformatted) != 0))
908                         Node.trimSpaces(lexer, element);
909
910                     if (node.start >= node.end)
911                     {
912                         continue;
913                     }
914
915                     Node.insertNodeAtEnd(element, node);
916                     continue;
917                 }
918
919                 /* mixed content model so allow text */
920                 if (Node.insertMisc(element, node))
921                     continue;
922
923                 /* deal with HTML tags */
924                 if (node.tag == tt.tagHtml)
925                 {
926                     if (node.type == Node.StartTag || node.type == Node.StartEndTag)
927                     {
928                         Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
929                         continue;
930                     }
931
932                     /* otherwise infer end of inline element */
933                     lexer.ungetToken();
934                     if (!((mode & Lexer.Preformatted) != 0))
935                         Node.trimSpaces(lexer, element);
936                     Node.trimEmptyElement(lexer, element);
937                     return;
938                 }
939
940                 /* within <dt> or <pre> map <p> to <br> */
941                 if (node.tag == tt.tagP &&
942                       node.type == Node.StartTag &&
943                       ((mode & Lexer.Preformatted) != 0 ||
944                        element.tag == tt.tagDt ||
945                       element.isDescendantOf(tt.tagDt)))
946                 {
947                     node.tag = tt.tagBr;
948                     node.element = "br";
949                     Node.trimSpaces(lexer, element);
950                     Node.insertNodeAtEnd(element, node);
951                     continue;
952                 }
953
954                 /* ignore unknown and PARAM tags */
955                 if (node.tag == null || node.tag == tt.tagParam)
956                 {
957                     Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
958                     continue;
959                 }
960
961                 if (node.tag == tt.tagBr && node.type == Node.EndTag)
962                     node.type = Node.StartTag;
963
964                 if (node.type == Node.EndTag)
965                 {
966                     /* coerce </br> to <br> */
967                     if (node.tag == tt.tagBr)
968                         node.type = Node.StartTag;
969                     else if (node.tag == tt.tagP)
970                     {
971                         /* coerce unmatched </p> to <br><br> */
972                         if (!element.isDescendantOf(tt.tagP))
973                         {
974                             Node.coerceNode(lexer, node, tt.tagBr);
975                             Node.trimSpaces(lexer, element);
976                             Node.insertNodeAtEnd(element, node);
977                             node = lexer.inferredTag("br");
978                             continue;
979                         }
980                     }
981                     else if ((node.tag.model & Dict.CM_INLINE) != 0
982                                 && node.tag != tt.tagA
983                                         && !((node.tag.model & Dict.CM_OBJECT) != 0)
984                                         && (element.tag.model & Dict.CM_INLINE) != 0)
985                     {
986                         /* allow any inline end tag to end current element */
987                         lexer.popInline( element);
988
989                         if (element.tag != tt.tagA)
990                         {
991                             if (node.tag == tt.tagA && node.tag != element.tag)
992                             {
993                                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
994                                lexer.ungetToken();
995                             }
996                             else
997                             {
998                                 Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
999                             }
1000
1001                            if (!((mode & Lexer.Preformatted) != 0))
1002                                Node.trimSpaces(lexer, element);
1003                            Node.trimEmptyElement(lexer, element);
1004                            return;
1005                        }
1006
1007                        /* if parent is <a> then discard unexpected inline end tag */
1008                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1009                        continue;
1010                    } /* special case </tr> etc. for stuff moved in front of table */
1011                    else if (lexer.exiled
1012                                && node.tag.model != 0
1013                                && (node.tag.model & Dict.CM_TABLE) != 0)
1014                    {
1015                        lexer.ungetToken();
1016                        Node.trimSpaces(lexer, element);
1017                        Node.trimEmptyElement(lexer, element);
1018                        return;
1019                    }
1020                }
1021
1022                /* allow any header tag to end current header */
1023                if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
1024                {
1025                    if (node.tag == element.tag)
1026                    {
1027                        Report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1028                    }
1029                    else
1030                    {
1031                        Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1032                        lexer.ungetToken();
1033                    }
1034                    if (!((mode & Lexer.Preformatted) != 0))
1035                        Node.trimSpaces(lexer, element);
1036                    Node.trimEmptyElement(lexer, element);
1037                    return;
1038                }
1039
1040                /*
1041                   an <A> tag ends any open <A> element
1042                   but <A HREF=...> is mapped to </A><A HREF=...>
1043                */

1044                if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
1045                {
1046                 /* coerce <a> to </a> unless it has some attributes */
1047                    if (node.attributes == null)
1048                    {
1049                        node.type = Node.EndTag;
1050                        Report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1051                        lexer.popInline( node);
1052                        lexer.ungetToken();
1053                        continue;
1054                    }
1055
1056                    lexer.ungetToken();
1057                    Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1058                    lexer.popInline( element);
1059                    if (!((mode & Lexer.Preformatted) != 0))
1060                        Node.trimSpaces(lexer, element);
1061                    Node.trimEmptyElement(lexer, element);
1062                    return;
1063                }
1064
1065                if ((element.tag.model & Dict.CM_HEADING) != 0)
1066                {
1067                    if (node.tag == tt.tagCenter ||
1068                        node.tag == tt.tagDiv)
1069                    {
1070                        if (node.type != Node.StartTag &&
1071                            node.type != Node.StartEndTag)
1072                        {
1073                            Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1074                            continue;
1075                        }
1076
1077                        Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1078
1079                        /* insert center as parent if heading is empty */
1080                        if (element.content == null)
1081                        {
1082                            Node.insertNodeAsParent(element, node);
1083                            continue;
1084                        }
1085
1086                        /* split heading and make center parent of 2nd part */
1087                        Node.insertNodeAfterElement(element, node);
1088
1089                        if (!((mode & Lexer.Preformatted) != 0))
1090                            Node.trimSpaces(lexer, element);
1091
1092                        element = lexer.cloneNode(element);
1093                        element.start = lexer.lexsize;
1094                        element.end = lexer.lexsize;
1095                        Node.insertNodeAtEnd(node, element);
1096                        continue;
1097                    }
1098
1099                    if (node.tag == tt.tagHr)
1100                    {
1101                        if (node.type != Node.StartTag &&
1102                            node.type != Node.StartEndTag)
1103                        {
1104                            Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1105                            continue;
1106                        }
1107
1108                        Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1109
1110                        /* insert hr before heading if heading is empty */
1111                        if (element.content == null)
1112                        {
1113                            Node.insertNodeBeforeElement(element, node);
1114                            continue;
1115                        }
1116
1117                        /* split heading and insert hr before 2nd part */
1118                        Node.insertNodeAfterElement(element, node);
1119
1120                        if (!((mode & Lexer.Preformatted) != 0))
1121                            Node.trimSpaces(lexer, element);
1122
1123                        element = lexer.cloneNode(element);
1124                        element.start = lexer.lexsize;
1125                        element.end = lexer.lexsize;
1126                        Node.insertNodeAfterElement(node, element);
1127                        continue;
1128                    }
1129                }
1130
1131                if (element.tag == tt.tagDt)
1132                {
1133                    if (node.tag == tt.tagHr)
1134                    {
1135                        Node dd;
1136
1137                        if (node.type != Node.StartTag &&
1138                            node.type != Node.StartEndTag)
1139                        {
1140                            Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1141                            continue;
1142                        }
1143
1144                        Report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1145                        dd = lexer.inferredTag("dd");
1146
1147                        /* insert hr within dd before dt if dt is empty */
1148                        if (element.content == null)
1149                        {
1150                            Node.insertNodeBeforeElement(element, dd);
1151                            Node.insertNodeAtEnd(dd, node);
1152                            continue;
1153                        }
1154
1155                        /* split dt and insert hr within dd before 2nd part */
1156                        Node.insertNodeAfterElement(element, dd);
1157                        Node.insertNodeAtEnd(dd, node);
1158
1159                        if (!((mode & Lexer.Preformatted) != 0))
1160                            Node.trimSpaces(lexer, element);
1161
1162                        element = lexer.cloneNode(element);
1163                        element.start = lexer.lexsize;
1164                        element.end = lexer.lexsize;
1165                        Node.insertNodeAfterElement(dd, element);
1166                        continue;
1167                    }
1168                }
1169
1170
1171                /*
1172                  if this is the end tag for an ancestor element
1173                  then infer end tag for this element
1174                */

1175                if (node.type == Node.EndTag)
1176                {
1177                    for (parent = element.parent;
1178                            parent != null; parent = parent.parent)
1179                    {
1180                        if (node.tag == parent.tag)
1181                        {
1182                            if (!((element.tag.model & Dict.CM_OPT) != 0) &&
1183                                !element.implicit)
1184                                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1185
1186                            if (element.tag == tt.tagA)
1187                                lexer.popInline(element);
1188
1189                            lexer.ungetToken();
1190
1191                            if (!((mode & Lexer.Preformatted) != 0))
1192                                Node.trimSpaces(lexer, element);
1193
1194                            Node.trimEmptyElement(lexer, element);
1195                            return;
1196                        }
1197                    }
1198                }
1199
1200                /* block level tags end this element */
1201                if (!((node.tag.model & Dict.CM_INLINE) != 0))
1202                {
1203                    if (node.type != Node.StartTag)
1204                    {
1205                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1206                        continue;
1207                    }
1208
1209                    if (!((element.tag.model & Dict.CM_OPT) != 0))
1210                        Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1211
1212                    if ((node.tag.model & Dict.CM_HEAD) != 0 &&
1213                        !((node.tag.model & Dict.CM_BLOCK) != 0))
1214                    {
1215                        moveToHead(lexer, element, node);
1216                        continue;
1217                    }
1218
1219                    /*
1220                       prevent anchors from propagating into block tags
1221                       except for headings h1 to h6
1222                    */

1223                    if (element.tag == tt.tagA)
1224                    {
1225                        if (node.tag != null &&
1226                            !((node.tag.model & Dict.CM_HEADING) != 0))
1227                            lexer.popInline(element);
1228                        else if (!(element.content != null))
1229                        {
1230                            Node.discardElement(element);
1231                            lexer.ungetToken();
1232                            return;
1233                        }
1234                    }
1235
1236                    lexer.ungetToken();
1237
1238                    if (!((mode & Lexer.Preformatted) != 0))
1239                        Node.trimSpaces(lexer, element);
1240
1241                    Node.trimEmptyElement(lexer, element);
1242                    return;
1243                }
1244
1245                /* parse inline element */
1246                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1247                {
1248                    if (node.implicit)
1249                        Report.warning(lexer, element, node, Report.INSERTING_TAG);
1250
1251                    /* trim white space before <br> */
1252                    if (node.tag == tt.tagBr)
1253                        Node.trimSpaces(lexer, element);
1254            
1255                    Node.insertNodeAtEnd(element, node);
1256                    parseTag(lexer, node, mode);
1257                    continue;
1258                }
1259
1260                /* discard unexpected tags */
1261                Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1262            }
1263
1264            if (!((element.tag.model & Dict.CM_OPT) != 0))
1265                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
1266
1267            Node.trimEmptyElement(lexer, element);
1268        }
1269    };
1270
1271    public static class ParseList implements Parser {
1272
            /**
             * Parses the content of a list element so that every child
             * appended to the list by this method is an LI element.
             *
             * Tokens are read with Lexer.IgnoreWhitespace until the list's own
             * end tag, an end tag matching one of the list's ancestors, or the
             * end of the token stream (getToken returns null). A token that
             * is not an end tag and whose tag is not LI is pushed back onto the
             * lexer and an LI with style="list-style: none" is inferred in its
             * place. A list whose tag model has CM_OBSOLETE set is coerced to
             * UL when it is closed by its own end tag, by an ancestor's end tag,
             * or by the end of input.
             *
             * @param lexer source of tokens; its insert and badForm fields are
             *              written here
             * @param list  the list element whose content is being parsed
             * @param mode  not read by this method; tokens are always requested
             *              with Lexer.IgnoreWhitespace
             */
1273        public void parse( Lexer lexer, Node list, short mode )
1274        {
1275            Node node;
1276            Node parent;
1277            TagTable tt = lexer.configuration.tt;
1278
                /* elements flagged CM_EMPTY have no content to parse */
1279            if ((list.tag.model & Dict.CM_EMPTY) != 0)
1280                return;
1281
1282            lexer.insert = -1; /* defer implicit inline start tags */
1283
1284            while (true)
1285            {
1286                node = lexer.getToken(Lexer.IgnoreWhitespace);
1287                if (node == null) break;
1288
                    /* end tag for this list: mark it closed and stop */
1289                if (node.tag == list.tag && node.type == Node.EndTag)
1290                {
1291                    if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1292                        Node.coerceNode(lexer, list, tt.tagUl);
1293
1294                    list.closed = true;
1295                    Node.trimEmptyElement(lexer, list);
1296                    return;
1297                }
1298
1299                /* deal with comments etc. */
1300                if (Node.insertMisc(list, node))
1301                    continue;
1302
                    /* a non-text token without a tag entry is dropped */
1303                if (node.type != Node.TextNode && node.tag == null)
1304                {
1305                    Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1306                    continue;
1307                }
1308
1309                /*
1310                  if this is the end tag for an ancestor element
1311                  then infer end tag for this element
1312                */

1313                if (node.type == Node.EndTag)
1314                {
                        /* stray </form>: record it in lexer.badForm and drop it */
1315                    if (node.tag == tt.tagForm)
1316                    {
1317                        lexer.badForm = 1;
1318                        Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1319                        continue;
1320                    }
1321
                        /* stray inline end tag: drop it and hand it to popInline */
1322                    if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
1323                    {
1324                        Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1325                        lexer.popInline(node);
1326                        continue;
1327                    }
1328
                        /* walk up from the list's parent looking for the element this tag closes */
1329                    for (parent = list.parent;
1330                            parent != null; parent = parent.parent)
1331                    {
1332                        if (node.tag == parent.tag)
1333                        {
1334                            Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
                                /* push the end tag back; the list ends here without consuming it */
1335                            lexer.ungetToken();
1336
1337                            if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1338                                Node.coerceNode(lexer, list, tt.tagUl);
1339
1340                            Node.trimEmptyElement(lexer, list);
1341                            return;
1342                        }
1343                    }
1344
                        /* end tag matches neither the list nor any ancestor */
1345                    Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1346                    continue;
1347                }
1348
                    /* every end tag has been handled above, so node is not an end tag here */
1349                if (node.tag != tt.tagLi)
1350                {
                        /* push the token back before deciding how to proceed */
1351                    lexer.ungetToken();
1352
                        /* a block-level tag ends the list when lexer.excludeBlocks is set */
1353                    if (node.tag != null &&
1354                        (node.tag.model & Dict.CM_BLOCK) != 0 &&
1355                        lexer.excludeBlocks)
1356                    {
1357                        Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1358                        Node.trimEmptyElement(lexer, list);
1359                        return;
1360                    }
1361
                        /* otherwise continue with an inferred <li style="list-style: none"> */
1362                    node = lexer.inferredTag("li");
1363                    node.addAttribute("style", "list-style: none");
1364                    Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1365                }
1366
1367                /* node should be <LI> */
1368                Node.insertNodeAtEnd(list, node);
1369                parseTag(lexer, node, Lexer.IgnoreWhitespace);
1370            }
1371
                /*
                  only reached through the break above, i.e. getToken returned
                  null before the list's end tag was seen; node is null here
                */
1372            if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1373                Node.coerceNode(lexer, list, tt.tagUl);
1374
1375            Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1376            Node.trimEmptyElement(lexer, list);
1377        }
1378
1379    };
1380
1381    public static class ParseDefList implements Parser {
1382
            /**
             * Parses the content of a definition list so that every child
             * appended to the list by this method is a DT or DD element.
             *
             * Tokens are read with Lexer.IgnoreWhitespace until the list's own
             * end tag, an end tag matching one of the list's ancestors, a tag
             * that may not appear in the list, or the end of the token stream
             * (getToken returns null). A text token is pushed back and a DT is
             * inferred for it; any other non-DT/DD token that is allowed is
             * pushed back and a DD is inferred for it. A CENTER tag splits the
             * list: the center is placed after the current list (or replaces it
             * when the list is still empty), its content is parsed, and a new
             * inferred DL inserted after it receives the remaining items.
             *
             * @param lexer source of tokens; its insert and badForm fields are
             *              written here
             * @param list  the definition list whose content is being parsed;
             *              the local reference is rebound to the new DL after a
             *              CENTER split
             * @param mode  only passed on to parseTag for the content of a
             *              CENTER element; DT/DD children are always parsed
             *              with Lexer.IgnoreWhitespace
             */
1383        public void parse( Lexer lexer, Node list, short mode )
1384        {
1385            Node node, parent;
1386            TagTable tt = lexer.configuration.tt;
1387
                /* elements flagged CM_EMPTY have no content to parse */
1388            if ((list.tag.model & Dict.CM_EMPTY) != 0)
1389                return;
1390
1391            lexer.insert = -1; /* defer implicit inline start tags */
1392
1393            while (true)
1394            {
1395                node = lexer.getToken(Lexer.IgnoreWhitespace);
1396                if (node == null) break;
                    /* end tag for this list: mark it closed and stop */
1397                if (node.tag == list.tag && node.type == Node.EndTag)
1398                {
1399                    list.closed = true;
1400                    Node.trimEmptyElement(lexer, list);
1401                    return;
1402                }
1403
1404                /* deal with comments etc. */
1405                if (Node.insertMisc(list, node))
1406                    continue;
1407
                    /* bare text: push it back and continue with an inferred <dt> */
1408                if (node.type == Node.TextNode)
1409                {
1410                    lexer.ungetToken();
1411                    node = lexer.inferredTag( "dt");
1412                    Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1413                }
1414
                    /* a token without a tag entry is dropped */
1415                if (node.tag == null)
1416                {
1417                    Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1418                    continue;
1419                }
1420
1421                /*
1422                  if this is the end tag for an ancestor element
1423                  then infer end tag for this element
1424                */

1425                if (node.type == Node.EndTag)
1426                {
                        /* stray </form>: record it in lexer.badForm and drop it */
1427                    if (node.tag == tt.tagForm)
1428                    {
1429                        lexer.badForm = 1;
1430                        Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1431                        continue;
1432                    }
1433
                        /* walk up from the list's parent looking for the element this tag closes */
1434                    for (parent = list.parent;
1435                            parent != null; parent = parent.parent)
1436                    {
1437                        if (node.tag == parent.tag)
1438                        {
1439                            Report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1440
                                /* push the end tag back; the list ends here without consuming it */
1441                            lexer.ungetToken();
1442                            Node.trimEmptyElement(lexer, list);
1443                            return;
1444                        }
1445                    }
                        /* an end tag that matched no ancestor falls through to the checks below */
1446                }
1447
1448                /* center in a dt or a dl breaks the dl list in two */
                    /* NOTE(review): node.type is not tested here, so an unmatched
                       </center> also takes this branch -- confirm this is intended */
1449                if (node.tag == tt.tagCenter)
1450                {
1451                    if (list.content != null)
1452                        Node.insertNodeAfterElement(list, node);
1453                    else /* trim empty dl list */
1454                    {
1455                        Node.insertNodeBeforeElement(list, node);
1456                        Node.discardElement(list);
1457                    }
1458
1459                    /* and parse contents of center */
1460                    parseTag(lexer, node, mode);
1461
1462                    /* now create a new dl element */
                        /* list now refers to the new dl, so later tokens are added to it */
1463                    list = lexer.inferredTag("dl");
1464                    Node.insertNodeAfterElement(node, list);
1465                    continue;
1466                }
1467
1468                if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
1469                {
                        /* push the token back before deciding how to proceed */
1470                    lexer.ungetToken();
1471
                        /* neither block nor inline: not allowed here, so the list ends */
1472                    if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
1473                    {
1474                        Report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
1475                        Node.trimEmptyElement(lexer, list);
1476                        return;
1477                    }
1478
1479                    /* if DD appeared directly in BODY then exclude blocks */
1480                    if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
1481                    {
1482                        Node.trimEmptyElement(lexer, list);
1483                        return;
1484                    }
1485
                        /* otherwise continue with an inferred <dd> */
1486                    node = lexer.inferredTag( "dd");
1487                    Report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1488                }
1489
                    /*
                      drop any node that is still an end tag at this point
                      (presumably only a stray </dt> or </dd>, assuming inferredTag
                      returns a start tag -- TODO confirm)
                    */
1490                if (node.type == Node.EndTag)
1491                {
1492                    Report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1493                    continue;
1494                }
1495        
1496                /* node should be <DT> or <DD>*/
1497                Node.insertNodeAtEnd(list, node);
1498                parseTag(lexer, node, Lexer.IgnoreWhitespace);
1499            }
1500
                /*
                  only reached through the break above, i.e. getToken returned
                  null before the list's end tag was seen; node is null here
                */
1501            Report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1502            Node.trimEmptyElement(lexer, list);
1503        }
1504
1505    };
1506
1507    public static class ParsePre implements Parser {
1508
1509        public void parse( Lexer lexer, Node pre, short mode )
1510        {
1511            Node node, parent;
1512            TagTable tt = lexer.configuration.tt;
1513
1514            if ((pre.tag.model & Dict.CM_EMPTY) != 0)
1515                return;
1516
1517            if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
1518                Node.coerceNode(lexer, pre, tt.tagPre);
1519
1520            lexer.inlineDup( null); /* tell lexer to insert inlines if needed */
1521
1522            while (true)
1523            {
1524                node = lexer.getToken(Lexer.Preformatted);
1525                if (node == null) break;
1526                if (node.tag == pre.tag && node.type == Node.EndTag)
1527                {
1528                    Node.trimSpaces(lexer, pre);
1529                    pre.closed = true;
1530                    Node.trimEmptyElement(lexer, pre);
1531                    return;
1532                }
1533
1534                if (node.tag == tt.tagHtml)
1535                {
1536                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1537                        Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1538
1539                    continue;
1540                }
1541
1542                if (node.type == Node.TextNode)
1543                {
1544                    /* if first, check for initial newline */
1545                    if (pre.content == null)
1546                    {
1547                        if (node.textarray[node.start] == (byte)'\n')
1548                            ++node.start;
1549
1550                        if (node.start >= node.end)
1551                        {
1552                            continue;
1553                        }
1554                    }
1555
1556                    Node.insertNodeAtEnd(pre, node);
1557                    continue;
1558                }
1559
1560                /* deal with comments etc. */
1561                if (Node.insertMisc(pre, node))
1562                    continue;
1563
1564                /* discard unknown and PARAM tags */
1565                if (node.tag == null || node.tag == tt.tagParam)
1566                {
1567                    Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1568                    continue;
1569                }
1570
1571                if (node.tag == tt.tagP)
1572                {
1573                    if (node.type == Node.StartTag)
1574                    {
1575                        Report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
1576
1577                        /* trim white space before <p> in <pre>*/
1578                        Node.trimSpaces(lexer, pre);
1579            
1580                        /* coerce both <p> and </p> to <br> */
1581                        Node.coerceNode(lexer, node, tt.tagBr);
1582                        Node.insertNodeAtEnd(pre, node);
1583                    }
1584                    else
1585                    {
1586                        Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1587                    }
1588                    continue;
1589                }
1590
1591                if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
1592                {
1593                    moveToHead(lexer, pre, node);
1594                    continue;
1595                }
1596
1597                /*
1598                  if this is the end tag for an ancestor element
1599                  then infer end tag for this element
1600                */

1601                if (node.type == Node.EndTag)
1602                {
1603                    if (node.tag == tt.tagForm)
1604                    {
1605                        lexer.badForm = 1;
1606                        Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1607                        continue;
1608                    }
1609
1610                    for (parent = pre.parent;
1611                            parent != null; parent = parent.parent)
1612                    {
1613                        if (node.tag == parent.tag)
1614                        {
1615                            Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1616
1617                            lexer.ungetToken();
1618                            Node.trimSpaces(lexer, pre);
1619                            Node.trimEmptyElement(lexer, pre);
1620                            return;
1621                        }
1622                    }
1623                }
1624
1625                /* what about head content, HEAD, BODY tags etc? */
1626                if (!((node.tag.model & Dict.CM_INLINE) != 0))
1627                {
1628                    if (node.type != Node.StartTag)
1629                    {
1630                        Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1631                        continue;
1632                    }
1633 
1634                    Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1635                    lexer.excludeBlocks = true;
1636
1637                    /* check if we need to infer a container */
1638                    if ((node.tag.model & Dict.CM_LIST) != 0)
1639                    {
1640                        lexer.ungetToken();
1641                        node = lexer.inferredTag( "ul");
1642                        Node.addClass(node, "noindent");
1643                    }
1644                    else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1645                    {
1646                        lexer.ungetToken();
1647                        node = lexer.inferredTag( "dl");
1648                    }
1649                    else if ((node.tag.model & Dict.CM_TABLE) != 0)
1650                    {
1651                        lexer.ungetToken();
1652                        node = lexer.inferredTag( "table");
1653                    }
1654
1655                    Node.insertNodeAfterElement(pre, node);
1656                    pre = lexer.inferredTag( "pre");
1657                    Node.insertNodeAfterElement(node, pre);
1658                    parseTag(lexer, node, Lexer.IgnoreWhitespace);
1659                    lexer.excludeBlocks = false;
1660                    continue;
1661                }
1662                /*
1663                if (!((node.tag.model & Dict.CM_INLINE) != 0))
1664                {
1665                    Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1666                    lexer.ungetToken();
1667                    return;
1668                }
1669                */

1670                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1671                {
1672                    /* trim white space before <br> */
1673                    if (node.tag == tt.tagBr)
1674                        Node.trimSpaces(lexer, pre);
1675            
1676                    Node.insertNodeAtEnd(pre, node);
1677                    parseTag(lexer, node, Lexer.Preformatted);
1678                    continue;
1679                }
1680
1681                /* discard unexpected tags */
1682                Report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1683            }
1684
1685            Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
1686            Node.trimEmptyElement(lexer, pre);
1687        }
1688
1689    };
1690
1691    public static class ParseBlock implements Parser {
1692
1693        public void parse( Lexer lexer, Node element, short mode )
1694        /*
1695           element is node created by the lexer
1696           upon seeing the start tag, or by the
1697           parser when the start tag is inferred
1698        */

1699        {
1700            Node node, parent;
1701            boolean checkstack;
1702            int istackbase = 0;
1703            TagTable tt = lexer.configuration.tt;
1704
1705            checkstack = true;
1706
1707            if ((element.tag.model & Dict.CM_EMPTY) != 0)
1708                return;
1709
1710            if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
1711                Report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
1712
1713            /*
1714             InlineDup() asks the lexer to insert inline emphasis tags
1715             currently pushed on the istack, but take care to avoid
1716             propagating inline emphasis inside OBJECT or APPLET.
1717             For these elements a fresh inline stack context is created
1718             and disposed of upon reaching the end of the element.
1719             They thus behave like table cells in this respect.
1720            */

1721            if ((element.tag.model & Dict.CM_OBJECT) != 0)
1722            {
1723                istackbase = lexer.istackbase;
1724                lexer.istackbase = lexer.istack.size();
1725            }
1726
1727            if (!((element.tag.model & Dict.CM_MIXED) != 0))
1728                lexer.inlineDup( null);
1729
1730            mode = Lexer.IgnoreWhitespace;
1731
1732            while (true)
1733            {
1734                node = lexer.getToken(mode /*Lexer.MixedContent*/);
1735                if (node == null) break;
1736                /* end tag for this element */
1737                if (node.type == Node.EndTag && node.tag != null &&
1738                    (node.tag == element.tag || element.was == node.tag))
1739                {
1740
1741                    if ((element.tag.model & Dict.CM_OBJECT) != 0)
1742                    {
1743                        /* pop inline stack */
1744                        while (lexer.istack.size() > lexer.istackbase)
1745                            lexer.popInline( null);
1746                        lexer.istackbase = istackbase;
1747                    }
1748
1749                    element.closed = true;
1750                    Node.trimSpaces(lexer, element);
1751                    Node.trimEmptyElement(lexer, element);
1752                    return;
1753                }
1754
1755                if (node.tag == tt.tagHtml ||
1756                    node.tag == tt.tagHead ||
1757                    node.tag == tt.tagBody)
1758                {
1759                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
1760                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1761
1762                    continue;
1763                }
1764
1765                if (node.type == Node.EndTag)
1766                {
1767                    if (node.tag == null)
1768                    {
1769                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1770
1771                        continue;
1772                    }
1773                    else if (node.tag == tt.tagBr)
1774                        node.type = Node.StartTag;
1775                    else if (node.tag == tt.tagP)
1776                    {
1777                        Node.coerceNode(lexer, node, tt.tagBr);
1778                        Node.insertNodeAtEnd(element, node);
1779                        node = lexer.inferredTag("br");
1780                    }
1781                    else
1782                    {
1783                        /*
1784                          if this is the end tag for an ancestor element
1785                          then infer end tag for this element
1786                        */

1787                        for (parent = element.parent;
1788                                parent != null; parent = parent.parent)
1789                        {
1790                            if (node.tag == parent.tag)
1791                            {
1792                                if (!((element.tag.model & Dict.CM_OPT) != 0))
1793                                    Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1794
1795                                lexer.ungetToken();
1796
1797                                if ((element.tag.model & Dict.CM_OBJECT) != 0)
1798                                {
1799                                    /* pop inline stack */
1800                                    while (lexer.istack.size() > lexer.istackbase)
1801                                        lexer.popInline( null);
1802                                    lexer.istackbase = istackbase;
1803                                }
1804
1805                                Node.trimSpaces(lexer, element);
1806                                Node.trimEmptyElement(lexer, element);
1807                                return;
1808                            }
1809                        }
1810                        /* special case </tr> etc. for stuff moved in front of table */
1811                        if (lexer.exiled
1812                                    && node.tag.model != 0
1813                                    && (node.tag.model & Dict.CM_TABLE) != 0)
1814                        {
1815                            lexer.ungetToken();
1816                            Node.trimSpaces(lexer, element);
1817                            Node.trimEmptyElement(lexer, element);
1818                            return;
1819                        }
1820                    }
1821                }
1822
1823                /* mixed content model permits text */
1824                if (node.type == Node.TextNode)
1825                {
1826                    boolean iswhitenode = false;
1827
1828                    if (node.type == Node.TextNode &&
1829                           node.end <= node.start + 1 &&
1830                           lexer.lexbuf[node.start] == (byte)' ')
1831                        iswhitenode = true;
1832
1833                    if (lexer.configuration.EncloseBlockText && !iswhitenode)
1834                    {
1835                        lexer.ungetToken();
1836                        node = lexer.inferredTag("p");
1837                        Node.insertNodeAtEnd(element, node);
1838                        parseTag(lexer, node, Lexer.MixedContent);
1839                        continue;
1840                    }
1841
1842                    if (checkstack)
1843                    {
1844                        checkstack = false;
1845
1846                        if (!((element.tag.model & Dict.CM_MIXED) != 0))
1847                        {
1848                            if (lexer.inlineDup( node) > 0)
1849                                continue;
1850                        }
1851                    }
1852
1853                    Node.insertNodeAtEnd(element, node);
1854                    mode = Lexer.MixedContent;
1855                    /*
1856                      HTML4 strict doesn't allow mixed content for
1857                      elements with %block; as their content model
1858                    */

1859                    lexer.versions &= ~Dict.VERS_HTML40_STRICT;
1860                    continue;
1861                }
1862
1863                if (Node.insertMisc(element, node))
1864                    continue;
1865
1866                /* allow PARAM elements? */
1867                if (node.tag == tt.tagParam)
1868                {
1869                    if (((element.tag.model & Dict.CM_PARAM) != 0) &&
1870                            (node.type == Node.StartTag || node.type == Node.StartEndTag))
1871                    {
1872                        Node.insertNodeAtEnd(element, node);
1873                        continue;
1874                    }
1875
1876                    /* otherwise discard it */
1877                    Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1878                    continue;
1879                }
1880
1881                /* allow AREA elements? */
1882                if (node.tag == tt.tagArea)
1883                {
1884                    if ((element.tag == tt.tagMap) &&
1885                            (node.type == Node.StartTag || node.type == Node.StartEndTag))
1886                    {
1887                        Node.insertNodeAtEnd(element, node);
1888                        continue;
1889                    }
1890
1891                    /* otherwise discard it */
1892                    Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1893                    continue;
1894                }
1895
1896                /* ignore unknown start/end tags */
1897                if (node.tag == null)
1898                {
1899                    Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1900                    continue;
1901                }
1902
1903                /*
1904                  Allow Dict.CM_INLINE elements here.
1905
1906                  Allow Dict.CM_BLOCK elements here unless
1907                  lexer.excludeBlocks is yes.
1908
1909                  LI and DD are special cased.
1910
1911                  Otherwise infer end tag for this element.
1912                */

1913
1914                if (!((node.tag.model & Dict.CM_INLINE) != 0))
1915                {
1916                    if (node.type != Node.StartTag && node.type != Node.StartEndTag)
1917                    {
1918                        Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1919                        continue;
1920                    }
1921
1922                    if (element.tag == tt.tagTd || element.tag == tt.tagTh)
1923                    {
1924                        /* if parent is a table cell, avoid inferring the end of the cell */
1925
1926                        if ((node.tag.model & Dict.CM_HEAD) != 0)
1927                        {
1928                            moveToHead(lexer, element, node);
1929                            continue;
1930                        }
1931
1932                        if ((node.tag.model & Dict.CM_LIST) != 0)
1933                        {
1934                            lexer.ungetToken();
1935                            node = lexer.inferredTag( "ul");
1936                            Node.addClass(node, "noindent");
1937                            lexer.excludeBlocks = true;
1938                        }
1939                        else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1940                        {
1941                            lexer.ungetToken();
1942                            node = lexer.inferredTag( "dl");
1943                            lexer.excludeBlocks = true;
1944                        }
1945
1946                        /* infer end of current table cell */
1947                        if (!((node.tag.model & Dict.CM_BLOCK) != 0))
1948                        {
1949                            lexer.ungetToken();
1950                            Node.trimSpaces(lexer, element);
1951                            Node.trimEmptyElement(lexer, element);
1952                            return;
1953                        }
1954                    }
1955                    else if ((node.tag.model & Dict.CM_BLOCK) != 0)
1956                    {
1957                        if (lexer.excludeBlocks)
1958                        {
1959                            if (!((element.tag.model & Dict.CM_OPT) != 0))
1960                                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1961
1962                            lexer.ungetToken();
1963
1964                            if ((element.tag.model & Dict.CM_OBJECT) != 0)
1965                                lexer.istackbase = istackbase;
1966
1967                            Node.trimSpaces(lexer, element);
1968                            Node.trimEmptyElement(lexer, element);
1969                            return;
1970                        }
1971                    }
1972                    else /* things like list items */
1973                    {
1974                        if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
1975                            Report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1976
1977                        if ((node.tag.model & Dict.CM_HEAD) != 0)
1978                        {
1979                            moveToHead(lexer, element, node);
1980                            continue;
1981                        }
1982
1983                        lexer.ungetToken();
1984
1985                        if ((node.tag.model & Dict.CM_LIST) != 0)
1986                        {
1987                            if (element.parent != null && element.parent.tag != null &&
1988                                element.parent.tag.parser == getParseList())
1989                            {
1990                                Node.trimSpaces(lexer, element);
1991                                Node.trimEmptyElement(lexer, element);
1992                                return;
1993                            }
1994
1995                            node = lexer.inferredTag("ul");
1996                            Node.addClass(node, "noindent");
1997                        }
1998                        else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
1999                        {
2000                            if (element.parent.tag == tt.tagDl)
2001                            {
2002                                Node.trimSpaces(lexer, element);
2003                                Node.trimEmptyElement(lexer, element);
2004                                return;
2005                            }
2006
2007                            node = lexer.inferredTag("dl");
2008                        }
2009                        else if ((node.tag.model & Dict.CM_TABLE) != 0 ||
2010                                 (node.tag.model & Dict.CM_ROW) != 0)
2011                        {
2012                            node = lexer.inferredTag("table");
2013                        }
2014                        else if ((element.tag.model & Dict.CM_OBJECT) != 0)
2015                        {
2016                            /* pop inline stack */
2017                            while (lexer.istack.size() > lexer.istackbase)
2018                                lexer.popInline( null);
2019                            lexer.istackbase = istackbase;
2020                            Node.trimSpaces(lexer, element);
2021                            Node.trimEmptyElement(lexer, element);
2022                            return;
2023
2024                        }
2025                        else
2026                        {
2027                            Node.trimSpaces(lexer, element);
2028                            Node.trimEmptyElement(lexer, element);
2029                            return;
2030                        }
2031                    }
2032                }
2033
2034                /* parse known element */
2035                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2036                {
2037                    if ((node.tag.model & Dict.CM_INLINE) != 0)
2038                    {
2039                        if (checkstack && !node.implicit)
2040                        {
2041                            checkstack = false;
2042
2043                            if (lexer.inlineDup( node) > 0)
2044                                continue;
2045                        }
2046
2047                        mode = Lexer.MixedContent;
2048                    }
2049                    else
2050                    {
2051                        checkstack = true;
2052                        mode = Lexer.IgnoreWhitespace;
2053                    }
2054
2055                    /* trim white space before <br> */
2056                    if (node.tag == tt.tagBr)
2057                        Node.trimSpaces(lexer, element);
2058
2059                    Node.insertNodeAtEnd(element, node);
2060            
2061                    if (node.implicit)
2062                        Report.warning(lexer, element, node, Report.INSERTING_TAG);
2063
2064                    parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
2065                    continue;
2066                }
2067
2068                /* discard unexpected tags */
2069                if (node.type == Node.EndTag)
2070                    lexer.popInline( node); /* if inline end tag */
2071
2072                Report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2073            }
2074
2075            if (!((element.tag.model & Dict.CM_OPT) != 0))
2076                Report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
2077
2078            if ((element.tag.model & Dict.CM_OBJECT) != 0)
2079            {
2080                /* pop inline stack */
2081                while (lexer.istack.size() > lexer.istackbase)
2082                    lexer.popInline( null);
2083                lexer.istackbase = istackbase;
2084            }
2085
2086            Node.trimSpaces(lexer, element);
2087            Node.trimEmptyElement(lexer, element);
2088        }
2089
2090    };
2091
2092    public static class ParseTableTag implements Parser {
2093
2094        public void parse( Lexer lexer, Node table, short mode )
2095        {
2096            Node node, parent;
2097            int istackbase;
2098            TagTable tt = lexer.configuration.tt;
2099
2100            lexer.deferDup();
2101            istackbase = lexer.istackbase;
2102            lexer.istackbase = lexer.istack.size();
2103    
2104            while (true)
2105            {
2106                node = lexer.getToken(Lexer.IgnoreWhitespace);
2107                if (node == null) break;
2108                if (node.tag == table.tag && node.type == Node.EndTag)
2109                {
2110                    lexer.istackbase = istackbase;
2111                    table.closed = true;
2112                    Node.trimEmptyElement(lexer, table);
2113                    return;
2114                }
2115
2116                /* deal with comments etc. */
2117                if (Node.insertMisc(table, node))
2118                    continue;
2119
2120                /* discard unknown tags */
2121                if (node.tag == null && node.type != Node.TextNode)
2122                {
2123                    Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2124                    continue;
2125                }
2126
2127                /* if TD or TH or text or inline or block then infer <TR> */
2128
2129                if (node.type != Node.EndTag)
2130                {
2131                    if (node.tag == tt.tagTd ||
2132                        node.tag == tt.tagTh ||
2133                        node.tag == tt.tagTable)
2134                    {
2135                        lexer.ungetToken();
2136                        node = lexer.inferredTag( "tr");
2137                        Report.warning(lexer, table, node, Report.MISSING_STARTTAG);
2138                    }
2139                    else if (node.type == Node.TextNode
2140                               || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2141                    {
2142                        Node.insertNodeBeforeElement(table, node);
2143                        Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2144                        lexer.exiled = true;
2145
2146                        /* AQ: TODO
2147                           Line 2040 of parser.c (13 Jan 2000) reads as follows:
2148                           if (!node->type == TextNode)
2149                           This will always evaluate to false.
2150                           This has been reported to Dave Raggett <dsr@w3.org>
2151                        */

2152                        //Should be?: if (!(node.type == Node.TextNode))
2153
if (false)
2154                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
2155
2156                        lexer.exiled = false;
2157                        continue;
2158                    }
2159                    else if ((node.tag.model & Dict.CM_HEAD) != 0)
2160                    {
2161                        moveToHead(lexer, table, node);
2162                        continue;
2163                    }
2164                }
2165
2166                /*
2167                  if this is the end tag for an ancestor element
2168                  then infer end tag for this element
2169                */

2170                if (node.type == Node.EndTag)
2171                {
2172                    if (node.tag == tt.tagForm)
2173                    {
2174                        lexer.badForm = 1;
2175                        Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2176                        continue;
2177                    }
2178
2179                    if (node.tag != null && (node.tag.model & (Dict.CM_TABLE|Dict.CM_ROW)) != 0)
2180                    {
2181                        Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2182                        continue;
2183                    }
2184
2185                    for (parent = table.parent;
2186                            parent != null; parent = parent.parent)
2187                    {
2188                        if (node.tag == parent.tag)
2189                        {
2190                            Report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
2191                            lexer.ungetToken();
2192                            lexer.istackbase = istackbase;
2193                            Node.trimEmptyElement(lexer, table);
2194                            return;
2195                        }
2196                    }
2197                }
2198
2199                if (!((node.tag.model & Dict.CM_TABLE) != 0))
2200                {
2201                    lexer.ungetToken();
2202                    Report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2203                    lexer.istackbase = istackbase;
2204                    Node.trimEmptyElement(lexer, table);
2205                    return;
2206                }
2207
2208                if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2209                {
2210                    Node.insertNodeAtEnd(table, node);;
2211                    parseTag(lexer, node, Lexer.IgnoreWhitespace);
2212                    continue;
2213                }
2214
2215                /* discard unexpected text nodes and end tags */
2216                Report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2217            }
2218
2219            Report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
2220            Node.trimEmptyElement(lexer, table);
2221            lexer.istackbase = istackbase;
2222        }
2223
2224    };
2225
2226    public static class ParseColGroup implements Parser {
2227
2228        public void parse( Lexer lexer, Node colgroup, short mode )
2229        {
2230            Node node, parent;
2231            TagTable tt = lexer.configuration.tt;
2232
2233            if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
2234                return;
2235
2236            while (true)
2237            {
2238                node = lexer.getToken(Lexer.IgnoreWhitespace);
2239                if (node == null) break;
2240                if (node.tag == colgroup.tag && node.type == Node.EndTag)
2241                {
2242                    colgroup.closed = true;
2243                    return;
2244                }
2245
2246                /*
2247                  if this is the end tag for an ancestor element
2248                  then infer end tag for this element
2249                */

2250                if (node.type == Node.EndTag)
2251                {
2252                    if (node.tag == tt.tagForm)
2253                    {
2254                        lexer.badForm = 1;
2255                        Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2256                        continue;
2257                    }
2258
2259                    for (parent = colgroup.parent;
2260                            parent != null; parent = parent.parent)
2261                    {
2262
2263                        if (node.tag == parent.tag)
2264                        {
2265                            lexer.ungetToken();
2266                            return;
2267                        }
2268                    }
2269                }
2270
2271                if (node.type == Node.TextNode)
2272                {
2273                    lexer.ungetToken();
2274                    return;
2275                }
2276
2277                /* deal with comments etc. */
2278                if (Node.insertMisc(colgroup, node))
2279                    continue;
2280
2281                /* discard unknown tags */
2282                if (node.tag == null)
2283                {
2284                    Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2285                    continue;
2286                }
2287
2288                if (node.tag != tt.tagCol)
2289                {
2290                    lexer.ungetToken();
2291                    return;
2292                }
2293
2294                if (node.type == Node.EndTag)
2295                {
2296                    Report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2297                    continue;
2298                }
2299        
2300                /* node should be <COL> */
2301                Node.insertNodeAtEnd(colgroup, node);
2302                parseTag(lexer, node, Lexer.IgnoreWhitespace);
2303            }
2304        }
2305
2306    };
2307
2308    public static class ParseRowGroup implements Parser {
2309
2310        public void parse( Lexer lexer, Node rowgroup, short mode )
2311        {
2312            Node node, parent;
2313            TagTable tt = lexer.configuration.tt;
2314
2315            if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
2316                return;
2317
2318            while (true)
2319            {
2320                node = lexer.getToken(Lexer.IgnoreWhitespace);
2321                if (node == null) break;
2322                if (node.tag == rowgroup.tag)
2323                {
2324                    if (node.type == Node.EndTag)
2325                    {
2326                        rowgroup.closed = true;
2327                        Node.trimEmptyElement(lexer, rowgroup);
2328                        return;
2329                    }
2330
2331                    lexer.ungetToken();
2332                    return;
2333                }
2334
2335                /* if </table> infer end tag */
2336                if (node.tag == tt.tagTable && node.type == Node.EndTag)
2337                {
2338                    lexer.ungetToken();
2339                    Node.trimEmptyElement(lexer, rowgroup);
2340                    return;
2341                }
2342
2343                /* deal with comments etc. */
2344                if (Node.insertMisc(rowgroup, node))
2345                    continue;
2346
2347                /* discard unknown tags */
2348                if (node.tag == null && node.type != Node.TextNode)
2349                {
2350                    Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2351                    continue;
2352                }
2353
2354                /*
2355                  if TD or TH then infer <TR>
2356                  if text or inline or block move before table
2357                  if head content move to head
2358                */

2359
2360                if (node.type != Node.EndTag)
2361                {
2362                    if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2363                    {
2364                        lexer.ungetToken();
2365                        node = lexer.inferredTag("tr");
2366                        Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2367                    }
2368                    else if (node.type == Node.TextNode
2369                            || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2370                    {
2371                        Node.moveBeforeTable(rowgroup, node, tt);
2372                        Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2373                        lexer.exiled = true;
2374
2375                        if (node.type != Node.TextNode)
2376                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
2377
2378                        lexer.exiled = false;
2379                        continue;
2380                    }
2381                    else if ((node.tag.model & Dict.CM_HEAD) != 0)
2382                    {
2383                        Report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2384                        moveToHead(lexer, rowgroup, node);
2385                        continue;
2386                    }
2387                }
2388
2389                /*
2390                  if this is the end tag for ancestor element
2391                  then infer end tag for this element
2392                */

2393                if (node.type == Node.EndTag)
2394                {
2395                    if (node.tag == tt.tagForm)
2396                    {
2397                        lexer.badForm = 1;
2398                        Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2399                        continue;
2400                    }
2401
2402                    if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
2403                    {
2404                        Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2405                        continue;
2406                    }
2407
2408                    for (parent = rowgroup.parent;
2409                            parent != null; parent = parent.parent)
2410                    {
2411                        if (node.tag == parent.tag)
2412                        {
2413                            lexer.ungetToken();
2414                            Node.trimEmptyElement(lexer, rowgroup);
2415                            return;
2416                        }
2417                    }
2418                }
2419
2420                /*
2421                  if THEAD, TFOOT or TBODY then implied end tag
2422
2423                */

2424                if ((node.tag.model & Dict.CM_ROWGRP) != 0)
2425                {
2426                    if (node.type != Node.EndTag)
2427                        lexer.ungetToken();
2428
2429                    Node.trimEmptyElement(lexer, rowgroup);
2430                    return;
2431                }
2432
2433                if (node.type == Node.EndTag)
2434                {
2435                    Report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2436                    continue;
2437                }
2438        
2439                if (!(node.tag == tt.tagTr))
2440                {
2441                    node = lexer.inferredTag( "tr");
2442                    Report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2443                    lexer.ungetToken();
2444                }
2445
2446               /* node should be <TR> */
2447                Node.insertNodeAtEnd(rowgroup, node);
2448                parseTag(lexer, node, Lexer.IgnoreWhitespace);
2449            }
2450
2451            Node.trimEmptyElement(lexer, rowgroup);
2452        }
2453
2454    };
2455
2456    public static class ParseRow implements Parser {
2457
2458        public void parse( Lexer lexer, Node row, short mode )
2459        {
2460            Node node, parent;
2461            boolean exclude_state;
2462            TagTable tt = lexer.configuration.tt;
2463
2464            if ((row.tag.model & Dict.CM_EMPTY) != 0)
2465                return;
2466
2467            while (true)
2468            {
2469                node = lexer.getToken(Lexer.IgnoreWhitespace);
2470                if (node == null) break;
2471                if (node.tag == row.tag)
2472                {
2473                    if (node.type == Node.EndTag)
2474                    {
2475                        row.closed = true;
2476                        Node.fixEmptyRow(lexer, row);
2477                        return;
2478                    }
2479
2480                    lexer.ungetToken();
2481                    Node.fixEmptyRow(lexer, row);
2482                    return;
2483                }
2484
2485                /*
2486                  if this is the end tag for an ancestor element
2487                  then infer end tag for this element
2488                */

2489                if (node.type == Node.EndTag)
2490                {
2491                    if (node.tag == tt.tagForm)
2492                    {
2493                        lexer.badForm = 1;
2494                        Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2495                        continue;
2496                    }
2497
2498                    if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2499                    {
2500                        Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2501                        continue;
2502                    }
2503
2504                    for (parent = row.parent;
2505                            parent != null; parent = parent.parent)
2506                    {
2507                        if (node.tag == parent.tag)
2508                        {
2509                            lexer.ungetToken();
2510                            Node.trimEmptyElement(lexer, row);
2511                            return;
2512                        }
2513                    }
2514                }
2515
2516                /* deal with comments etc. */
2517                if (Node.insertMisc(row, node))
2518                    continue;
2519
2520                /* discard unknown tags */
2521                if (node.tag == null && node.type != Node.TextNode)
2522                {
2523                    Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2524                    continue;
2525                }
2526
2527                /* discard unexpected <table> element */
2528                if (node.tag == tt.tagTable)
2529                {
2530                    Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2531                    continue;
2532                }
2533
2534                /* THEAD, TFOOT or TBODY */
2535                if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
2536                {
2537                    lexer.ungetToken();
2538                    Node.trimEmptyElement(lexer, row);
2539                    return;
2540                }
2541
2542                if (node.type == Node.EndTag)
2543                {
2544                    Report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2545                    continue;
2546                }
2547
2548                /*
2549                  if text or inline or block move before table
2550                  if head content move to head
2551                */

2552
2553                if (node.type != Node.EndTag)
2554                {
2555                    if (node.tag == tt.tagForm)
2556                    {
2557                        lexer.ungetToken();
2558                        node = lexer.inferredTag("td");
2559                        Report.warning(lexer, row, node, Report.MISSING_STARTTAG);
2560                    }
2561                    else if (node.type == Node.TextNode
2562                            || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2563                    {
2564                        Node.moveBeforeTable(row, node, tt);
2565                        Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2566                        lexer.exiled = true;
2567
2568                        if (node.type != Node.TextNode)
2569                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
2570
2571                        lexer.exiled = false;
2572                        continue;
2573                    }
2574                    else if ((node.tag.model & Dict.CM_HEAD) != 0)
2575                    {
2576                        Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2577                        moveToHead(lexer, row, node);
2578                        continue;
2579                    }
2580                }
2581
2582                if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
2583                {
2584                    Report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2585                    continue;
2586                }
2587        
2588                /* node should be <TD> or <TH> */
2589                Node.insertNodeAtEnd(row, node);
2590                exclude_state = lexer.excludeBlocks;
2591                lexer.excludeBlocks = false;
2592                parseTag(lexer, node, Lexer.IgnoreWhitespace);
2593                lexer.excludeBlocks = exclude_state;
2594
2595                /* pop inline stack */
2596
2597                while (lexer.istack.size() > lexer.istackbase)
2598                    lexer.popInline( null);
2599            }
2600
2601            Node.trimEmptyElement(lexer, row);
2602        }
2603
2604    };
2605
2606    public static class ParseNoFrames implements Parser {
2607
2608        public void parse( Lexer lexer, Node noframes, short mode )
2609        {
2610            Node node;
2611            boolean checkstack;
2612            TagTable tt = lexer.configuration.tt;
2613
2614            lexer.badAccess |= Report.USING_NOFRAMES;
2615            mode = Lexer.IgnoreWhitespace;
2616            checkstack = true;
2617
2618            while (true)
2619            {
2620                node = lexer.getToken(mode);
2621                if (node == null) break;
2622                if (node.tag == noframes.tag && node.type == Node.EndTag)
2623                {
2624                    noframes.closed = true;
2625                    Node.trimSpaces(lexer, noframes);
2626                    return;
2627                }
2628
2629                if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
2630                {
2631                    Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);
2632                    Node.trimSpaces(lexer, noframes);
2633                    lexer.ungetToken();
2634                    return;
2635                }
2636
2637                if (node.tag == tt.tagHtml)
2638                {
2639                    if (node.type == Node.StartTag || node.type == Node.StartEndTag)
2640                        Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2641
2642                    continue;
2643                }
2644
2645                /* deal with comments etc. */
2646                if (Node.insertMisc(noframes, node))
2647                    continue;
2648
2649                if (node.tag == tt.tagBody && node.type == Node.StartTag)
2650                {
2651                    Node.insertNodeAtEnd(noframes, node);
2652                    parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2653                    continue;
2654                }
2655
2656                /* implicit body element inferred */
2657                if (node.type == Node.TextNode || node.tag != null)
2658                {
2659                    lexer.ungetToken();
2660                    node = lexer.inferredTag("body");
2661                    if (lexer.configuration.XmlOut)
2662                        Report.warning(lexer, noframes, node, Report.INSERTING_TAG);
2663                    Node.insertNodeAtEnd(noframes, node);
2664                    parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2665                    continue;
2666                }
2667                /* discard unexpected end tags */
2668                Report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
2669            }
2670
2671            Report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
2672        }
2673
2674    };
2675
2676    public static class ParseSelect implements Parser {
2677
2678        public void parse( Lexer lexer, Node field, short mode )
2679        {
2680            Node node;
2681            TagTable tt = lexer.configuration.tt;
2682
2683            lexer.insert = -1; /* defer implicit inline start tags */
2684
2685            while (true)
2686            {
2687                node = lexer.getToken(Lexer.IgnoreWhitespace);
2688                if (node == null) break;
2689                if (node.tag == field.tag && node.type == Node.EndTag)
2690                {
2691                    field.closed = true;
2692                    Node.trimSpaces(lexer, field);
2693                    return;
2694                }
2695
2696                /* deal with comments etc. */
2697                if (Node.insertMisc(field, node))
2698                    continue;
2699
2700                if (node.type == Node.StartTag &&
2701                     (node.tag == tt.tagOption ||
2702                      node.tag == tt.tagOptgroup ||
2703                      node.tag == tt.tagScript))
2704                {
2705                    Node.insertNodeAtEnd(field, node);
2706                    parseTag(lexer, node, Lexer.IgnoreWhitespace);
2707                    continue;
2708                }
2709
2710                /* discard unexpected tags */
2711                Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2712            }
2713
2714            Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
2715        }
2716
2717    };
2718
2719    public static class ParseText implements Parser {
2720
2721        public void parse( Lexer lexer, Node field, short mode )
2722        {
2723            Node node;
2724            TagTable tt = lexer.configuration.tt;
2725
2726            lexer.insert = -1; /* defer implicit inline start tags */
2727
2728            if (field.tag == tt.tagTextarea)
2729                mode = Lexer.Preformatted;
2730
2731            while (true)
2732            {
2733                node = lexer.getToken(mode);
2734                if (node == null) break;
2735                if (node.tag == field.tag && node.type == Node.EndTag)
2736                {
2737                    field.closed = true;
2738                    Node.trimSpaces(lexer, field);
2739                    return;
2740                }
2741
2742                /* deal with comments etc. */
2743                if (Node.insertMisc(field, node))
2744                    continue;
2745
2746                if (node.type == Node.TextNode)
2747                {
2748                    /* only called for 1st child */
2749                    if (field.content == null && !((mode & Lexer.Preformatted) != 0))
2750                        Node.trimSpaces(lexer, field);
2751
2752                    if (node.start >= node.end)
2753                    {
2754                        continue;
2755                    }
2756
2757                    Node.insertNodeAtEnd(field, node);
2758                    continue;
2759                }
2760
2761                if (node.tag == tt.tagFont)
2762                {
2763                    Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2764                    continue;
2765                }
2766
2767                /* terminate element on other tags */
2768                if (!((field.tag.model & Dict.CM_OPT) != 0))
2769                        Report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE);
2770
2771                lexer.ungetToken();
2772                Node.trimSpaces(lexer, field);
2773                return;
2774            }
2775
2776            if (!((field.tag.model & Dict.CM_OPT) != 0))
2777                Report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
2778        }
2779
2780    };
2781
2782    public static class ParseOptGroup implements Parser {
2783
2784        public void parse( Lexer lexer, Node field, short mode )
2785        {
2786            Node node;
2787            TagTable tt = lexer.configuration.tt;
2788
2789            lexer.insert = -1; /* defer implicit inline start tags */
2790
2791            while (true)
2792            {
2793                node = lexer.getToken(Lexer.IgnoreWhitespace);
2794                if (node == null) break;
2795                if (node.tag == field.tag && node.type == Node.EndTag)
2796                {
2797                    field.closed = true;
2798                    Node.trimSpaces(lexer, field);
2799                    return;
2800                }
2801
2802                /* deal with comments etc. */
2803                if (Node.insertMisc(field, node))
2804                    continue;
2805
2806                if (node.type == Node.StartTag &&
2807                     (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
2808                {
2809                    if (node.tag == tt.tagOptgroup)
2810                        Report.warning(lexer, field, node, Report.CANT_BE_NESTED);
2811
2812                    Node.insertNodeAtEnd(field, node);
2813                    parseTag(lexer, node, Lexer.MixedContent);
2814                    continue;
2815                }
2816
2817                /* discard unexpected tags */
2818                Report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
2819            }
2820        }
2821
2822    };
2823
2824    public static Parser getParseHTML()
2825    {
2826        return _parseHTML;
2827    }
2828
2829    public static Parser getParseHead()
2830    {
2831        return _parseHead;
2832    }
2833
2834    public static Parser getParseTitle()
2835    {
2836        return _parseTitle;
2837    }
2838
2839    public static Parser getParseScript()
2840    {
2841        return _parseScript;
2842    }
2843
2844    public static Parser getParseBody()
2845    {
2846        return _parseBody;
2847    }
2848
2849    public static Parser getParseFrameSet()
2850    {
2851        return _parseFrameSet;
2852    }
2853
2854    public static Parser getParseInline()
2855    {
2856        return _parseInline;
2857    }
2858
2859    public static Parser getParseList()
2860    {
2861        return _parseList;
2862    }
2863
2864    public static Parser getParseDefList()
2865    {
2866        return _parseDefList;
2867    }
2868
2869    public static Parser getParsePre()
2870    {
2871        return _parsePre;
2872    }
2873
2874    public static Parser getParseBlock()
2875    {
2876        return _parseBlock;
2877    }
2878
2879    public static Parser getParseTableTag()
2880    {
2881        return _parseTableTag;
2882    }
2883
2884    public static Parser getParseColGroup()
2885    {
2886        return _parseColGroup;
2887    }
2888
2889    public static Parser getParseRowGroup()
2890    {
2891        return _parseRowGroup;
2892    }
2893
2894    public static Parser getParseRow()
2895    {
2896        return _parseRow;
2897    }
2898
2899    public static Parser getParseNoFrames()
2900    {
2901        return _parseNoFrames;
2902    }
2903
2904    public static Parser getParseSelect()
2905    {
2906        return _parseSelect;
2907    }
2908
2909    public static Parser getParseText()
2910    {
2911        return _parseText;
2912    }
2913
2914    public static Parser getParseOptGroup()
2915    {
2916        return _parseOptGroup;
2917    }
2918
2919
2920    private static Parser _parseHTML = new ParseHTML();
2921    private static Parser _parseHead = new ParseHead();
2922    private static Parser _parseTitle = new ParseTitle();
2923    private static Parser _parseScript = new ParseScript();
2924    private static Parser _parseBody = new ParseBody();
2925    private static Parser _parseFrameSet = new ParseFrameSet();
2926    private static Parser _parseInline = new ParseInline();
2927    private static Parser _parseList = new ParseList();
2928    private static Parser _parseDefList = new ParseDefList();
2929    private static Parser _parsePre = new ParsePre();
2930    private static Parser _parseBlock = new ParseBlock();
2931    private static Parser _parseTableTag = new ParseTableTag();
2932    private static Parser _parseColGroup = new ParseColGroup();
2933    private static Parser _parseRowGroup = new ParseRowGroup();
2934    private static Parser _parseRow = new ParseRow();
2935    private static Parser _parseNoFrames = new ParseNoFrames();
2936    private static Parser _parseSelect = new ParseSelect();
2937    private static Parser _parseText = new ParseText();
2938    private static Parser _parseOptGroup = new ParseOptGroup();
2939
2940    /*
2941      HTML is the top level element
2942    */

2943    public static Node parseDocument(Lexer lexer)
2944    {
2945        Node node, document, html;
2946        Node doctype = null;
2947        TagTable tt = lexer.configuration.tt;
2948
2949        document = lexer.newNode();
2950        document.type = Node.RootNode;
2951
2952        while (true)
2953        {
2954            node = lexer.getToken(Lexer.IgnoreWhitespace);
2955            if (node == null) break;
2956
2957            /* deal with comments etc. */
2958            if (Node.insertMisc(document, node))
2959                continue;
2960
2961            if (node.type == Node.DocTypeTag)
2962            {
2963                if (doctype == null)
2964                {
2965                    Node.insertNodeAtEnd(document, node);
2966                    doctype = node;
2967                }
2968                else
2969                    Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
2970                continue;
2971            }
2972
2973            if (node.type == Node.EndTag)
2974            {
2975                Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); //TODO?
2976
continue;
2977            }
2978
2979            if (node.type != Node.StartTag || node.tag != tt.tagHtml)
2980            {
2981                lexer.ungetToken();
2982                html = lexer.inferredTag("html");
2983            }
2984            else
2985                html = node;
2986
2987            Node.insertNodeAtEnd(document, html);
2988            getParseHTML().parse(lexer, html, (short)0); // TODO?
2989
break;
2990        }
2991
2992        return document;
2993    }
2994
2995    /**
2996     * Indicates whether or not whitespace should be preserved for this element.
2997     * If an <code>xml:space</code> attribute is found, then if the attribute value is
2998     * <code>preserve</code>, returns <code>true</code>. For any other value, returns
2999     * <code>false</code>. If an <code>xml:space</code> attribute was <em>not</em>
3000     * found, then the following element names result in a return value of <code>true:
3001     * pre, script, style,</code> and <code>xsl:text</code>. Finally, if a
3002     * <code>TagTable</code> was passed in and the element appears as the "pre" element
3003     * in the <code>TagTable</code>, then <code>true</code> will be returned.
3004     * Otherwise, <code>false</code> is returned.
3005     * @param element The <code>Node</code> to test to see if whitespace should be
3006     * preserved.
3007     * @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code>
3008     * function. This may be <code>null</code>, in which case this test
3009     * is bypassed.
3010     * @return <code>true</code> or <code>false</code>, as explained above.
3011     */

3012
3013    public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
3014    {
3015        AttVal attribute;
3016
3017        /* search attributes for xml:space */
3018        for (attribute = element.attributes; attribute != null; attribute = attribute.next)
3019        {
3020            if (attribute.attribute.equals("xml:space"))
3021            {
3022                if (attribute.value.equals("preserve"))
3023                    return true;
3024
3025                return false;
3026            }
3027        }
3028
3029        /* kludge for html docs without explicit xml:space attribute */
3030        if (Lexer.wstrcasecmp(element.element, "pre") == 0
3031            || Lexer.wstrcasecmp(element.element, "script") == 0
3032            || Lexer.wstrcasecmp(element.element, "style") == 0)
3033            return true;
3034
3035        if ( (tt != null) && (tt.findParser(element) == getParsePre()) )
3036            return true;
3037
3038        /* kludge for XSL docs */
3039        if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3040            return true;
3041
3042        return false;
3043    }
3044
3045    /*
3046      XML documents
3047    */

3048    public static void parseXMLElement(Lexer lexer, Node element, short mode)
3049    {
3050        Node node;
3051
3052        /* Jeff Young's kludge for XSL docs */
3053
3054        if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
3055            return;
3056
3057        /* if node is pre or has xml:space="preserve" then do so */
3058
3059        if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
3060            mode = Lexer.Preformatted;
3061
3062        while (true)
3063        {
3064            node = lexer.getToken(mode);
3065            if (node == null) break;
3066            if (node.type == Node.EndTag && node.element.equals(element.element))
3067            {
3068                element.closed = true;
3069                break;
3070            }
3071
3072            /* discard unexpected end tags */
3073            if (node.type == Node.EndTag)
3074            {
3075                Report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
3076                continue;
3077            }
3078
3079            /* parse content on seeing start tag */
3080            if (node.type == Node.StartTag)
3081                parseXMLElement(lexer, node, mode);
3082
3083            Node.insertNodeAtEnd(element, node);
3084        }
3085
3086        /*
3087         if first child is text then trim initial space and
3088         delete text node if it is empty.
3089        */

3090
3091        node = element.content;
3092
3093        if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3094        {
3095            if (node.textarray[node.start] == (byte)' ')
3096            {
3097                node.start++;
3098
3099                if (node.start >= node.end)
3100                    Node.discardElement(node);
3101            }
3102        }
3103
3104        /*
3105         if last child is text then trim final space and
3106         delete the text node if it is empty
3107        */

3108
3109        node = element.last;
3110
3111        if (node != null && node.type == Node.TextNode && mode != Lexer.Preformatted)
3112        {
3113            if (node.textarray[node.end - 1] == (byte)' ')
3114            {
3115                node.end--;
3116
3117                if (node.start >= node.end)
3118                    Node.discardElement(node);
3119            }
3120        }
3121    }
3122
3123    public static Node parseXMLDocument(Lexer lexer)
3124    {
3125        Node node, document, doctype;
3126
3127        document = lexer.newNode();
3128        document.type = Node.RootNode;
3129        doctype = null;
3130        lexer.configuration.XmlTags = true;
3131
3132        while (true)
3133        {
3134            node = lexer.getToken(Lexer.IgnoreWhitespace);
3135            if (node == null) break;
3136            /* discard unexpected end tags */
3137            if (node.type == Node.EndTag)
3138            {
3139                Report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
3140                continue;
3141            }
3142
3143            /* deal with comments etc. */
3144            if (Node.insertMisc(document, node))
3145                continue;
3146
3147            if (node.type == Node.DocTypeTag)
3148            {
3149                if (doctype == null)
3150                {
3151                    Node.insertNodeAtEnd(document, node);
3152                    doctype = node;
3153                }
3154                else
3155                    Report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
3156
continue;
3157            }
3158
3159            /* if start tag then parse element's content */
3160            if (node.type == Node.StartTag)
3161            {
3162                Node.insertNodeAtEnd(document, node);
3163                parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
3164            }
3165
3166        }
3167
3168if (false) { //#if 0
3169
/* discard the document type */
3170        node = document.findDocType();
3171
3172        if (node != null)
3173            Node.discardElement(node);
3174} // #endif
3175

3176        if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
3177                Report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
3178
3179        /* ensure presence of initial <?XML version="1.0"?> */
3180        if (lexer.configuration.XmlPi)
3181            lexer.fixXMLPI(document);
3182
3183        return document;
3184    }
3185
3186    public static boolean isJavaScript(Node node)
3187    {
3188        boolean result = false;
3189        AttVal attr;
3190
3191        if (node.attributes == null)
3192            return true;
3193
3194        for (attr = node.attributes; attr != null; attr = attr.next)
3195        {
3196            if ( (Lexer.wstrcasecmp(attr.attribute, "language") == 0
3197                    || Lexer.wstrcasecmp(attr.attribute, "type") == 0)
3198                    && Lexer.wsubstr(attr.value, "javascript"))
3199                result = true;
3200        }
3201
3202        return result;
3203    }
3204
3205}
3206
Popular Tags