Node


1   /*
2    * @(#)Node.java   1.11 2000/08/16
3    *
4    */
5   
6   package org.w3c.tidy;
7   
8   /**
9    *
10   * Node
11   *
12   * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13   * See Tidy.java for the copyright notice.
14   * Derived from <a HREF="http://www.w3.org/People/Raggett/tidy">
15   * HTML Tidy Release 4 Aug 2000</a>
16   *
17   * @author  Dave Raggett <dsr@w3.org>
18   * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19   * @version 1.0, 1999/05/22
20   * @version 1.0.1, 1999/05/29
21   * @version 1.1, 1999/06/18 Java Bean
22   * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23   * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24   * @version 1.4, 1999/09/04 DOM support
25   * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26   * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27   * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28   * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29   * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30   * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31   * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
32   */
33  
34  /*
35    Used for elements and text nodes
36    element name is null for text nodes
37    start and end are offsets into lexbuf
38    which contains the textual content of
39    all elements in the parse tree.
40  
41    parent and content allow traversal
42    of the parse tree in any direction.
43    attributes are represented as a linked
44    list of AttVal nodes which hold the
45    strings for attribute/value pairs.
46  */
47  
48  public class Node {
49  
50      public static final short RootNode        = 0;
51      public static final short DocTypeTag      = 1;
52      public static final short CommentTag      = 2;
53      public static final short ProcInsTag      = 3;
54      public static final short TextNode        = 4;
55      public static final short StartTag        = 5;
56      public static final short EndTag          = 6;
57      public static final short StartEndTag     = 7;
58      public static final short CDATATag        = 8;
59      public static final short SectionTag      = 9;
60      public static final short AspTag          = 10;
61      public static final short JsteTag         = 11;
62      public static final short PhpTag          = 12;
63  
64      protected Node parent;
65      protected Node prev;
66      protected Node next;
67      protected Node last;
68      protected int start;             /* start of span onto text array */
69      protected int end;               /* end of span onto text array */
70      protected byte[] textarray;      /* the text array */
71      protected short type;              /* TextNode, StartTag, EndTag etc. */
72      protected boolean closed;            /* true if closed by explicit end tag */
73      protected boolean implicit;          /* true if inferred */
74      protected boolean linebreak;         /* true if followed by a line break */
75      protected Dict was;   /* old tag when it was changed */
76      protected Dict tag;   /* tag's dictionary definition */
77      protected String   element;          /* name (null for text nodes) */
78      protected AttVal attributes;
79      protected Node content;
80  
81      public Node()
82      {
83          this(TextNode, null, 0, 0);
84      }
85  
86      public Node(short type, byte[] textarray, int start, int end)
87      {
88          this.parent = null;
89          this.prev = null;
90          this.next = null;
91          this.last = null;
92          this.start = start;
93          this.end = end;
94          this.textarray = textarray;
95          this.type = type;
96          this.closed = false;
97          this.implicit = false;
98          this.linebreak = false;
99          this.was = null;
100         this.tag = null;
101         this.element = null;
102         this.attributes = null;
103         this.content = null;
104     }
105 
106     public Node(short type, byte[] textarray, int start, int end, String   element, TagTable tt)
107     {
108         this.parent = null;
109         this.prev = null;
110         this.next = null;
111         this.last = null;
112         this.start = start;
113         this.end = end;
114         this.textarray = textarray;
115         this.type = type;
116         this.closed = false;
117         this.implicit = false;
118         this.linebreak = false;
119         this.was = null;
120         this.tag = null;
121         this.element = element;
122         this.attributes = null;
123         this.content = null;
124         if (type == StartTag || type == StartEndTag || type == EndTag)
125             tt.findTag(this);
126     }
127 
128     /* used to clone heading nodes when split by an <HR> */
129     protected Object   clone()
130     {
131         Node node = new Node();
132 
133         node.parent = this.parent;
134         if (this.textarray != null)
135         {
136             node.textarray = new byte[this.end - this.start];
137             node.start = 0;
138             node.end = this.end - this.start;
139             if (node.end > 0)
140                 System.arraycopy(this.textarray, this.start,
141                                  node.textarray, node.start, node.end);
142         }
143         node.type = this.type;
144         node.closed = this.closed;
145         node.implicit = this.implicit;
146         node.linebreak = this.linebreak;
147         node.was = this.was;
148         node.tag = this.tag;
149         if (this.element != null)
150             node.element = this.element;
151         if (this.attributes != null)
152             node.attributes = (AttVal)this.attributes.clone();
153         return node;
154     }
155 
156     public AttVal getAttrByName(String   name)
157     {
158         AttVal attr;
159 
160         for (attr = this.attributes; attr != null; attr = attr.next)
161         {
162             if (name != null &&
163                 attr.attribute != null &&
164                 attr.attribute.equals(name))
165                 break;
166         }
167 
168         return attr;
169     }
170 
171     /* default method for checking an element's attributes */
172     public void checkAttributes( Lexer lexer )
173     {
174         AttVal attval;
175 
176         for (attval = this.attributes; attval != null; attval = attval.next)
177             attval.checkAttribute( lexer, this );
178     }
179 
180     public void checkUniqueAttributes(Lexer lexer)
181     {
182         AttVal attval;
183 
184         for (attval = this.attributes; attval != null; attval = attval.next) {
185             if (attval.asp == null && attval.php == null)
186                 attval.checkUniqueAttribute(lexer, this);
187         }
188     }
189 
190     public void addAttribute(String   name, String   value)
191     {
192         AttVal av = new AttVal(null, null, null, null,
193                                '"', name, value);
194         av.dict =
195           AttributeTable.getDefaultAttributeTable().findAttribute(av);
196 
197         if (this.attributes == null)
198             this.attributes = av;
199         else /* append to end of attributes */
200         {
201             AttVal here = this.attributes;
202 
203             while (here.next != null)
204                 here = here.next;
205 
206             here.next = av;
207         }
208     }
209 
210     /* remove attribute from node then free it */
211     public void removeAttribute(AttVal attr)
212     {
213         AttVal av;
214         AttVal prev = null;
215         AttVal next;
216 
217         for (av = this.attributes; av != null; av = next)
218         {
219             next = av.next;
220 
221             if (av == attr)
222             {
223                 if (prev != null)
224                     prev.next = next;
225                 else
226                     this.attributes = next;
227             }
228             else
229                 prev = av;
230     }
231     }
232 
233     /* find doctype element */
234     public Node findDocType()
235     {
236         Node node;
237 
238         for (node = this.content; 
239             node != null && node.type != DocTypeTag; node = node.next);
240 
241         return node;
242     }
243 
244     public void discardDocType()
245     {
246         Node node;
247 
248         node = findDocType();
249         if (node != null)
250         {
251             if (node.prev != null)
252                 node.prev.next = node.next;
253             else
254                 node.parent.content = node.next;
255 
256             if (node.next != null)
257                 node.next.prev = node.prev;
258 
259             node.next = null;
260         }
261     }
262 
263     /* remove node from markup tree and discard it */
264     public static Node discardElement(Node element)
265     {
266         Node next = null;
267 
268         if (element != null)
269         {
270             next = element.next;
271             removeNode(element);
272         }
273 
274         return next;
275     }
276 
277     /* insert node into markup tree */
278     public static void insertNodeAtStart(Node element, Node node)
279     {
280         node.parent = element;
281 
282         if (element.content == null)
283             element.last = node;
284         else
285             element.content.prev = node; // AQ added 13 Apr 2000
286 
287         node.next = element.content;
288         node.prev = null;
289         element.content = node;
290     }
291 
292     /* insert node into markup tree */
293     public static void insertNodeAtEnd(Node element, Node node)
294     {
295         node.parent = element;
296         node.prev = element.last;
297 
298         if (element.last != null)
299             element.last.next = node;
300         else
301             element.content = node;
302 
303         element.last = node;
304     }
305 
306     /*
307      insert node into markup tree in pace of element
308      which is moved to become the child of the node
309     */
310     public static void insertNodeAsParent(Node element, Node node)
311     {
312         node.content = element;
313         node.last = element;
314         node.parent = element.parent;
315         element.parent = node;
316     
317         if (node.parent.content == element)
318             node.parent.content = node;
319 
320         if (node.parent.last == element)
321             node.parent.last = node;
322 
323         node.prev = element.prev;
324         element.prev = null;
325 
326         if (node.prev != null)
327             node.prev.next = node;
328 
329         node.next = element.next;
330         element.next = null;
331 
332         if (node.next != null)
333             node.next.prev = node;
334     }
335 
336     /* insert node into markup tree before element */
337     public static void insertNodeBeforeElement(Node element, Node node)
338     {
339         Node parent;
340 
341         parent = element.parent;
342         node.parent = parent;
343         node.next = element;
344         node.prev = element.prev;
345         element.prev = node;
346 
347         if (node.prev != null)
348             node.prev.next = node;
349 
350         if (parent.content == element)
351             parent.content = node;
352     }
353 
354     /* insert node into markup tree after element */
355     public static void insertNodeAfterElement(Node element, Node node)
356     {
357         Node parent;
358 
359         parent = element.parent;
360         node.parent = parent;
361 
362         // AQ - 13Jan2000 fix for parent == null
363         if (parent != null && parent.last == element)
364             parent.last = node;
365         else
366         {
367             node.next = element.next;
368             // AQ - 13Jan2000 fix for node.next == null
369             if (node.next != null)
370                 node.next.prev = node;
371         }
372 
373         element.next = node;
374         node.prev = element;
375     }
376 
377     public static void trimEmptyElement(Lexer lexer, Node element)
378     {
379         TagTable tt = lexer.configuration.tt;
380 
381         if (lexer.canPrune(element))
382         {
383             if (element.type != TextNode)
384                 Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
385 
386             discardElement(element);
387         }
388         else if (element.tag == tt.tagP && element.content == null)
389         {
390             /* replace <p></p> by <br><br> to preserve formatting */
391             Node node = lexer.inferredTag("br");
392             Node.coerceNode(lexer, element, tt.tagBr);
393             Node.insertNodeAfterElement(element, node);
394         }
395     }
396 
397     /*
398       This maps 
399            <em>hello </em><strong>world</strong>
400       to
401            <em>hello</em> <strong>world</strong>
402 
403       If last child of element is a text node
404       then trim trailing white space character
405       moving it to after element's end tag.
406     */
407     public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
408     {
409         byte c;
410         TagTable tt = lexer.configuration.tt;
411 
412         if (last != null && last.type == Node.TextNode &&
413             last.end > last.start)
414         {
415             c = lexer.lexbuf[last.end - 1];
416 
417             if (c == 160 || c == (byte)' ')
418             {
419                 /* take care with <td>&nbsp;</td> */
420                 if (element.tag == tt.tagTd ||
421                     element.tag == tt.tagTh)
422                 {
423                     if (last.end > last.start + 1)
424                         last.end -= 1;
425                 }
426                 else
427                 {
428                     last.end -= 1;
429 
430                     if (((element.tag.model & Dict.CM_INLINE) != 0) &&
431                             !((element.tag.model & Dict.CM_FIELD) != 0))
432                         lexer.insertspace = true;
433 
434                     /* if empty string then delete from parse tree */
435                     if (last.start == last.end)
436                         trimEmptyElement(lexer, last);
437                 }
438             }
439         }
440     }
441 
442     /*
443       This maps 
444            <p>hello<em> world</em>
445       to
446            <p>hello <em>world</em>
447 
448       Trims initial space, by moving it before the
449       start tag, or if this element is the first in
450       parent's content, then by discarding the space
451     */
452     public static void trimInitialSpace(Lexer lexer, Node element, Node text)
453     {
454         Node prev, node;
455 
456         // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
457         //      31-Oct-00. 
458         if (text.type == TextNode && text.textarray[text.start] == (byte)' ' 
459                            && (text.start < text.end))
460         {
461             if (((element.tag.model & Dict.CM_INLINE) != 0) &&
462                 !((element.tag.model & Dict.CM_FIELD) != 0) &&
463                 element.parent.content != element)
464             {
465                 prev = element.prev;
466 
467                 if (prev != null && prev.type == TextNode)
468                 {
469                     if (prev.textarray[prev.end - 1] != (byte)' ')
470                         prev.textarray[prev.end++] = (byte)' ';
471 
472                     ++element.start;
473                 }
474                 else /* create new node */
475                 {
476                     node = lexer.newNode();
477                     // Local fix for bug 228486 (GLP).  This handles the case
478                     // where we need to create a preceeding text node but there are
479                     // no "slots" in textarray that we can steal from the current
480                     // element.  Therefore, we create a new textarray containing
481                     // just the blank.  When Tidy is fixed, this should be removed.
482                     if (element.start >= element.end)
483                     {
484                         node.start = 0;
485                         node.end = 1;
486                         node.textarray = new byte[1];
487                     }
488                     else
489                     {
490                         node.start = element.start++;
491                         node.end = element.start;
492                         node.textarray = element.textarray;
493                     }
494                     node.textarray[node.start] = (byte)' ';
495                     node.prev = prev;
496                     if (prev != null)
497                         prev.next = node;
498                     node.next = element;
499                     element.prev = node;
500                     node.parent = element.parent;
501                 }
502             }
503 
504             /* discard the space  in current node */
505             ++text.start;
506         }
507     }
508 
509     /* 
510       Move initial and trailing space out.
511       This routine maps:
512 
513            hello<em> world</em>
514       to
515            hello <em>world</em>
516       and
517            <em>hello </em><strong>world</strong>
518       to
519            <em>hello</em> <strong>world</strong>
520     */
521     public static void trimSpaces(Lexer lexer, Node element)
522     {
523         Node text = element.content;
524         TagTable tt = lexer.configuration.tt;
525 
526         if (text != null && text.type == Node.TextNode &&
527             element.tag != tt.tagPre)
528             trimInitialSpace(lexer, element, text);
529 
530         text = element.last;
531 
532         if (text != null && text.type == Node.TextNode)
533             trimTrailingSpace(lexer, element, text);
534     }
535 
536     public boolean isDescendantOf(Dict tag)
537     {
538         Node parent;
539 
540         for (parent = this.parent;
541                 parent != null; parent = parent.parent)
542         {
543             if (parent.tag == tag)
544                 return true;
545         }
546 
547         return false;
548     }
549 
550     /*
551      the doctype has been found after other tags,
552      and needs moving to before the html element
553     */
554     public static void insertDocType(Lexer lexer, Node element, Node doctype)
555     {
556         TagTable tt = lexer.configuration.tt;
557       
558         Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
559 
560         while (element.tag != tt.tagHtml)
561             element = element.parent;
562 
563         insertNodeBeforeElement(element, doctype);
564     }
565 
566     public Node findBody(TagTable tt)
567     {
568         Node node;
569 
570         node = this.content;
571 
572         while (node != null && node.tag != tt.tagHtml)
573             node = node.next;
574 
575         if (node == null)
576             return null;
577 
578         node = node.content;
579 
580         while (node != null && node.tag != tt.tagBody)
581             node = node.next;
582 
583         return node;
584     }
585 
586     public boolean isElement()
587     {
588         return (this.type == StartTag || this.type == StartEndTag ? true : false);
589     }
590 
591     /*
592      unexpected content in table row is moved to just before
593      the table in accordance with Netscape and IE. This code
594      assumes that node hasn't been inserted into the row.
595     */
596     public static void moveBeforeTable(Node row, Node node, TagTable tt)
597     {
598         Node table;
599 
600         /* first find the table element */
601         for (table = row.parent; table != null; table = table.parent)
602         {
603             if (table.tag == tt.tagTable)
604             {
605                 if (table.parent.content == table)
606                     table.parent.content = node;
607 
608                 node.prev = table.prev;
609                 node.next = table;
610                 table.prev = node;
611                 node.parent = table.parent;
612         
613                 if (node.prev != null)
614                     node.prev.next = node;
615 
616                 break;
617             }
618         }
619     }
620 
621     /*
622      if a table row is empty then insert an empty cell
623      this practice is consistent with browser behavior
624      and avoids potential problems with row spanning cells
625     */
626     public static void fixEmptyRow(Lexer lexer, Node row)
627     {
628         Node cell;
629 
630         if (row.content == null)
631         {
632             cell = lexer.inferredTag("td");
633             insertNodeAtEnd(row, cell);
634             Report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
635         }
636     }
637 
638     public static void coerceNode(Lexer lexer, Node node, Dict tag)
639     {
640         Node tmp = lexer.inferredTag(tag.name);
641         Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
642         node.was = node.tag;
643         node.tag = tag;
644         node.type = StartTag;
645         node.implicit = true;
646         node.element = tag.name;
647     }
648 
649     /* extract a node and its children from a markup tree */
650     public static void removeNode(Node node)
651     {
652         if (node.prev != null)
653             node.prev.next = node.next;
654 
655         if (node.next != null)
656             node.next.prev = node.prev;
657 
658         if (node.parent != null)
659         {
660             if (node.parent.content == node)
661                 node.parent.content = node.next;
662 
663             if (node.parent.last == node)
664                 node.parent.last = node.prev;
665         }
666 
667         node.parent = node.prev = node.next = null;
668     }
669 
670     public static boolean insertMisc(Node element, Node node)
671     {
672         if (node.type == CommentTag ||
673             node.type == ProcInsTag ||
674             node.type == CDATATag ||
675             node.type == SectionTag ||
676             node.type == AspTag ||
677             node.type == JsteTag ||
678             node.type == PhpTag)
679         {
680             insertNodeAtEnd(element, node);
681             return true;
682         }
683 
684         return false;
685     }
686 
687     /*
688      used to determine how attributes
689      without values should be printed
690      this was introduced to deal with
691      user defined tags e.g. Cold Fusion
692     */
693     public static boolean isNewNode(Node node)
694     {
695         if (node != null && node.tag != null)
696         {
697             return ((node.tag.model & Dict.CM_NEW) != 0);
698         }
699 
700         return true;
701     }
702 
703     public boolean hasOneChild()
704     {
705         return (this.content != null && this.content.next == null);
706     }
707 
708     /* find html element */
709     public Node findHTML(TagTable tt)
710     {
711         Node node;
712 
713         for (node = this.content;
714                 node != null && node.tag != tt.tagHtml; node = node.next);
715 
716         return node;
717     }
718 
719     public Node findHEAD(TagTable tt)
720     {
721         Node node;
722 
723         node = this.findHTML(tt);
724 
725         if (node != null)
726         {
727             for (node = node.content;
728                 node != null && node.tag != tt.tagHead;
729                 node = node.next);
730         }
731 
732         return node;
733     }
734 
735     public boolean checkNodeIntegrity()
736     {
737         Node child;
738         boolean found = false;
739 
740         if (this.prev != null)
741         {
742             if (this.prev.next != this)
743                 return false;
744         }
745 
746         if (this.next != null)
747         {
748             if (this.next.prev != this)
749                 return false;
750         }
751 
752         if (this.parent != null)
753         {
754             if (this.prev == null && this.parent.content != this)
755                 return false;
756 
757             if (this.next == null && this.parent.last != this)
758                 return false;
759 
760             for (child = this.parent.content; child != null; child = child.next)
761                 if (child == this)
762                 {
763                     found = true;
764                     break;
765                 }
766 
767             if (!found)
768                 return false;
769         }
770 
771         for (child = this.content; child != null; child = child.next)
772             if (!child.checkNodeIntegrity())
773                 return false;
774 
775         return true;
776     }
777 
778     /*
779      Add class="foo" to node
780     */
781     public static void addClass(Node node, String   classname)
782     {
783         AttVal classattr = node.getAttrByName("class");
784 
785             /*
786              if there already is a class attribute
787              then append class name after a space
788             */
789             if (classattr != null)
790             {
791                 classattr.value = classattr.value + " " + classname;
792             }
793             else /* create new class attribute */
794                 node.addAttribute("class", classname);
795     }
796 
797     /* --------------------- DEBUG -------------------------- */
798 
799     private static final String  [] nodeTypeString =
800     {
801         "RootNode",
802         "DocTypeTag",
803         "CommentTag",
804         "ProcInsTag",
805         "TextNode",
806         "StartTag",
807         "EndTag",
808         "StartEndTag",
809         "SectionTag",
810         "AspTag",
811         "PhpTag"
812     };
813 
814     public String   toString()
815     {
816         String   s = "";
817         Node n = this;
818 
819         while (n != null) {
820             s += "[Node type=";
821             s += nodeTypeString[n.type];
822             s += ",element=";
823             if (n.element != null)
824                 s += n.element;
825             else
826                 s += "null";
827             if (n.type == TextNode ||
828                 n.type == CommentTag ||
829                 n.type == ProcInsTag) {
830                 s += ",text=";
831                 if (n.textarray != null && n.start <= n.end) {
832                     s += "\"";
833                     s += Lexer.getString(n.textarray, n.start, n.end - n.start);
834                     s += "\"";
835                 } else {
836                     s += "null";
837                 }
838             }
839             s += ",content=";
840             if (n.content != null)
841                 s += n.content.toString();
842             else
843                 s += "null";
844             s += "]";
845             if (n.next != null)
846                 s += ",";
847             n = n.next;
848         }
849         return s;
850     }
851     /* --------------------- END DEBUG ---------------------- */
852 
853 
854     /* --------------------- DOM ---------------------------- */
855 
856     protected org.w3c.dom.Node   adapter = null;
857 
858     protected org.w3c.dom.Node   getAdapter()
859     {
860         if (adapter == null)
861         {
862             switch (this.type)
863             {
864                 case RootNode:
865                     adapter = new DOMDocumentImpl(this);
866                     break;
867                 case StartTag:
868                 case StartEndTag:
869                     adapter = new DOMElementImpl(this);
870                     break;
871                 case DocTypeTag:
872                     adapter = new DOMDocumentTypeImpl(this);
873                     break;
874                 case CommentTag:
875                     adapter = new DOMCommentImpl(this);
876                     break;
877                 case TextNode:
878                     adapter = new DOMTextImpl(this);
879                     break;
880                 case CDATATag:
881                     adapter = new DOMCDATASectionImpl(this);
882                     break;
883                 case ProcInsTag:
884                     adapter = new DOMProcessingInstructionImpl(this);
885                     break;
886                 default:
887                     adapter = new DOMNodeImpl(this);
888             }
889         }
890         return adapter;
891     }
892 
893     protected Node cloneNode(boolean deep)
894     {
895         Node node = (Node)this.clone();
896         if (deep)
897         {
898             Node child;
899             Node newChild;
900             for (child = this.content; child != null; child = child.next)
901             {
902                 newChild = child.cloneNode(deep);
903                 insertNodeAtEnd(node, newChild);
904             }
905         }
906         return node;
907     }
908 
909 
910     protected void setType(short newType)
911     {
912         this.type = newType;
913     }
914 
915     /* --------------------- END DOM ------------------------ */
916 
917 }
918
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags