KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > nodes > AbstractNode


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/AbstractNode.java,v $
// $Author: derrickoswald $
// $Date: 2004/07/17 13:45:04 $
// $Revision: 1.3 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

27 package org.htmlparser.nodes;
28
29 import java.io.Serializable JavaDoc;
30
31 import org.htmlparser.Node;
32 import org.htmlparser.NodeFilter;
33 import org.htmlparser.lexer.Page;
34 import org.htmlparser.util.NodeList;
35 import org.htmlparser.util.ParserException;
36 import org.htmlparser.visitors.NodeVisitor;
37
38 /**
39  * AbstractNode, which implements the Node interface, is the base class for all types of nodes, including tags, string elements, etc
40  */

41 public abstract class AbstractNode implements Node, Serializable JavaDoc
42 {
43     /**
44      * The page this node came from.
45      */

46     protected Page mPage;
47
48     /**
49      * The beginning position of the tag in the line
50      */

51     protected int nodeBegin;
52
53     /**
54      * The ending position of the tag in the line
55      */

56     protected int nodeEnd;
57
58     /**
59      * The parent of this node.
60      */

61     protected Node parent;
62
63     /**
64      * The children of this node.
65      */

66     protected NodeList children;
67
68     /**
69      * Create an abstract node with the page positions given.
70      * Remember the page and start & end cursor positions.
71      * @param page The page this tag was read from.
72      * @param start The starting offset of this node within the page.
73      * @param end The ending offset of this node within the page.
74      */

75     public AbstractNode (Page page, int start, int end)
76     {
77         mPage = page;
78         nodeBegin = start;
79         nodeEnd = end;
80         parent = null;
81         children = null;
82     }
83
84     /**
85      * Clone this object.
86      * Exposes java.lang.Object clone as a public method.
87      * @return A clone of this object.
88      * @exception CloneNotSupportedException This shouldn't be thrown since
89      * the {@link Node} interface extends Cloneable.
90      */

91     public Object JavaDoc clone() throws CloneNotSupportedException JavaDoc
92     {
93         return (super.clone ());
94     }
95
96     /**
97      * Returns a string representation of the node. This is an important method, it allows a simple string transformation
98      * of a web page, regardless of a node.<br>
99      * Typical application code (for extracting only the text from a web page) would then be simplified to :<br>
100      * <pre>
101      * Node node;
102      * for (Enumeration e = parser.elements();e.hasMoreElements();) {
103      * node = (Node)e.nextElement();
104      * System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string
105      * }
106      * </pre>
107      */

108     public abstract String JavaDoc toPlainTextString();
109
110     /**
111      * This method will make it easier when using html parser to reproduce html pages (with or without modifications)
112      * Applications reproducing html can use this method on nodes which are to be used or transferred as they were
113      * recieved, with the original html
114      */

115     public abstract String JavaDoc toHtml();
116
117     /**
118      * Return the string representation of the node.
119      * Subclasses must define this method, and this is typically to be used in the manner<br>
120      * <pre>System.out.println(node)</pre>
121      * @return java.lang.String
122      */

123     public abstract String JavaDoc toString();
124
125     /**
126      * Collect this node and its child nodes (if-applicable) into the collectionList parameter, provided the node
127      * satisfies the filtering criteria.<P>
128      *
129      * This mechanism allows powerful filtering code to be written very easily,
130      * without bothering about collection of embedded tags separately.
131      * e.g. when we try to get all the links on a page, it is not possible to
132      * get it at the top-level, as many tags (like form tags), can contain
133      * links embedded in them. We could get the links out by checking if the
134      * current node is a {@link org.htmlparser.tags.CompositeTag}, and going through its children.
135      * So this method provides a convenient way to do this.<P>
136      *
137      * Using collectInto(), programs get a lot shorter. Now, the code to
138      * extract all links from a page would look like:
139      * <pre>
140      * NodeList collectionList = new NodeList();
141      * NodeFilter filter = new TagNameFilter ("A");
142      * for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
143      * e.nextNode().collectInto(collectionList, filter);
144      * </pre>
145      * Thus, collectionList will hold all the link nodes, irrespective of how
146      * deep the links are embedded.<P>
147      *
148      * Another way to accomplish the same objective is:
149      * <pre>
150      * NodeList collectionList = new NodeList();
151      * NodeFilter filter = new TagClassFilter (LinkTag.class);
152      * for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
153      * e.nextNode().collectInto(collectionList, filter);
154      * </pre>
155      * This is slightly less specific because the LinkTag class may be
156      * registered for more than one node name, e.g. &lt;LINK&gt; tags too.
157      */

158     public void collectInto (NodeList list, NodeFilter filter)
159     {
160         if (filter.accept (this))
161             list.add (this);
162     }
163
164     /**
165      * Returns the beginning position of the tag.
166      * @deprecated Use {@link #getStartPosition}.
167      */

168     public int elementBegin()
169     {
170         return (getStartPosition ());
171     }
172
173     /**
174      * Returns the ending position fo the tag
175      * @deprecated Use {@link #getEndPosition}.
176      */

177     public int elementEnd()
178     {
179         return (getEndPosition ());
180     }
181
182     /**
183      * Get the page this node came from.
184      * @return The page that supplied this node.
185      */

186     public Page getPage ()
187     {
188         return (mPage);
189     }
190
191     /**
192      * Set the page this node came from.
193      * @param page The page that supplied this node.
194      */

195     public void setPage (Page page)
196     {
197         mPage = page;
198     }
199
200     /**
201      * Gets the starting position of the node.
202      * @return The start position.
203      */

204     public int getStartPosition ()
205     {
206         return (nodeBegin);
207     }
208
209     /**
210      * Sets the starting position of the node.
211      * @param position The new start position.
212      */

213     public void setStartPosition (int position)
214     {
215         nodeBegin = position;
216     }
217
218     /**
219      * Gets the ending position of the node.
220      * @return The end position.
221      */

222     public int getEndPosition ()
223     {
224         return (nodeEnd);
225     }
226
227     /**
228      * Sets the ending position of the node.
229      * @param position The new end position.
230      */

231     public void setEndPosition (int position)
232     {
233         nodeEnd = position;
234     }
235
236     public abstract void accept (NodeVisitor visitor);
237
238     /**
239      * @deprecated - use toHtml() instead
240      */

241     public final String JavaDoc toHTML() {
242         return toHtml();
243     }
244
245     /**
246      * Get the parent of this node.
247      * This will always return null when parsing without scanners,
248      * i.e. if semantic parsing was not performed.
249      * The object returned from this method can be safely cast to a <code>CompositeTag</code>.
250      * @return The parent of this node, if it's been set, <code>null</code> otherwise.
251      */

252     public Node getParent ()
253     {
254         return (parent);
255     }
256
257     /**
258      * Sets the parent of this node.
259      * @param node The node that contains this node. Must be a <code>CompositeTag</code>.
260      */

261     public void setParent (Node node)
262     {
263         parent = node;
264     }
265
266     /**
267      * Get the children of this node.
268      * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise.
269      */

270     public NodeList getChildren ()
271     {
272         return (children);
273     }
274
275     /**
276      * Set the children of this node.
277      * @param children The new list of children this node contains.
278      */

279     public void setChildren (NodeList children)
280     {
281         this.children = children;
282     }
283
284     /**
285      * Returns the text of the string line
286      */

287     public String JavaDoc getText() {
288         return null;
289     }
290
291     /**
292      * Sets the string contents of the node.
293      * @param text The new text for the node.
294      */

295     public void setText(String JavaDoc text) {
296
297     }
298
299     /**
300      * Perform the meaning of this tag.
301      * The default action is to do nothing.
302      */

303     public void doSemanticAction () throws ParserException
304     {
305     }
306 }
307
Popular Tags