KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > util > NodeTreeWalker


1 // HTMLParser Library - A java-based parser for HTML
2
// http://htmlparser.org
3
// Copyright (C) 2006 Somik Raha
4
//
5
// Revision Control Information
6
//
7
// $URL: https://svn.sourceforge.net/svnroot/htmlparser/trunk/parser/src/main/java/org/htmlparser/util/NodeTreeWalker.java $
8
// $Author: derrickoswald $
9
// $Date: 2006-09-16 10:44:17 -0400 (Sat, 16 Sep 2006) $
10
// $Revision: 4 $
11
//
12
// This library is free software; you can redistribute it and/or
13
// modify it under the terms of the Common Public License; either
14
// version 1.0 of the License, or (at your option) any later version.
15
//
16
// This library is distributed in the hope that it will be useful,
17
// but WITHOUT ANY WARRANTY; without even the implied warranty of
18
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
// Common Public License for more details.
20
//
21
// You should have received a copy of the Common Public License
22
// along with this library; if not, the license is available from
23
// the Open Source Initiative (OSI) website:
24
// http://opensource.org/licenses/cpl1.0.php
25

26 package org.htmlparser.util;
27
28 import org.htmlparser.Node;
29
30 /**
31  * A class for walking a tree of {@link Node} objects, in either a depth-first or breadth-first manner.
32  * The following two diagrams show the represent tree traversal with the two different methods.
33  * <table>
34  * <tr>
35  * <th>Depth-first traversal</th>
36  * <th>Breadth-first traversal</th>
37  * </tr>
38  * <tr>
39  * <img SRC="http://htmlparser.sourceforge.net/tree-traversal-depth-first.gif" alt="Diagram showing depth-first tree traversal" width="300" height="300" />
40  * </tr>
41  * <tr>
42  * <img SRC="http://htmlparser.sourceforge.net/tree-traversal-breadth-first.gif" alt="Diagram showing breadth-first tree traversal" width="300" height="300" />
43  * </tr>
44  * </table>
45  * @author ian_macfarlane
46  */

47 public class NodeTreeWalker implements NodeIterator
48 {
49
50     /**
51      * The root Node element which defines the scope of the current tree to walk.
52      */

53     protected Node mRootNode;
54     
55     /**
56      * The current Node element, which will be a child of the root Node, or null.
57      */

58     protected Node mCurrentNode;
59     
60     /**
61      * The next Node element after the current Node element.
62      * Stored for internal use only.
63      */

64     protected Node mNextNode;
65     
66     /**
67      * The maximum depth (child-parent links) from which this NodeTreeWalker may be removed from the root Node.
68      * A value of -1 indicates that there is no depth restriction.
69      */

70     protected int mMaxDepth;
71     
72     /**
73      * Whether the tree traversal method used is depth-first (default) or breadth-first.
74      */

75     protected boolean mDepthFirst;
76     
77     /**
78      * Creates a new instance of NodeTreeWalker using depth-first tree traversal, without limits on how deep it may traverse.
79      * @param rootNode Node The Node to set as the root of the tree.
80      * @throws NullPointerException if root Node is null.
81      */

82     public NodeTreeWalker(Node rootNode)
83     {
84         this(rootNode, true, -1);
85     }
86     
87     /**
88      * Creates a new instance of NodeTreeWalker using the specified type of tree traversal, without limits on how deep it may traverse.
89      * @param rootNode The Node to set as the root of the tree.
90      * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal.
91      * @throws NullPointerException if rootNode is null.
92      */

93     public NodeTreeWalker(Node rootNode, boolean depthFirst)
94     {
95         this(rootNode, depthFirst, -1);
96     }
97     
98     /**
99      * Creates a new instance of NodeTreeWalker using the specified type of tree traversal and maximum depth from the root Node to traverse.
100      * @param rootNode The Node to set as the root of the tree.
101      * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal.
102      * @param maxDepth The maximum depth from the root Node that this NodeTreeWalker may traverse. This must be > 0 or equal to -1.
103      * @throws NullPointerException if rootNode is null.
104      * @throws IllegalArgumentException maxDepth is not > 0 or equal to -1.
105      */

106     public NodeTreeWalker(Node rootNode, boolean depthFirst, int maxDepth)
107     {
108         //check maxDepth is valid
109
if ( ! ((maxDepth >= 1) || (maxDepth == -1)))//if not one of these valid possibilities
110
throw new IllegalArgumentException JavaDoc("Paramater maxDepth must be > 0 or equal to -1.");
111         initRootNode(rootNode);//this method also checks if rootNode is valid
112
this.mDepthFirst = depthFirst;
113         this.mMaxDepth = maxDepth;
114     }
115     
116     /**
117      * Whether the NodeTreeWalker is currently set to use depth-first or breadth-first tree traversal.
118      * @return True if depth-first tree-traversal is used, or false if breadth-first tree-traversal is being used.
119      */

120     public boolean isDepthFirst()
121     {
122         return (this.mDepthFirst);
123     }
124     
125     /**
126      * Sets whether the NodeTreeWalker should use depth-first or breadth-first tree traversal.
127      * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal.
128      */

129     public void setDepthFirst(boolean depthFirst)
130     {
131         if (this.mDepthFirst != depthFirst)//if we are changing search pattern
132
this.mNextNode = null;
133         this.mDepthFirst = depthFirst;
134     }
135     
136     /**
137      * The maximum depth (number of child-parent links) below the root Node that this NodeTreeWalker may traverse.
138      * @return The maximum depth that this NodeTreeWalker can traverse to.
139      */

140     public int getMaxDepth()
141     {
142         return (this.mMaxDepth);
143     }
144     
145     /**
146      * Removes any restrictions in place that prevent this NodeTreeWalker from traversing beyond a certain depth.
147      */

148     public void removeMaxDepthRestriction()
149     {
150         this.mMaxDepth = -1;
151     }
152     
153     /**
154      * Get the root Node that defines the scope of the tree to traverse.
155      * @return The root Node.
156      */

157     public Node getRootNode()
158     {
159         return (this.mRootNode);
160     }
161     
162     /**
163      * Get the Node in the tree that the NodeTreeWalker is current at.
164      * @return The current Node.
165      */

166     public Node getCurrentNode()
167     {
168         return (this.mCurrentNode);
169     }
170     
171     /**
172      * Sets the current Node as the root Node.
173      * Resets the current position in the tree.
174      * @throws NullPointerException if the current Node is null (i.e. if the tree traversal has not yet begun).
175      */

176     public void setCurrentNodeAsRootNode() throws NullPointerException JavaDoc
177     {
178         if (this.mCurrentNode == null)
179             throw new NullPointerException JavaDoc("Current Node is null, cannot set as root Node.");
180         initRootNode(this.mCurrentNode);
181     }
182     
183     /**
184      * Sets the specified Node as the root Node.
185      * Resets the current position in the tree.
186      * @param rootNode The Node to set as the root of the tree.
187      * @throws NullPointerException if rootNode is null.
188      */

189     public void setRootNode(Node rootNode) throws NullPointerException JavaDoc
190     {
191         initRootNode(rootNode);
192     }
193     
194     /**
195      * Resets the current position in the tree,
196      * such that calling <code>nextNode()</code> will return the first Node again.
197      */

198     public void reset()
199     {
200         this.mCurrentNode = null;
201         this.mNextNode = null;
202     }
203     
204     /**
205      * Traverses to the next Node from the current Node, using either depth-first or breadth-first tree traversal as appropriate.
206      * @return The next Node from the current Node.
207      */

208     public Node nextNode()
209     {
210         if (this.mNextNode != null)//check if we've already found the next Node by calling hasMoreNodes()
211
{
212             this.mCurrentNode = this.mNextNode;
213             this.mNextNode = null;//reset mNextNode
214
}
215         else
216         {
217             //Check if we have started traversing yet. If not, start with first child (for either traversal method).
218
if (this.mCurrentNode == null)
219                 this.mCurrentNode = this.mRootNode.getFirstChild();
220             else
221             {
222                 if (this.mDepthFirst)
223                     this.mCurrentNode = getNextNodeDepthFirst();
224                 else
225                     this.mCurrentNode = getNextNodeBreadthFirst();
226             }
227         }
228         return (this.mCurrentNode);
229     }
230     
231     /**
232      * Get the number of places down that the current Node is from the root Node.
233      * Returns 1 if current Node is a child of the root Node.
234      * Returns 0 if this NodeTreeWalker has not yet traversed to any Nodes.
235      * @return The depth the current Node is from the root Node.
236      */

237     public int getCurrentNodeDepth()
238     {
239         int depth = 0;
240         if (this.mCurrentNode != null)//if we are not at the root Node.
241
{
242             Node traverseNode = this.mCurrentNode;
243             while (traverseNode != this.mRootNode)
244             {
245                 ++depth;
246                 traverseNode = traverseNode.getParent();
247             }
248         }
249         return (depth);
250     }
251     
252     /**
253      * Returns whether or not there are more nodes available based on the current configuration of this NodeTreeWalker.
254      * @return True if there are more Nodes available, based on the current configuration, or false otherwise.
255      */

256     public boolean hasMoreNodes()
257     {
258         if (this.mNextNode == null)//if we've already generated mNextNode
259
{
260             if (this.mCurrentNode == null)
261                 this.mNextNode = this.mRootNode.getFirstChild();
262             else
263             {
264                 if (this.mDepthFirst)
265                     this.mNextNode = getNextNodeDepthFirst();
266                 else
267                     this.mNextNode = getNextNodeBreadthFirst();
268             }
269         }
270         return (this.mNextNode != null);
271     }
272     
273     /**
274      * Sets the root Node to be the given Node.
275      * Resets the current position in the tree.
276      * @param rootNode The Node to set as the root of the tree.
277      * @throws NullPointerException if rootNode is null.
278      */

279     protected void initRootNode(Node rootNode) throws NullPointerException JavaDoc
280     {
281         if (rootNode == null)
282             throw new NullPointerException JavaDoc("Root Node cannot be null.");
283         this.mRootNode = rootNode;
284         this.mCurrentNode = null;
285         this.mNextNode = null;
286     }
287     
288     /**
289      * Traverses to the next Node from the current Node using depth-first tree traversal
290      * @return The next Node from the current Node using depth-first tree traversal.
291      */

292     protected Node getNextNodeDepthFirst()
293     {
294         //loosely based on http://www.myarch.com/treeiter/traditways.jhtml
295
int currentDepth = getCurrentNodeDepth();
296         Node traverseNode = null;
297         if ((this.mMaxDepth == -1) || (currentDepth < this.mMaxDepth))//if it is less than max depth, then getting first child won't be more than max depth
298
{
299             traverseNode = this.mCurrentNode.getFirstChild();
300             if (traverseNode != null)
301                 return (traverseNode);
302         }
303         
304         traverseNode = this.mCurrentNode;
305         
306         Node tempNextSibling = null;//keeping a reference to this this saves calling getNextSibling once later
307
while ((traverseNode != this.mRootNode) && (tempNextSibling = traverseNode.getNextSibling()) == null)//CANNOT assign traverseNode as root Node
308
traverseNode = traverseNode.getParent();// use child-parent link to get to the parent level
309

310         return (tempNextSibling);//null if ran out of Node's
311
}
312     
313     /**
314      * Traverses to the next Node from the current Node using breadth-first tree traversal
315      * @return The next Node from the current Node using breadth-first tree traversal.
316      */

317     protected Node getNextNodeBreadthFirst()
318     {
319         Node traverseNode;
320         
321         //see if the mCurrentNode has a sibling after it
322
traverseNode = this.mCurrentNode.getNextSibling();
323         if (traverseNode != null)
324             return (traverseNode);
325         
326         int depth = getCurrentNodeDepth();
327         
328         //try and find the next Node at the same depth that is not a sibling
329

330         NodeList traverseNodeList;
331         
332         //step up to the parent Node to look through its children
333
traverseNode = this.mCurrentNode.getParent();
334         int currentDepth = depth - 1;
335         
336         while(currentDepth > 0)//this is safe as we've tried getNextSibling already
337
{
338             Node tempNextSibling = null;//keeping a reference to this this saves calling getNextSibling once later
339
//go to first parent with nextSibling, then to that sibling
340
while(((tempNextSibling = traverseNode.getNextSibling()) == null) && (traverseNode != this.mRootNode))//CAN assign traverseNode as root Node
341
{
342                 traverseNode = traverseNode.getParent();
343                 --currentDepth;
344             }
345             
346             //if have traversed back to the root Node, skip to next part where it finds the first Node at the next depth down
347
if (traverseNode == this.mRootNode)
348                 break;
349             
350             traverseNode = tempNextSibling;
351             
352             if (traverseNode != null)
353             {
354                 //go through children of that sibling
355
traverseNodeList = traverseNode.getChildren();
356                 while((traverseNodeList != null) && (traverseNodeList.size() != 0))
357                 {
358                     traverseNode = traverseNode.getFirstChild();
359                     ++currentDepth;
360                     if (currentDepth == depth)
361                         return (traverseNode);//found the next Node at the current depth
362
else
363                         traverseNodeList = traverseNode.getChildren();
364                 } // while((traverseNodeList != null) && (traverseNodeList.size() != 0))
365
} // if (traverseNode != null)
366
} // while(currentDepth > 0)
367

368         //step to the next depth down
369

370         //check first whether we are about to go past max depth
371
if (this.mMaxDepth != -1)//if -1, then there is no max depth restriction
372
{
373             if (depth >= this.mMaxDepth)
374                 return (null);//can't go past max depth
375
}
376         
377         traverseNode = this.mRootNode.getFirstChild();
378         ++depth;//look for next depth
379
currentDepth = 1;
380         while(currentDepth > 0)
381         {
382             //go through children of that sibling
383
traverseNodeList = traverseNode.getChildren();
384             while((traverseNodeList != null) && (traverseNodeList.size() != 0))
385             {
386                 traverseNode = traverseNode.getFirstChild();
387                 ++currentDepth;
388                 if (currentDepth == depth)
389                     return (traverseNode);//found the next Node at the current depth
390
else
391                     traverseNodeList = traverseNode.getChildren();
392             } // while((traverseNodeList != null) && (traverseNodeList.size() != 0))
393

394             //go to first parent with nextSibling, then to that sibling
395
while((traverseNode.getNextSibling() == null) && (traverseNode != this.mRootNode))
396             {
397                 traverseNode = traverseNode.getParent();
398                 --currentDepth;
399             }
400             traverseNode = traverseNode.getNextSibling();
401             if (traverseNode == null)//if null (i.e. reached end of tree), return null
402
return (null);
403         } // while(currentDepth > 0)
404

405         //otherwise, finished searching, return null
406
return (null);
407     }
408     
409     
410     // todo
411

412     // previousNode()
413
// getPreviousNodeDepthFirst()
414
// getPreviousNodeBreadthFirst()
415
// hasPreviousNodes() ?
416
// these should be specificed in an interface - suggest something like ReversableNodeIterator (extends NodeIterator)
417
// possible optimisations: when doing mNextNode, we should save mCurrentNode as previousNode, and vice versa
418
}
Popular Tags