KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > meterware > httpunit > ParsedHTML


1 /*
2  * fgiust
3  * PATCHED to add support for table rows
4  */

5 package com.meterware.httpunit;
6
7 import java.io.IOException JavaDoc;
8 import java.net.URL JavaDoc;
9 import java.util.ArrayList JavaDoc;
10 import java.util.Arrays JavaDoc;
11 import java.util.HashMap JavaDoc;
12 import java.util.Iterator JavaDoc;
13 import java.util.List JavaDoc;
14
15 import org.w3c.dom.Document JavaDoc;
16 import org.w3c.dom.Element JavaDoc;
17 import org.w3c.dom.Node JavaDoc;
18 import org.w3c.dom.NodeList JavaDoc;
19
20 import com.meterware.httpunit.scripting.ScriptableDelegate;
21
22
23 /**
24  * @author <a HREF="mailto:russgold@httpunit.org">Russell Gold</a>
25  * @author <a HREF="mailto:bx@bigfoot.com">Benoit Xhenseval</a>
26  */

27 class ParsedHTML
28 {
29
30     final static private HTMLElement[] NO_ELEMENTS = new HTMLElement[0];
31
32     final static private String JavaDoc[] TEXT_ELEMENTS = {"p", "h1", "h2", "h3", "h4", "h5", "h6"};
33
34     private Node JavaDoc _rootNode;
35
36     protected URL JavaDoc _baseURL;
37
38     protected FrameSelector _frame;
39
40     protected String JavaDoc _baseTarget;
41
42     protected String JavaDoc _characterSet;
43
44     private WebResponse _response;
45
46     private boolean _updateElements = true;
47
48     private boolean _enableNoScriptNodes;
49
50     /** map of element IDs to elements. * */
51     private HashMap JavaDoc _elementsByID = new HashMap JavaDoc();
52
53     /** map of element names to lists of elements. * */
54     private HashMap JavaDoc _elementsByName = new HashMap JavaDoc();
55
56     /** map of DOM elements to HTML elements * */
57     private HashMap JavaDoc _elements = new HashMap JavaDoc();
58
59     private ArrayList JavaDoc _formsList = new ArrayList JavaDoc();
60
61     private WebForm[] _forms;
62
63     private WebForm _activeForm;
64
65     private ArrayList JavaDoc _imagesList = new ArrayList JavaDoc();
66
67     private WebImage[] _images;
68
69     private ArrayList JavaDoc _linkList = new ArrayList JavaDoc();
70
71     private WebLink[] _links;
72
73     private ArrayList JavaDoc _blocksList = new ArrayList JavaDoc();
74
75     private TextBlock[] _blocks;
76
77     private ArrayList JavaDoc _appletList = new ArrayList JavaDoc();
78
79     private WebApplet[] _applets;
80
81     private ArrayList JavaDoc _tableList = new ArrayList JavaDoc();
82
83     private WebTable[] _tables;
84
85     private ArrayList JavaDoc _frameList = new ArrayList JavaDoc();
86
87     private WebFrame[] _frames;
88
/**
 * Creates a parsed view over the DOM rooted at <code>rootNode</code>.
 * The constructor only stores its arguments; no scanning happens here.
 */
ParsedHTML(WebResponse response, FrameSelector frame, URL baseURL, String baseTarget, Node rootNode,
    String characterSet)
{
    this._response = response;
    this._frame = frame;
    this._baseURL = baseURL;
    this._baseTarget = baseTarget;
    this._rootNode = rootNode;
    this._characterSet = characterSet;
}
104
105     /**
106      * Returns the forms found in the page in the order in which they appear.
107      */

108     public WebForm[] getForms()
109     {
110         if (_forms == null)
111         {
112             loadElements();
113             _forms = (WebForm[]) _formsList.toArray(new WebForm[_formsList.size()]);
114         }
115         return _forms;
116     }
117
118     /**
119      * Returns the links found in the page in the order in which they appear.
120      */

121     public WebLink[] getLinks()
122     {
123         if (_links == null)
124         {
125             loadElements();
126             _links = (WebLink[]) _linkList.toArray(new WebLink[_linkList.size()]);
127         }
128         return _links;
129     }
130
131     /**
132      * Returns a proxy for each applet found embedded in this page.
133      */

134     public WebApplet[] getApplets()
135     {
136         if (_applets == null)
137         {
138             loadElements();
139             _applets = (WebApplet[]) _appletList.toArray(new WebApplet[_appletList.size()]);
140         }
141         return _applets;
142     }
143
144     /**
145      * Returns the images found in the page in the order in which they appear.
146      */

147     public WebImage[] getImages()
148     {
149         if (_images == null)
150         {
151             loadElements();
152             _images = (WebImage[]) _imagesList.toArray(new WebImage[_imagesList.size()]);
153         }
154         return _images;
155     }
156
157     /**
158      * Returns the top-level block elements found in the page in the order in which they appear.
159      */

160     public TextBlock[] getTextBlocks()
161     {
162         if (_blocks == null)
163         {
164             loadElements();
165             _blocks = (TextBlock[]) _blocksList.toArray(new TextBlock[_blocksList.size()]);
166         }
167         return _blocks;
168     }
169
170     /**
171      * Returns the first text block found in the page which matches the specified predicate and value.
172      */

173     public TextBlock getFirstMatchingTextBlock(HTMLElementPredicate predicate, Object JavaDoc criteria)
174     {
175         TextBlock[] blocks = getTextBlocks();
176         for (int i = 0; i < blocks.length; i++)
177         {
178             if (predicate.matchesCriteria(blocks[i], criteria))
179                 return blocks[i];
180         }
181         return null;
182     }
183
184     public TextBlock getNextTextBlock(TextBlock block)
185     {
186         int index = _blocksList.indexOf(block);
187         if (index < 0 || index == _blocksList.size() - 1)
188             return null;
189         return (TextBlock) _blocksList.get(index + 1);
190     }
191
192     /**
193      * Returns the top-level tables found in the page in the order in which they appear.
194      */

195     public WebTable[] getTables()
196     {
197         if (_tables == null)
198         {
199             loadElements();
200             _tables = (WebTable[]) _tableList.toArray(new WebTable[_tableList.size()]);
201         }
202         return _tables;
203     }
204
205     /**
206      * Returns the HTMLElement with the specified ID.
207      */

208     public HTMLElement getElementWithID(String JavaDoc id)
209     {
210         return (HTMLElement) getElementWithID(id, HTMLElement.class);
211     }
212
213     /**
214      * Returns the HTML elements with the specified name.
215      */

216     public HTMLElement[] getElementsWithName(String JavaDoc name)
217     {
218         loadElements();
219         ArrayList JavaDoc elements = (ArrayList JavaDoc) _elementsByName.get(name);
220         return elements == null ? NO_ELEMENTS : (HTMLElement[]) elements.toArray(new HTMLElement[elements.size()]);
221     }
222
223     /**
224      * Returns the HTML elements with an attribute with the specified name and value.
225      */

226     public HTMLElement[] getElementsWithAttribute(String JavaDoc name, String JavaDoc value)
227     {
228         loadElements();
229         ArrayList JavaDoc elements = new ArrayList JavaDoc();
230         for (Iterator JavaDoc i = _elements.values().iterator(); i.hasNext();)
231         {
232             HTMLElement element = (HTMLElement) i.next();
233             if (value.equals(element.getAttribute(name)))
234                 elements.add(element);
235         }
236         return (HTMLElement[]) elements.toArray(new HTMLElement[elements.size()]);
237     }
238
239     /**
240      * Returns a list of HTML element names contained in this HTML section.
241      */

242     public String JavaDoc[] getElementNames()
243     {
244         loadElements();
245         return (String JavaDoc[]) _elementsByName.keySet().toArray(new String JavaDoc[_elementsByName.size()]);
246     }
247
248     HTMLElement[] getElementsByTagName(Node JavaDoc dom, String JavaDoc name)
249     {
250         loadElements();
251         if (dom instanceof Element JavaDoc)
252         {
253             return getElementsFromList(((Element JavaDoc) dom).getElementsByTagName(name));
254         }
255         else
256         {
257             return getElementsFromList(((Document JavaDoc) dom).getElementsByTagName(name));
258         }
259     }
260
261     private HTMLElement[] getElementsFromList(NodeList JavaDoc nl)
262     {
263         HTMLElement[] elements = new HTMLElement[nl.getLength()];
264         for (int i = 0; i < elements.length; i++)
265         {
266             Node JavaDoc node = nl.item(i);
267             elements[i] = (HTMLElement) _elements.get(node);
268             if (elements[i] == null)
269             {
270                 elements[i] = toDefaultElement((Element JavaDoc) node);
271                 _elements.put(node, elements[i]);
272             }
273         }
274         return elements;
275     }
276
277     /**
278      * Returns the form found in the page with the specified ID.
279      */

280     public WebForm getFormWithID(String JavaDoc id)
281     {
282         return (WebForm) getElementWithID(id, WebForm.class);
283     }
284
285     /**
286      * Returns the link found in the page with the specified ID.
287      */

288     public WebLink getLinkWithID(String JavaDoc id)
289     {
290         return (WebLink) getElementWithID(id, WebLink.class);
291
292     }
293
294     private Object JavaDoc getElementWithID(String JavaDoc id, final Class JavaDoc klass)
295     {
296         loadElements();
297         return whenCast(_elementsByID.get(id), klass);
298     }
299
300     private Object JavaDoc whenCast(Object JavaDoc o, Class JavaDoc klass)
301     {
302         return klass.isInstance(o) ? o : null;
303     }
304
305     /**
306      * Returns the first link found in the page matching the specified criteria.
307      */

308     public WebForm getFirstMatchingForm(HTMLElementPredicate predicate, Object JavaDoc criteria)
309     {
310         WebForm[] forms = getForms();
311         for (int i = 0; i < forms.length; i++)
312         {
313             if (predicate.matchesCriteria(forms[i], criteria))
314                 return forms[i];
315         }
316         return null;
317     }
318
319     /**
320      * Returns all links found in the page matching the specified criteria.
321      */

322     public WebForm[] getMatchingForms(HTMLElementPredicate predicate, Object JavaDoc criteria)
323     {
324         ArrayList JavaDoc matches = new ArrayList JavaDoc();
325         WebForm[] forms = getForms();
326         for (int i = 0; i < forms.length; i++)
327         {
328             if (predicate.matchesCriteria(forms[i], criteria))
329                 matches.add(forms[i]);
330         }
331         return (WebForm[]) matches.toArray(new WebForm[matches.size()]);
332     }
333
334     /**
335      * Returns the form found in the page with the specified name.
336      */

337     public WebForm getFormWithName(String JavaDoc name)
338     {
339         return getFirstMatchingForm(WebForm.MATCH_NAME, name);
340     }
341
342     private void interpretScriptElement(Element JavaDoc element)
343     {
344         String JavaDoc script = getScript(element);
345         if (script != null)
346         {
347             try
348             {
349                 _updateElements = false;
350                 String JavaDoc language = NodeUtils.getNodeAttribute(element, "language", null);
351                 if (!getResponse().getScriptableObject().supportsScript(language))
352                     _enableNoScriptNodes = true;
353                 getResponse().getScriptableObject().runScript(language, script);
354             }
355             finally
356             {
357                 setRootNode(_rootNode);
358             }
359         }
360     }
361
362     private String JavaDoc getScript(Node JavaDoc scriptNode)
363     {
364         String JavaDoc scriptLocation = NodeUtils.getNodeAttribute(scriptNode, "src", null);
365         if (scriptLocation == null)
366         {
367             return NodeUtils.asText(scriptNode.getChildNodes());
368         }
369         else
370         {
371             try
372             {
373                 return getIncludedScript(scriptLocation);
374             }
375             catch (IOException JavaDoc e)
376             {
377                 throw new RuntimeException JavaDoc("Error loading included script: " + e);
378             }
379         }
380     }
381
382     /**
383      * Returns the contents of an included script, given its src attribute.
384      * @param srcAttribute
385      * @return the contents of the script.
386      * @throws java.io.IOException if there is a problem retrieving the script
387      */

388     String JavaDoc getIncludedScript(String JavaDoc srcAttribute) throws IOException JavaDoc
389     {
390         WebRequest req = new GetMethodWebRequest(getBaseURL(), srcAttribute);
391         WebWindow window = getResponse().getWindow();
392         if (window == null)
393             throw new IllegalStateException JavaDoc(
394                 "Unable to retrieve script included by this response, since it was loaded by getResource(). Use getResponse() instead.");
395         return window.getResource(req).getText();
396     }
397
398     /**
399      * If noscript node content is enabled, returns null - otherwise returns a concealing element.
400      */

401     private HTMLElement toNoscriptElement(Element JavaDoc element)
402     {
403         return _enableNoScriptNodes ? null : new NoScriptElement(element);
404     }
405
406     static class HtmlElementRecorder
407     {
408
409         protected void recordHtmlElement(NodeUtils.PreOrderTraversal pot, Node JavaDoc node, HTMLElement htmlElement)
410         {
411             if (htmlElement != null)
412             {
413                 addToMaps(pot, node, htmlElement);
414                 addToLists(pot, htmlElement);
415             }
416         }
417
418         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
419         {
420             for (Iterator JavaDoc i = pot.getContexts(); i.hasNext();)
421             {
422                 Object JavaDoc o = i.next();
423                 if (o instanceof ParsedHTML)
424                     ((ParsedHTML) o).addToList(htmlElement);
425             }
426         }
427
428         protected void addToMaps(NodeUtils.PreOrderTraversal pot, Node JavaDoc node, HTMLElement htmlElement)
429         {
430             for (Iterator JavaDoc i = pot.getContexts(); i.hasNext();)
431             {
432                 Object JavaDoc o = i.next();
433                 if (o instanceof ParsedHTML)
434                     ((ParsedHTML) o).addToMaps(node, htmlElement);
435             }
436         }
437
438     }
439
440     abstract static class HTMLElementFactory extends HtmlElementRecorder
441     {
442
443         abstract HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element);
444
445         void recordElement(NodeUtils.PreOrderTraversal pot, Element JavaDoc element, ParsedHTML parsedHTML)
446         {
447             HTMLElement htmlElement = toHTMLElement(pot, parsedHTML, element);
448             recordHtmlElement(pot, element, htmlElement);
449         }
450
451         protected boolean isRecognized(ClientProperties properties)
452         {
453             return true;
454         }
455
456         protected boolean addToContext()
457         {
458             return false;
459         }
460
461         final protected ParsedHTML getParsedHTML(NodeUtils.PreOrderTraversal pot)
462         {
463             return (ParsedHTML) getClosestContext(pot, ParsedHTML.class);
464         }
465
466         final protected Object JavaDoc getClosestContext(NodeUtils.PreOrderTraversal pot, Class JavaDoc aClass)
467         {
468             return pot.getClosestContext(aClass);
469         }
470
471         protected ParsedHTML getRootContext(NodeUtils.PreOrderTraversal pot)
472         {
473             return (ParsedHTML) pot.getRootContext();
474         }
475     }
476
477     static class DefaultElementFactory extends HTMLElementFactory
478     {
479
480         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
481         {
482             if (element.getAttribute("id").equals(""))
483                 return null;
484             return parsedHTML.toDefaultElement(element);
485         }
486
487         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
488         {
489         }
490     }
491
492     private HTMLElement toDefaultElement(Element JavaDoc element)
493     {
494         return new HTMLElementBase(element)
495         {
496
497             protected ScriptableDelegate newScriptable()
498             {
499                 return new HTMLElementScriptable(this);
500             }
501
502             protected ScriptableDelegate getParentDelegate()
503             {
504                 return getResponse().getScriptableObject().getDocument();
505             }
506         };
507     }
508
509     static class WebFormFactory extends HTMLElementFactory
510     {
511
512         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
513         {
514             return parsedHTML.toWebForm(element);
515         }
516
517         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
518         {
519             super.addToLists(pot, htmlElement);
520             getRootContext(pot)._activeForm = (WebForm) htmlElement;
521         }
522     }
523
524     static class WebLinkFactory extends HTMLElementFactory
525     {
526
527         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
528         {
529             return parsedHTML.toLinkAnchor(element);
530         }
531     }
532
533     static class TextBlockFactory extends HTMLElementFactory
534     {
535
536         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
537         {
538             return parsedHTML.toTextBlock(element);
539         }
540
541         protected boolean addToContext()
542         {
543             return true;
544         }
545
546         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
547         {
548             for (Iterator JavaDoc i = pot.getContexts(); i.hasNext();)
549             {
550                 Object JavaDoc o = i.next();
551                 if (!(o instanceof ParsedHTML))
552                     continue;
553                 ((ParsedHTML) o).addToList(htmlElement);
554                 break;
555             }
556         }
557
558     }
559
560     static class ScriptFactory extends HTMLElementFactory
561     {
562
563         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
564         {
565             return null;
566         }
567
568         void recordElement(NodeUtils.PreOrderTraversal pot, Element JavaDoc element, ParsedHTML parsedHTML)
569         {
570             parsedHTML.interpretScriptElement(element);
571         }
572     }
573
574     static class NoScriptFactory extends HTMLElementFactory
575     {
576
577         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
578         {
579             return parsedHTML.toNoscriptElement(element);
580         }
581
582         protected boolean addToContext()
583         {
584             return true;
585         }
586     }
587
588     static class WebFrameFactory extends HTMLElementFactory
589     {
590
591         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
592         {
593             return parsedHTML.toWebFrame(element);
594         }
595     }
596
597     static class WebIFrameFactory extends HTMLElementFactory
598     {
599
600         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
601         {
602             return parsedHTML.toWebIFrame(element);
603         }
604
605         protected boolean isRecognized(ClientProperties properties)
606         {
607             return properties.isIframeSupported();
608         }
609
610         protected boolean addToContext()
611         {
612             return true;
613         }
614     }
615
616     static class WebImageFactory extends HTMLElementFactory
617     {
618
619         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
620         {
621             return parsedHTML.toWebImage(element);
622         }
623     }
624
625     static class WebAppletFactory extends HTMLElementFactory
626     {
627
628         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
629         {
630             return parsedHTML.toWebApplet(element);
631         }
632
633         protected boolean addToContext()
634         {
635             return true;
636         }
637     }
638
639     static class WebTableFactory extends HTMLElementFactory
640     {
641
642         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
643         {
644             return parsedHTML.toWebTable(element);
645         }
646
647         protected boolean addToContext()
648         {
649             return true;
650         }
651
652         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
653         {
654             for (Iterator JavaDoc i = pot.getContexts(); i.hasNext();)
655             {
656                 Object JavaDoc o = i.next();
657                 if (o instanceof ParsedHTML)
658                     ((ParsedHTML) o).addToList(htmlElement);
659                 if (o instanceof TableCell)
660                     break;
661             }
662         }
663     }
664
665     static class TableRowFactory extends HTMLElementFactory
666     {
667
668         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
669         {
670             WebTable wt = getWebTable(pot);
671             if (wt == null)
672                 return null;
673             return wt.newTableRow(element);
674         }
675
676         private WebTable getWebTable(NodeUtils.PreOrderTraversal pot)
677         {
678             return (WebTable) getClosestContext(pot, WebTable.class);
679         }
680
681         protected boolean addToContext()
682         {
683             return true;
684         }
685
686         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
687         {
688             getWebTable(pot).addRow((TableRow) htmlElement);
689         }
690     }
691
692     static class TableCellFactory extends HTMLElementFactory
693     {
694
695         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
696         {
697             TableRow tr = getTableRow(pot);
698             if (tr == null)
699                 return null;
700             return tr.newTableCell(element);
701         }
702
703         private TableRow getTableRow(NodeUtils.PreOrderTraversal pot)
704         {
705             return (TableRow) getClosestContext(pot, TableRow.class);
706         }
707
708         protected boolean addToContext()
709         {
710             return true;
711         }
712
713         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
714         {
715             getTableRow(pot).addTableCell((TableCell) htmlElement);
716         }
717     }
718
719     static class FormControlFactory extends HTMLElementFactory
720     {
721
722         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
723         {
724             final WebForm form = getForm(pot);
725             return form == null ? newControlWithoutForm(parsedHTML, element) : form.newFormControl(element);
726         }
727
728         private HTMLElement newControlWithoutForm(ParsedHTML parsedHTML, Element JavaDoc element)
729         {
730             if ((element.getNodeName().equalsIgnoreCase("button") || element.getNodeName().equalsIgnoreCase("input"))
731                 && isValidNonFormButtonType(NodeUtils.getNodeAttribute(element, "type")))
732             {
733                 return parsedHTML.toButtonWithoutForm(element);
734             }
735             else
736             {
737                 return null;
738             }
739         }
740
741         private boolean isValidNonFormButtonType(String JavaDoc buttonType)
742         {
743             return buttonType.equals("") || buttonType.equalsIgnoreCase("button");
744         }
745
746         private WebForm getForm(NodeUtils.PreOrderTraversal pot)
747         {
748             return getRootContext(pot)._activeForm;
749         }
750
751         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
752         {
753             WebForm form = getForm(pot);
754             if (form != null)
755                 form.addFormControl((FormControl) htmlElement);
756         }
757     }
758
759     static class WebListFactory extends HTMLElementFactory
760     {
761
762         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
763         {
764             return parsedHTML.toOrderedList(element);
765         }
766
767         protected boolean addToContext()
768         {
769             return true;
770         }
771
772         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
773         {
774             TextBlock textBlock = getTextBlock(pot);
775             if (textBlock != null)
776                 textBlock.addList((WebList) htmlElement);
777         }
778
779         private TextBlock getTextBlock(NodeUtils.PreOrderTraversal pot)
780         {
781             return (TextBlock) getClosestContext(pot, TextBlock.class);
782         }
783     }
784
785     static class ListItemFactory extends HTMLElementFactory
786     {
787
788         HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element JavaDoc element)
789         {
790             WebList webList = getWebList(pot);
791             if (webList == null)
792                 return null;
793             return webList.addNewItem(element);
794         }
795
796         private WebList getWebList(NodeUtils.PreOrderTraversal pot)
797         {
798             return (WebList) getClosestContext(pot, WebList.class);
799         }
800
801         protected boolean addToContext()
802         {
803             return true;
804         }
805
806         protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement)
807         {
808         }
809     }
810
811     private static HashMap JavaDoc _htmlFactoryClasses = new HashMap JavaDoc();
812
813     private static HTMLElementFactory _defaultFactory = new DefaultElementFactory();
814
815     static
816     {
817         _htmlFactoryClasses.put("a", new WebLinkFactory());
818         _htmlFactoryClasses.put("area", new WebLinkFactory());
819         _htmlFactoryClasses.put("form", new WebFormFactory());
820         _htmlFactoryClasses.put("img", new WebImageFactory());
821         _htmlFactoryClasses.put("applet", new WebAppletFactory());
822         _htmlFactoryClasses.put("table", new WebTableFactory());
823         _htmlFactoryClasses.put("tr", new TableRowFactory());
824         _htmlFactoryClasses.put("td", new TableCellFactory());
825         _htmlFactoryClasses.put("th", new TableCellFactory());
826         _htmlFactoryClasses.put("frame", new WebFrameFactory());
827         _htmlFactoryClasses.put("iframe", new WebIFrameFactory());
828         _htmlFactoryClasses.put("script", new ScriptFactory());
829         _htmlFactoryClasses.put("noscript", new NoScriptFactory());
830         _htmlFactoryClasses.put("ol", new WebListFactory());
831         _htmlFactoryClasses.put("ul", new WebListFactory());
832         _htmlFactoryClasses.put("li", new ListItemFactory());
833
834         for (int i = 0; i < TEXT_ELEMENTS.length; i++)
835         {
836             _htmlFactoryClasses.put(TEXT_ELEMENTS[i], new TextBlockFactory());
837         }
838
839         for (Iterator JavaDoc i = Arrays.asList(FormControl.getControlElementTags()).iterator(); i.hasNext();)
840         {
841             _htmlFactoryClasses.put(i.next(), new FormControlFactory());
842         }
843     }
844
845     private static HTMLElementFactory getHTMLElementFactory(String JavaDoc tagName)
846     {
847         final HTMLElementFactory factory = (HTMLElementFactory) _htmlFactoryClasses.get(tagName);
848         return factory != null ? factory : _defaultFactory;
849     }
850
851     private void loadElements()
852     {
853         if (!_updateElements)
854             return;
855
856         NodeUtils.NodeAction action = new NodeUtils.NodeAction()
857         {
858
859             public boolean processElement(NodeUtils.PreOrderTraversal pot, Element JavaDoc element)
860             {
861                 HTMLElementFactory factory = getHTMLElementFactory(element.getNodeName().toLowerCase());
862                 if (factory == null || !factory.isRecognized(getClientProperties()))
863                     return true;
864                 if (pot.getClosestContext(ContentConcealer.class) != null)
865                     return true;
866
867                 if (!_elements.containsKey(element))
868                     factory.recordElement(pot, element, ParsedHTML.this);
869                 if (factory.addToContext())
870                     pot.pushContext(_elements.get(element));
871
872                 return true;
873             }
874
875             public void processTextNode(NodeUtils.PreOrderTraversal pot, Node JavaDoc textNode)
876             {
877                 if (textNode.getNodeValue().trim().length() == 0)
878                     return;
879
880                 Node JavaDoc parent = textNode.getParentNode();
881                 if (!parent.getNodeName().equalsIgnoreCase("body"))
882                     return;
883                 if (pot.getClosestContext(ContentConcealer.class) != null)
884                     return;
885                 new HtmlElementRecorder().recordHtmlElement(pot, textNode, newTextBlock(textNode));
886             }
887         };
888         NodeUtils.PreOrderTraversal nt = new NodeUtils.PreOrderTraversal(getRootNode());
889         nt.pushBaseContext(this);
890         nt.perform(action);
891
892         _updateElements = false;
893     }
894
895     private ClientProperties getClientProperties()
896     {
897         WebWindow window = _response.getWindow();
898         return window == null ? ClientProperties.getDefaultProperties() : window.getClient().getClientProperties();
899     }
900
901     private Button toButtonWithoutForm(Element JavaDoc element)
902     {
903         return new Button(_response, element);
904     }
905
906     private WebForm toWebForm(Element JavaDoc element)
907     {
908         return new WebForm(_response, _baseURL, element, _frame, _baseTarget, _characterSet);
909     }
910
911     private WebFrame toWebFrame(Element JavaDoc element)
912     {
913         return new WebFrame(_response, _baseURL, element, _frame);
914     }
915
916     private WebFrame toWebIFrame(Element JavaDoc element)
917     {
918         return new WebIFrame(_baseURL, element, _frame);
919     }
920
921     private WebLink toLinkAnchor(Element JavaDoc child)
922     {
923         return (!isWebLink(child)) ? null : new WebLink(_response, _baseURL, child, _frame, _baseTarget, _characterSet);
924     }
925
926     private boolean isWebLink(Node JavaDoc node)
927     {
928         return (node.getAttributes().getNamedItem("href") != null);
929     }
930
931     private WebImage toWebImage(Element JavaDoc child)
932     {
933         return new WebImage(_response, this, _baseURL, child, _frame, _baseTarget, _characterSet);
934     }
935
936     private WebApplet toWebApplet(Element JavaDoc element)
937     {
938         return new WebApplet(_response, element, _baseTarget);
939     }
940
941     private WebTable toWebTable(Element JavaDoc element)
942     {
943         return new WebTable(_response, _frame, element, _baseURL, _baseTarget, _characterSet);
944     }
945
946     private TextBlock toTextBlock(Element JavaDoc element)
947     {
948         return new TextBlock(_response, _frame, _baseURL, _baseTarget, element, _characterSet);
949     }
950
951     private TextBlock newTextBlock(Node JavaDoc textNode)
952     {
953         return new TextBlock(_response, _frame, _baseURL, _baseTarget, textNode, _characterSet);
954     }
955
956     private WebList toOrderedList(Element JavaDoc element)
957     {
958         return new WebList(_response, _frame, _baseURL, _baseTarget, element, _characterSet);
959     }
960
961     private void addToMaps(Node JavaDoc node, HTMLElement htmlElement)
962     {
963         _elements.put(node, htmlElement);
964         if (htmlElement.getID() != null)
965             _elementsByID.put(htmlElement.getID(), htmlElement);
966         if (htmlElement.getName() != null)
967             addNamedElement(htmlElement.getName(), htmlElement);
968     }
969
970     private void addNamedElement(String JavaDoc name, HTMLElement htmlElement)
971     {
972         List JavaDoc list = (List JavaDoc) _elementsByName.get(name);
973         if (list == null)
974             _elementsByName.put(name, list = new ArrayList JavaDoc());
975         list.add(htmlElement);
976     }
977
978     private void addToList(HTMLElement htmlElement)
979     {
980         ArrayList JavaDoc list = getListForElement(htmlElement);
981         if (list != null)
982             list.add(htmlElement);
983     }
984
985     private ArrayList JavaDoc getListForElement(HTMLElement element)
986     {
987         if (element instanceof WebLink)
988             return _linkList;
989         if (element instanceof WebForm)
990             return _formsList;
991         if (element instanceof WebImage)
992             return _imagesList;
993         if (element instanceof WebApplet)
994             return _appletList;
995         if (element instanceof WebTable)
996             return _tableList;
997         if (element instanceof WebFrame)
998             return _frameList;
999         if (element instanceof BlockElement)
1000            return _blocksList;
1001        return null;
1002    }
1003
1004    /**
1005     * Returns the first link which contains the specified text.
1006     */

1007    public WebLink getLinkWith(String JavaDoc text)
1008    {
1009        return getFirstMatchingLink(WebLink.MATCH_CONTAINED_TEXT, text);
1010    }
1011
1012    /**
1013     * Returns the link which contains the first image with the specified text as its 'alt' attribute.
1014     */

1015    public WebLink getLinkWithImageText(String JavaDoc text)
1016    {
1017        WebImage image = getImageWithAltText(text);
1018        return image == null ? null : image.getLink();
1019    }
1020
1021    /**
1022     * Returns the link found in the page with the specified name.
1023     */

1024    public WebLink getLinkWithName(String JavaDoc name)
1025    {
1026        return getFirstMatchingLink(WebLink.MATCH_NAME, name);
1027    }
1028
1029    /**
1030     * Returns the first link found in the page matching the specified criteria.
1031     */

1032    public WebLink getFirstMatchingLink(HTMLElementPredicate predicate, Object JavaDoc criteria)
1033    {
1034        WebLink[] links = getLinks();
1035        for (int i = 0; i < links.length; i++)
1036        {
1037            if (predicate.matchesCriteria(links[i], criteria))
1038                return links[i];
1039        }
1040        return null;
1041    }
1042
1043    /**
1044     * Returns all links found in the page matching the specified criteria.
1045     */

1046    public WebLink[] getMatchingLinks(HTMLElementPredicate predicate, Object JavaDoc criteria)
1047    {
1048        ArrayList JavaDoc matches = new ArrayList JavaDoc();
1049        WebLink[] links = getLinks();
1050        for (int i = 0; i < links.length; i++)
1051        {
1052            if (predicate.matchesCriteria(links[i], criteria))
1053                matches.add(links[i]);
1054        }
1055        return (WebLink[]) matches.toArray(new WebLink[matches.size()]);
1056    }
1057
1058    /**
1059     * Returns the image found in the page with the specified name.
1060     */

1061    public WebImage getImageWithName(String JavaDoc name)
1062    {
1063        WebImage[] images = getImages();
1064        for (int i = 0; i < images.length; i++)
1065        {
1066            if (HttpUnitUtils.matches(name, images[i].getName()))
1067                return images[i];
1068        }
1069        return null;
1070    }
1071
1072    /**
1073     * Returns the first image found in the page with the specified src attribute.
1074     */

1075    public WebImage getImageWithSource(String JavaDoc source)
1076    {
1077        WebImage[] images = getImages();
1078        for (int i = 0; i < images.length; i++)
1079        {
1080            if (HttpUnitUtils.matches(source, images[i].getSource()))
1081                return images[i];
1082        }
1083        return null;
1084    }
1085
1086    /**
1087     * Returns the first image found in the page with the specified alt attribute.
1088     */

1089    public WebImage getImageWithAltText(String JavaDoc altText)
1090    {
1091        WebImage[] images = getImages();
1092        for (int i = 0; i < images.length; i++)
1093        {
1094            if (HttpUnitUtils.matches(altText, images[i].getAltText()))
1095                return images[i];
1096        }
1097        return null;
1098    }
1099
1100    /**
1101     * Returns the first table in the response which matches the specified predicate and value. Will recurse into any
1102     * nested tables, as needed.
1103     * @return the selected table, or null if none is found
1104     */

1105    public WebTable getFirstMatchingTable(HTMLElementPredicate predicate, Object JavaDoc criteria)
1106    {
1107        return getTableSatisfyingPredicate(getTables(), predicate, criteria);
1108    }
1109
1110    /**
1111     * Returns the tables in the response which match the specified predicate and value. Will recurse into any nested
1112     * tables, as needed.
1113     * @return the selected tables, or null if none are found
1114     */

1115    public WebTable[] getMatchingTables(HTMLElementPredicate predicate, Object JavaDoc criteria)
1116    {
1117        return getTablesSatisfyingPredicate(getTables(), predicate, criteria);
1118    }
1119
1120    /**
1121     * Returns the first table in the response which has the specified text as the full text of its first non-blank row
1122     * and non-blank column. Will recurse into any nested tables, as needed.
1123     * @return the selected table, or null if none is found
1124     */

1125    public WebTable getTableStartingWith(String JavaDoc text)
1126    {
1127        return getFirstMatchingTable(WebTable.MATCH_FIRST_NONBLANK_CELL, text);
1128    }
1129
1130    /**
1131     * Returns the first table in the response which has the specified text as a prefix of the text in its first
1132     * non-blank row and non-blank column. Will recurse into any nested tables, as needed.
1133     * @return the selected table, or null if none is found
1134     */

1135    public WebTable getTableStartingWithPrefix(String JavaDoc text)
1136    {
1137        return getFirstMatchingTable(WebTable.MATCH_FIRST_NONBLANK_CELL_PREFIX, text);
1138    }
1139
1140    /**
1141     * Returns the first table in the response which has the specified text as its summary attribute. Will recurse into
1142     * any nested tables, as needed.
1143     * @return the selected table, or null if none is found
1144     */

1145    public WebTable getTableWithSummary(String JavaDoc summary)
1146    {
1147        return getFirstMatchingTable(WebTable.MATCH_SUMMARY, summary);
1148    }
1149
1150    /**
1151     * Returns the first table in the response which has the specified text as its ID attribute. Will recurse into any
1152     * nested tables, as needed.
1153     * @return the selected table, or null if none is found
1154     */

1155    public WebTable getTableWithID(String JavaDoc ID)
1156    {
1157        return getFirstMatchingTable(WebTable.MATCH_ID, ID);
1158    }
1159
1160    /**
1161     * Returns a copy of the domain object model associated with this page.
1162     */

1163    public Node JavaDoc getDOM()
1164    {
1165        return getRootNode().cloneNode( /* deep */true);
1166    }
1167
1168    // ---------------------------------- Object methods --------------------------------
1169

1170    public String JavaDoc toString()
1171    {
1172        return _baseURL.toExternalForm() + System.getProperty("line.separator") + _rootNode;
1173    }
1174
1175    // ---------------------------------- package members --------------------------------
1176

1177    /**
1178     * Specifies the root node for this HTML fragment.
1179     */

1180    void setRootNode(Node JavaDoc rootNode)
1181    {
1182        if (_rootNode != null && rootNode != _rootNode)
1183            throw new IllegalStateException JavaDoc("The root node has already been defined as "
1184                + _rootNode
1185                + " and cannot be redefined as "
1186                + rootNode);
1187        _rootNode = rootNode;
1188        _links = null;
1189        _forms = null;
1190        _images = null;
1191        _applets = null;
1192        _tables = null;
1193        _frames = null;
1194        _blocks = null;
1195        _updateElements = true;
1196    }
1197
1198    /**
1199     * Returns the base URL for this HTML segment.
1200     */

1201    URL JavaDoc getBaseURL()
1202    {
1203        return _baseURL;
1204    }
1205
1206    WebResponse getResponse()
1207    {
1208        return _response;
1209    }
1210
1211    /**
1212     * Returns the domain object model associated with this page, to be used internally.
1213     */

1214    Node JavaDoc getOriginalDOM()
1215    {
1216        return getRootNode();
1217    }
1218
1219    /**
1220     * Returns the frames found in the page in the order in which they appear.
1221     */

1222    public WebFrame[] getFrames()
1223    {
1224        if (_frames == null)
1225        {
1226            loadElements();
1227            _frames = (WebFrame[]) _frameList.toArray(new WebFrame[_frameList.size()]);
1228        }
1229        return _frames;
1230    }
1231
1232    // ---------------------------------- private members --------------------------------
1233

1234    Node JavaDoc getRootNode()
1235    {
1236        if (_rootNode == null)
1237            throw new IllegalStateException JavaDoc("The root node has not been specified");
1238        return _rootNode;
1239    }
1240
1241    /**
1242     * Returns the table with the specified text in its summary attribute.
1243     */

1244    private WebTable getTableSatisfyingPredicate(WebTable[] tables, HTMLElementPredicate predicate, Object JavaDoc value)
1245    {
1246        for (int i = 0; i < tables.length; i++)
1247        {
1248            if (predicate.matchesCriteria(tables[i], value))
1249            {
1250                return tables[i];
1251            }
1252            else
1253            {
1254                for (int j = 0; j < tables[i].getRowCount(); j++)
1255                {
1256                    for (int k = 0; k < tables[i].getColumnCount(); k++)
1257                    {
1258                        TableCell cell = tables[i].getTableCell(j, k);
1259                        if (cell != null)
1260                        {
1261                            WebTable[] innerTables = cell.getTables();
1262                            if (innerTables.length != 0)
1263                            {
1264                                WebTable result = getTableSatisfyingPredicate(innerTables, predicate, value);
1265                                if (result != null)
1266                                    return result;
1267                            }
1268                        }
1269                    }
1270                }
1271            }
1272        }
1273        return null;
1274    }
1275
1276    /**
1277     * Returns the tables which match the specified criteria.
1278     */

1279    private WebTable[] getTablesSatisfyingPredicate(WebTable[] tables, HTMLElementPredicate predicate, Object JavaDoc value)
1280    {
1281        ArrayList JavaDoc matches = new ArrayList JavaDoc();
1282        for (int i = 0; i < tables.length; i++)
1283        {
1284            if (predicate.matchesCriteria(tables[i], value))
1285            {
1286                matches.add(tables[i]);
1287            }
1288            for (int j = 0; j < tables[i].getRowCount(); j++)
1289            {
1290                for (int k = 0; k < tables[i].getColumnCount(); k++)
1291                {
1292                    TableCell cell = tables[i].getTableCell(j, k);
1293                    if (cell != null)
1294                    {
1295                        WebTable[] innerTables = cell.getTables();
1296                        if (innerTables.length != 0)
1297                        {
1298                            WebTable[] result = getTablesSatisfyingPredicate(innerTables, predicate, value);
1299                            if (result != null && result.length > 0)
1300                            {
1301                                for (int l = 0; l < result.length; l++)
1302                                {
1303                                    matches.add(result[l]);
1304                                }
1305                            }
1306                        }
1307                    }
1308                }
1309            }
1310        }
1311        if (matches.size() > 0)
1312        {
1313            return (WebTable[]) matches.toArray(new WebTable[matches.size()]);
1314        }
1315        else
1316        {
1317            return null;
1318        }
1319    }
1320
1321    class WebIFrame extends WebFrame implements ContentConcealer
1322    {
1323
1324        public WebIFrame(URL JavaDoc baseURL, Node JavaDoc frameNode, FrameSelector parentFrame)
1325        {
1326            super(_response, baseURL, frameNode, parentFrame);
1327        }
1328    }
1329
1330    class NoScriptElement extends HTMLElementBase implements ContentConcealer
1331    {
1332
1333        public NoScriptElement(Node JavaDoc node)
1334        {
1335            super(node);
1336        }
1337
1338        protected ScriptableDelegate newScriptable()
1339        {
1340            return null;
1341        }
1342
1343        protected ScriptableDelegate getParentDelegate()
1344        {
1345            return null;
1346        }
1347    }
1348
1349}
1350
Popular Tags