KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > clipbuilder > html > web > html > Impl > HTMLParser > TransformBuilderVisitor


1 package org.jahia.clipbuilder.html.web.html.Impl.HTMLParser;
2
3 import java.util.*;
4
5 import javax.swing.text.html.*;
6
7 import org.jahia.clipbuilder.html.util.*;
8 import org.jahia.clipbuilder.html.web.Constant.*;
9 import org.htmlparser.*;
10 import org.htmlparser.Tag;
11 import org.htmlparser.nodes.*;
12 import org.htmlparser.tags.*;
13 import org.htmlparser.util.*;
14 import org.htmlparser.visitors.*;
15 import org.jahia.clipbuilder.html.struts.Util.Constants;
16 import org.htmlparser.scanners.*;
17 /**
18  * Description of the Class
19  *
20  *@author Tlili Khaled
21  */

22 class TransformBuilderVisitor extends NodeVisitor {
23     /**
24      * Description of the Field
25      */

26     protected int nbLinks = 0;
27     /**
28      * Description of the Field
29      */

30     protected String JavaDoc formParentName;
31     /**
32      * Description of the Field
33      */

34     protected String JavaDoc formParentId;
35     /**
36      * Description of the Field
37      */

38     protected int formParentPosition = -1;
39
40     /**
41      * Description of the Field
42      */

43     protected List framesList = new ArrayList();
44     /**
45      * Description of the Field
46      */

47     private Node firstNode;
48     private HTMLParserTransformer transformer;
49
50     /**
51      * Description of the Field
52      */

53     public static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(TransformBuilderVisitor.class);
54
55
56     /**
57      * Constructor for the TransformBuilderVisitor object
58      *
59      *@param transformer Description of Parameter
60      */

61     public TransformBuilderVisitor(HTMLParserTransformer transformer) {
62         this.transformer = transformer;
63         ScriptScanner.STRICT = false;
64     }
65
66
67     /**
68      * Sets the FirstNode attribute of the TransformBuilderVisitor object
69      *
70      *@param firstNode The new FirstNode value
71      */

72     public void setFirstNode(Node firstNode) {
73         this.firstNode = firstNode;
74     }
75
76
77     /**
78      * Gets the FirstNode attribute of the TransformBuilderVisitor object
79      *
80      *@return The FirstNode value
81      */

82     public Node getFirstNode() {
83         return firstNode;
84     }
85
86
87     /**
88      * Description of the Method
89      *
90      *@param tag Description of Parameter
91      */

92     public void visitTag(Tag tag) {
93         if (tag instanceof Html) {
94             setFirstNode(tag);
95
96         }
97
98         if (tag instanceof BaseHrefTag) {
99             processBaseHrefTag((BaseHrefTag) tag);
100         }
101         // remove target tag
102
processTargetAtt(tag);
103
104         // process style tag
105
processStyleTag(tag);
106
107         //refactor javascript that appear in an event attribute
108
if (transformer.isRemoveBodyScript() || transformer.isRemoveHeadScriptTag()) {
109             processEventHandlerAtt(tag);
110         }
111         // process ele whith attribut href
112
if (tag.getAttribute("href") != null) {
113             try {
114                 nbLinks = processHrefAttribute(tag, nbLinks, Constants.WEB_BROWSER_SHOW_BROWSE);
115                 logger.debug("Nb link = " + nbLinks);
116             }
117             catch (WebClippingException ex) {
118                 ex.printStackTrace();
119                 transformer.addParsingErrors(ex.getMessage());
120             }
121         }
122
123         // process ele whith attribut src
124
if (tag.getAttribute("src") != null) {
125             try {
126                 processSrcAttribute(tag);
127             }
128             catch (WebClippingException ex) {
129                 ex.printStackTrace();
130
131                 transformer.addParsingErrors(ex.getMessage());
132             }
133         }
134
135         //process frame tag
136
if (tag instanceof FrameTag) {
137             processFrameTag((FrameTag) tag);
138         }
139
140         if (tag instanceof MetaTag) {
141             if (transformer.isRemoveMetaTag()) {
142                 tag.removeAttribute(HTML.Attribute.HTTPEQUIV.toString());
143             }
144             try {
145                 processMetaTag((MetaTag) tag, Constants.WEB_BROWSER_SHOW_BROWSE);
146             }
147             catch (WebClippingException ex) {
148                 ex.printStackTrace();
149
150                 transformer.addParsingErrors(ex.getMessage());
151             }
152         }
153
154         // process body tag
155
if (tag instanceof BodyTag) {
156             /*
157              * try {
158              * processBodyTag((BodyTag) tag);
159              * }
160              * catch (WebClippingException ex) {
161              * ex.printStackTrace();
162              * transformer.addParsingErrors(ex.getMessage());
163              * }
164              */

165         }
166         //process head tag
167
if (tag instanceof HeadTag) {
168             //node = processHeadTag((HeadTag) tag);
169
}
170
171         // process title tag
172
else if (tag instanceof TitleTag) {
173             //node = processTitleTag((TitleTag) tag);
174
}
175
176         // process ele whith attribut action
177
else if (tag instanceof FormTag) {
178             try {
179                 processFormTag((FormTag) tag, Constants.WEB_BROWSER_SHOW_BROWSE);
180
181             }
182             catch (WebClippingException ex) {
183                 ex.printStackTrace();
184                 transformer.addParsingErrors(ex.getMessage());
185             }
186         }
187
188         //process input element
189
else if (tag instanceof InputTag) {
190             try {
191                 processInputTag((InputTag) tag);
192             }
193             catch (WebClippingException ex) {
194                 ex.printStackTrace();
195                 transformer.addParsingErrors(ex.getMessage());
196             }
197         }
198         else if (tag instanceof SelectTag) {
199             processSelectTag((SelectTag) tag);
200         }
201
202         // inline css
203
else if (tag instanceof StyleTag) {
204             /*
205              * try {
206              * processStyleTag((StyleTag) tag);
207              * }
208              * catch (WebClippingException ex) {
209              * ex.printStackTrace();
210              * transformer.addParsingErrors(ex.getMessage());
211              * }
212              */

213         }
214
215         // extract javascript
216
else if (tag instanceof ScriptTag) {
217             logger.debug("[ Process script tag ]");
218             if (transformer.isRemoveBodyScript() || transformer.isRemoveHeadScriptTag()) {
219                 tag.setChildren(new NodeList());
220                 tag.removeAttribute("src");
221             }
222             try {
223                 processScriptTag((ScriptTag) tag);
224             }
225             catch (WebClippingException ex) {
226                 transformer.addParsingErrors(ex.getMessage());
227                 logger.error("Rewrite javascript error");
228                 ex.printStackTrace();
229             }
230         }
231
232     }
233
234
235
236     /**
237      * Gets the Transformer attribute of the TransformBuilderVisitor object
238      *
239      *@return The Transformer value
240      */

241     protected HTMLParserTransformer getTransformer() {
242         return transformer;
243     }
244
245
246     /**
247      * Description of the Method
248      *
249      *@param tag Description of Parameter
250      */

251     protected void processTargetAtt(Tag tag) {
252         tag.removeAttribute("target");
253     }
254
255
256
257
258     /**
259      * Description of the Method
260      *
261      *@param tag Description of Parameter
262      */

263     protected void processBaseHrefTag(BaseHrefTag tag) {
264         tag.removeAttribute("href");
265     }
266
267
268     /**
269      * Description of the Method
270      *
271      *@param tag Description of Parameter
272      */

273     protected void processFrameTag(FrameTag tag) {
274         String JavaDoc name = tag.getAttribute("name");
275         String JavaDoc id = tag.getAttribute("id");
276         String JavaDoc src = tag.getAttribute("src");
277         transformer.addFrame(framesList, name, id, src);
278         transformer.getDocument().setFramesList(framesList);
279     }
280
281
282
283     /**
284      * Description of the Method
285      *
286      *@param tag Description of Parameter
287      *@exception WebClippingException Description of Exception
288      */

289     protected void processLinkTag(Tag tag) throws WebClippingException {
290         if (transformer.isEnableCSS()) {
291             try {
292                 String JavaDoc media = tag.getAttribute("media");
293                 //get the content of the css
294
String JavaDoc href = tag.getAttribute("href");
295                 // link --> Style
296
tag = new StyleTag();
297                 StyleTag endTag = new StyleTag();
298                 endTag.setTagName("/style");
299                 tag.setEndTag(endTag);
300
301                 // add new content
302
String JavaDoc content = "<!--" + transformer.getImportCssContent(href, media) + "-->";
303                 TextNode t = new TextNode(content);
304                 NodeList children = new NodeList();
305                 children.add(t);
306                 tag.setChildren(children);
307
308                 // remove content and attribute
309
//tag.removeAttribute("href");
310

311
312             }
313             catch (ParserException ex) {
314                 ex.printStackTrace();
315                 throw new WebClippingException("Link", ex);
316             }
317
318             catch (Exception JavaDoc ex) {
319                 ex.printStackTrace();
320                 throw new WebClippingException("Link", ex);
321             }
322             finally {
323                 return;
324             }
325
326         }
327         // dont't process link
328
else {
329             logger.debug("[ CSS removed ]");
330             return;
331         }
332
333     }
334
335
336     /**
337      * Description of the Method
338      *
339      *@param tag Description of Parameter
340      *@exception WebClippingException Description of Exception
341      */

342     protected void processStyleTag(StyleTag tag) throws WebClippingException {
343
344         try {
345
346             // add new content
347
String JavaDoc content = transformer.getInlineCssContent(tag.toPlainTextString());
348
349             //get the content of the css
350
logger.debug("[ Process Inline css ]");
351             Hashtable atts = tag.getAttributes();
352             tag = new StyleTag();
353             tag.setAttributes(atts);
354             StyleTag endTag = new StyleTag();
355             endTag.setTagName("/style");
356             tag.setEndTag(endTag);
357
358             TextNode t = new TextNode("<!--" + content + "-->");
359             NodeList children = new NodeList();
360             children.add(t);
361             tag.setChildren(children);
362
363         }
364         catch (WebClippingException ex) {
365             throw ex;
366         }
367         catch (Exception JavaDoc ex) {
368             throw new WebClippingException("Style", ex);
369         }
370         finally {
371             return;
372         }
373
374     }
375
376
377
378     /**
379      * Description of the Method
380      *
381      *@param tag Description of Parameter
382      *@param webBrowserAction Description of Parameter
383      *@exception WebClippingException Description of Exception
384      */

385     protected void processFormTag(FormTag tag, String JavaDoc webBrowserAction) throws WebClippingException {
386
387         try {
388             //update positon value
389
formParentPosition++;
390
391             // clone the tag because some children will be added
392
tag = (FormTag) tag.clone();
393             // encode the url
394
String JavaDoc url = tag.getFormLocation();
395             formParentName = tag.getFormName();
396             formParentId = tag.getAttribute("id");
397
398             if (formParentId == null) {
399                 formParentId = "";
400             }
401             if (formParentName == null) {
402                 formParentName = "";
403             }
404
405             String JavaDoc method = tag.getFormMethod();
406             if (method == null) {
407                 method = "GET";
408             }
409             String JavaDoc enctype = tag.getAttribute("enctype");
410
411             tag.setFormLocation(transformer.getRewritedActionValue(url, method, formParentName, formParentId, enctype, formParentPosition, webBrowserAction));
412
413             // set method to POST
414
tag.setAttribute("method", "post");
415
416             // remove the att that may open a new window
417
tag.removeAttribute("target");
418             // remove att that is specific to the server that send the data
419
tag.removeAttribute("enctype");
420
421             transformer.resetInputHash();
422         }
423         catch (Exception JavaDoc ex) {
424             throw new WebClippingException("Form tag", ex);
425         }
426         finally {
427             return;
428         }
429
430     }
431
432
433
434     /**
435      * Description of the Method
436      *
437      *@param tag Description of Parameter
438      *@exception WebClippingException Description of Exception
439      */

440     protected void processHtmlTag(Html tag) throws WebClippingException {
441         try {
442             tag.setTagName(HTML.Tag.DIV.toString());
443             Div end = new Div();
444             end.setTagName("/div");
445             tag.setEndTag(end);
446             String JavaDoc classValue = CssUtilities.HTML_CLASS;
447             tag.setAttribute(HTML.Attribute.CLASS.toString(), classValue);
448
449         }
450         catch (Exception JavaDoc ex) {
451             throw new WebClippingException("Html tag", ex);
452         }
453         finally {
454             return;
455         }
456
457     }
458
459
460     /**
461      * Description of the Method
462      *
463      *@param tag Description of Parameter
464      *@exception WebClippingException Description of Exception
465      */

466     protected void processTitleTag(TitleTag tag) throws WebClippingException {
467         try {
468             /*
469              * tag.setTagName(HTML.Tag.DIV.toString());
470              * Div end = new Div();
471              * end.setTagName("/div");
472              * tag.setEndTag(end);
473              */

474         }
475         catch (Exception JavaDoc ex) {
476             throw new WebClippingException("Title tag", ex);
477         }
478         finally {
479             return;
480         }
481     }
482
483
484     /**
485      * Description of the Method
486      */

487     /*
488      * protected void processHeadTag(HeadTag tag) throws WebClippingException {
489      * try {
490      * tag.setTagName(HTML.Tag.DIV.toString());
491      * Div end = new Div();
492      * end.setTagName("/div");
493      * tag.setEndTag(end);
494      * }
495      * catch (Exception ex) {
496      * throw new WebClippingException("Head tag", ex);
497      * }
498      * finally {
499      * return;
500      * }
501      * }
502      */

503     /**
504      * Description of the Method
505      *
506      *@param tag Description of Parameter
507      *@param position Description of Parameter
508      *@param webBrowserAction WebBrowser action to call when the anchor
509      * is clicked
510      *@return postion of the last link tag in the
511      * document
512      *@exception WebClippingException Description of Exception
513      */

514     protected int processHrefAttribute(Tag tag, int position, String JavaDoc webBrowserAction) throws WebClippingException {
515         logger.debug("[ process href Attribute ]");
516         try {
517             String JavaDoc href = tag.getAttribute("href");
518             logger.debug("Found href value: " + href);
519             // set the id of the element
520
//href = URLUtilities.getHrefAbsoluteValue(getTransformer().getUrlBean().getRedirectUrl().toExternalForm(), href);
521
String JavaDoc id = tag.getAttribute("id");
522             String JavaDoc pos = "" + position;
523             if (tag.getTagName().equalsIgnoreCase("a")) {
524                 if (id == null) {
525                     tag.setAttribute("id", HashUtilities.buildLinkHash(href, id, pos));
526                 }
527                 //remove target att
528
tag.removeAttribute("target");
529                 position++;
530             }
531
532             // encode the href
533

534             href = transformer.getRewritedHrefValue(tag.getTagName(), href, id, pos, webBrowserAction);
535             tag.setAttribute("href", href);
536
537             logger.debug("Href rewrited");
538         }
539         catch (Exception JavaDoc ex) {
540             logger.error("Error has occured whith href");
541             ex.printStackTrace();
542             throw new WebClippingException("Href attribute", ex);
543         }
544         finally {
545             return position;
546         }
547
548     }
549
550
551     /**
552      * Description of the Method
553      *
554      *@param tag Description of Parameter
555      *@exception WebClippingException Description of Exception
556      */

557     protected void processSrcAttribute(Tag tag) throws WebClippingException {
558         logger.debug("[ Process scr attribute ]");
559         try {
560
561             String JavaDoc src = tag.getAttribute("src");
562             logger.debug("Found src: " + src);
563             tag.setAttribute("src", transformer.relatifToAbsolute(src));
564         }
565         catch (Exception JavaDoc ex) {
566             throw new WebClippingException("Src Attribute", ex);
567         }
568         finally {
569             return;
570         }
571
572     }
573
574
575     /**
576      * Description of the Method
577      *
578      *@param tag Description of Parameter
579      *@exception WebClippingException Description of Exception
580      */

581     protected void processScriptTag(ScriptTag tag) throws WebClippingException {
582         try {
583             // imported scrip
584
String JavaDoc src = tag.getAttribute(HTML.Attribute.SRC.toString());
585             if (src != null && !src.equals("")) {
586                 // get content
587
String JavaDoc content = transformer.getImportJavascriptContent(src, Constants.WEB_BROWSER_SHOW_BROWSE);
588                 //tag.setAttribute("src", transformer.relatifToAbsolute(src));
589

590                 // add it as comment
591

592                 /*
593                  * TextNode t = new TextNode(content);
594                  * NodeList children = new NodeList();
595                  * children.add(t);
596                  * tag.setChildren(children);
597                  * tag.removeAttribute(HTML.Attribute.SRC.toString());
598                  */

599             }
600             else {
601                 //inline javascript
602

603                 String JavaDoc content = tag.getScriptCode();
604                 if (content != null) {
605                     Hashtable atts = tag.getAttributes();
606                     tag = new ScriptTag();
607                     tag.setAttributes(atts);
608                     StyleTag endTag = new StyleTag();
609                     endTag.setTagName("/script");
610                     tag.setEndTag(endTag);
611                     String JavaDoc baseUrl = transformer.getUrlBean().getAbsoluteUrlValue();
612                     String JavaDoc refactoredJavascript = transformer.getRefactoredJavascript(baseUrl, Constants.WEB_BROWSER_SHOW_BROWSE, content);
613                     refactoredJavascript = "";
614                     TextNode t = new TextNode(refactoredJavascript);
615                     NodeList children = new NodeList();
616                     children.add(t);
617                     tag.setChildren(children);
618                 }
619
620             }
621
622         }
623         catch (Exception JavaDoc ex) {
624             throw new WebClippingException("Script", ex);
625         }
626         finally {
627             return;
628         }
629
630     }
631
632
633     /**
634      * Description of the Method
635      *
636      *@param tag Description of Parameter
637      *@param webBrowserAction Description of Parameter
638      *@exception WebClippingException Description of Exception
639      */

640     protected void processMetaTag(MetaTag tag, String JavaDoc webBrowserAction) throws WebClippingException {
641         logger.debug("[process meta tag]");
642         try {
643             //Get the http-equiv value
644
String JavaDoc httpEquiv = tag.getAttribute(HTML.Attribute.HTTPEQUIV.toString());
645             if (httpEquiv != null) {
646                 //process the refresh
647
if ("refresh".equals(httpEquiv.toLowerCase())) {
648                     String JavaDoc content = tag.getAttribute(HTML.Attribute.CONTENT.toString());
649                     //Get the url of the refresh
650
int urlIndex = content.toLowerCase().indexOf("url");
651                     if (urlIndex != -1) {
652                         //Get the content just before the url
653
String JavaDoc contentBeforeURL = content.substring(0, urlIndex).trim();
654                         logger.debug("[ Content before url " + contentBeforeURL + " ]");
655
656                         //Get the url
657
int equalIndex = urlIndex + 3;
658                         String JavaDoc contentURL = content.substring(equalIndex + 1).trim();
659                         logger.debug("[ BEFORE meta http-equiv=[" + httpEquiv + "] content=[" + content + "] ]");
660                         logger.debug("[ Refresh url is [" + contentURL + " ] ]");
661                         //Encode the url
662
String JavaDoc encodedUrl = transformer.getRewritedHrefValue("meta", contentURL, "metaId", "metaLink", webBrowserAction);
663
664                         //Set the new value whith the encoded url
665
tag.setAttribute(HTML.Attribute.CONTENT.toString(), contentBeforeURL + "url=" + encodedUrl);
666                         logger.debug("[ AFTER meta http-equiv=[" + httpEquiv + "] content=[" + contentBeforeURL + "url=" + encodedUrl + "] ]+");
667                     }
668                 }
669             }
670
671
672         }
673         catch (Exception JavaDoc ex) {
674             ex.printStackTrace();
675             throw new WebClippingException("Meta", ex);
676         }
677         finally {
678             return;
679         }
680
681     }
682
683
684     /**
685      * Description of the Method
686      *
687      *@param tag Description of Parameter
688      */

689     private void processStyleTag(Tag tag) {
690         // process style tag
691
String JavaDoc name = tag.getTagName();
692         logger.debug(" Tag name: " + name);
693         try {
694             String JavaDoc style = tag.getAttribute("style");
695             if (style != null && !style.equalsIgnoreCase("")) {
696                 style = transformer.refactorCssStyleRule(style);
697                 tag.setAttribute("style", style);
698             }
699         }
700         catch (Exception JavaDoc ex) {
701             ex.printStackTrace();
702             logger.error("Rewrite style att error");
703
704         }
705     }
706
707
708     /**
709      * Description of the Method
710      *
711      *@param tag Description of Parameter
712      */

713     private void processEventHandlerAtt(Tag tag) {
714         for (int i = 0; i < WebConstants.JAVASCRIPT_EVENT_NAMES_ARRAY.length; i++) {
715             String JavaDoc name = WebConstants.JAVASCRIPT_EVENT_NAMES_ARRAY[i];
716             String JavaDoc value = tag.getAttribute(name);
717             if (value != null) {
718                 boolean refactor = transformer.isRefactorJavascriptEvent();
719                 if (refactor) {
720                     String JavaDoc url = transformer.getDocument().getUrlBean().getRedirectUrl().toExternalForm();
721                     String JavaDoc newValue = transformer.getRefactoredJavascript(url, Constants.WEB_BROWSER_SHOW_BROWSE, value);
722                     tag.setAttribute(name, newValue);
723                 }
724                 else {
725                     boolean remove = transformer.isRemoveJavascriptEvent();
726                     if (remove) {
727                         tag.removeAttribute(name);
728                     }
729                 }
730             }
731         }
732     }
733
734
735     /**
736      * Description of the Method
737      *
738      *@param tag Description of Parameter
739      */

740     private void processSelectTag(SelectTag tag) {
741         String JavaDoc name = tag.getAttribute("name");
742         if (name == null) {
743             name = "";
744         }
745         String JavaDoc type = WebConstants.TYPE_SELECT;
746         int paramPosition = transformer.getPosAndUpdateInputHash(name);
747         String JavaDoc visibility = "true";
748         NodeList it = tag.getChildren();
749         if (it != null) {
750             for (int i = 0; i < it.size(); i++) {
751                 Node childNode = it.elementAt(i);
752                 if (childNode instanceof OptionTag) {
753                     OptionTag op = (OptionTag) childNode;
754                     String JavaDoc possibleValue = op.getValue();
755
756                     try {
757                         transformer.recordFormParam(type,
758                                 formParentName, formParentId,
759                                 formParentPosition, name, possibleValue,
760                                 visibility, paramPosition);
761                     }
762                     catch (Exception JavaDoc ex) {
763                         ex.printStackTrace();
764                     }
765                 }
766             }
767         }
768     }
769
770
771     /**
772      * Description of the Method
773      *
774      *@param tag Description of Parameter
775      *@exception WebClippingException Description of Exception
776      */

777     private void processInputTag(InputTag tag) throws WebClippingException {
778         try {
779
780             String JavaDoc type = tag.getAttribute(HTML.Attribute.TYPE.toString());
781             //logger.debug("[Input element found whith type: " + type + " ]");
782
String JavaDoc visibility = Boolean.TRUE.toString();
783             //Get the properties of the param
784
String JavaDoc name = transformer.notNullValueForType(tag.getAttribute(HTML.Attribute.NAME.toString()));
785             int paramPosition = transformer.getPosAndUpdateInputHash(name);
786             String JavaDoc possibleValue = transformer.notNullValueForType(tag.getAttribute(HTML.Attribute.VALUE.toString()));
787             type = transformer.notNullValueForType(type);
788             transformer.recordFormParam(type, formParentName, formParentId, formParentPosition, name, possibleValue, visibility, paramPosition);
789
790             // update position value
791
paramPosition++;
792         }
793         catch (Exception JavaDoc ex) {
794             throw new WebClippingException("Parser, Input Element", ex);
795         }
796         finally {
797             return;
798         }
799
800     }
801
802 }
803
Popular Tags