KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > clipbuilder > html > web > html > Impl > ExtractorFilter > StringTreeExtractorFilter


1 package org.jahia.clipbuilder.html.web.html.Impl.ExtractorFilter;
2
3 import java.util.*;
4
5 import javax.swing.text.html.*;
6
7 import org.jahia.clipbuilder.html.util.*;
8 import org.jahia.clipbuilder.html.web.Constant.*;
9 import org.jahia.clipbuilder.html.web.html.*;
10 import org.jahia.clipbuilder.html.web.html.HTMLDocument;
11 import org.htmlparser.*;
12 import org.htmlparser.Parser;
13 import org.htmlparser.Tag;
14 import org.htmlparser.tags.*;
15 import org.htmlparser.util.*;
16 import org.htmlparser.visitors.*;
17 import org.jahia.clipbuilder.html.web.html.Impl.ExtractorFilter.util.*;
18
19 /**
20  * String implementation of HTMLFilter
21  *
22  *@author Tlili Khaled
23  */

24 public class StringTreeExtractorFilter extends AbsctractExtractoreFilter {
25     /**
26      * Description of the Method
27      *
28      *@param ele Description of Parameter
29      *@return Description of the Returned Value
30      */

31     private String JavaDoc selectedPart = "<p> Can't retrieve the selected part </p>";
32     private final String JavaDoc KEY_SELECTED_PART = "keyPart";
33     private Hashtable stringTagHash = new Hashtable();
34     private final String JavaDoc NAME = "StringTreeExtractorFilter";
35     private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(StringTreeExtractorFilter.class);
36
37
38     /**
39      * Constructor for the JDomHTMLFilter object
40      */

41     public StringTreeExtractorFilter() {
42         super("StringTreeExtractorFilter");
43     }
44
45
46
47     /**
48      * Constructor for the StringTreeFilter object
49      *
50      *@param originalSelectedPart Description of Parameter
51      */

52     public StringTreeExtractorFilter(String JavaDoc originalSelectedPart) {
53         this();
54         buildKeySelectedPart(originalSelectedPart);
55     }
56
57
58     /**
59      * Constructor for the StringTreeHTMLFilter object
60      *
61      *@param doc Description of Parameter
62      *@param originalSelectedPart Description of Parameter
63      */

64     public StringTreeExtractorFilter(HTMLDocument doc, String JavaDoc originalSelectedPart) {
65         this(originalSelectedPart);
66         buildKeySelectedPart(originalSelectedPart);
67     }
68
69
70
71     /**
72      * Sets the StringTagHash attribute of the StringTreeHTMLFilter object
73      *
74      *@param stringTagHash The new StringTagHash value
75      */

76     public void setStringTagHash(Hashtable stringTagHash) {
77         this.stringTagHash = stringTagHash;
78     }
79
80
81     /**
82      * Sets the SelectedPart attribute of the StringTreeHTMLFilter object
83      *
84      *@param selectedPart The new SelectedPart value
85      */

86     public void setSelectedPart(String JavaDoc selectedPart) {
87         this.selectedPart = selectedPart;
88     }
89
90
91     /**
92      * Gets the SelectedPart attribute of the StringTreeExtractorFilter object
93      *
94      *@return The SelectedPart value
95      */

96     public String JavaDoc getSelectedPart() {
97         return this.selectedPart;
98     }
99
100
101
102     /**
103      * Gets the Name attribute of the StringTreeExtractorFilter object
104      *
105      *@return The Name value
106      */

107     public String JavaDoc getName() {
108         return NAME.toString();
109     }
110
111
112
113     /**
114      * Gets the KeyPart attribute of the StringTreeFilter object
115      *
116      *@return The KeyPart value
117      */

118     public String JavaDoc getKeyPart() {
119         String JavaDoc key = (String JavaDoc) getKeyMap().get(KEY_SELECTED_PART);
120         return key;
121     }
122
123
124     /**
125      * Gets the StringTagHash attribute of the StringTreeHTMLFilter object
126      *
127      *@return The StringTagHash value
128      */

129     public Hashtable getStringTagHash() {
130         return stringTagHash;
131     }
132
133
134
135     /**
136      * Gets the SelectedPart attribute of the JDomHTMLFilter object
137      *
138      *@param doc HTMLDocument object that have the instance of a
139      * JDomHTMLDocument
140      *@param action Description of Parameter
141      *@return The SelectedPart value
142      *@exception Exception Description of Exception
143      */

144     public String JavaDoc getSelectedPart(HTMLDocument doc, int action) throws Exception JavaDoc {
145         if (action == ACTION_BUILD_KEY_PART) {
146             // don't update parameters values ang get selected part
147
doc.getTransformedDocumentAsString();
148         }
149         else if (action == ACTION_RETRIEVE_SELECTED_PART) {
150             // update parameters values ang get selected part
151
doc.getUserDocumentAsString();
152         }
153
154         //filter the document
155
processFiltering(doc, action);
156         return getSelectedPart();
157     }
158
159
160     /**
161      * Sets the KeyPart attribute of the StringTreeFilter object
162      *
163      *@param keyPart The new KeyPart value
164      */

165     public void addKeyPart(String JavaDoc keyPart) {
166         getKeyMap().put(this.KEY_SELECTED_PART, keyPart);
167     }
168
169
170
171     /**
172      * Description of the Method
173      *
174      *@param originalSelectedPart Description of Parameter
175      */

176     public void buildKeySelectedPart(String JavaDoc originalSelectedPart) {
177
178         //build a jdom document of the selected part
179
String JavaDoc charSet = "";
180         Parser parser = Parser.createParser(originalSelectedPart, charSet);
181         BuildKeyVisitor visitor = new BuildKeyVisitor();
182         try {
183             parser.visitAllNodesWith(visitor);
184             logger.debug("key part is: " + visitor.getBuildedKey());
185         }
186         catch (Exception JavaDoc ex) {
187             ex.printStackTrace();
188             logger.error("buldKeySelectedPart error " + ex.getMessage());
189         }
190
191         String JavaDoc key = visitor.getBuildedKey();
192         key = refactorKey(key);
193         addKeyPart(key);
194
195     }
196
197
198     /**
199      * Gets the TagFromStringKey attribute of the StringTreeHTMLFilter object
200      *
201      *@param key Description of Parameter
202      *@return The TagFromStringKey value
203      */

204     private Tag getTagFromStringKey(String JavaDoc key) {
205         return (Tag) getStringTagHash().get(key);
206     }
207
208
209
210     /**
211      * Best match = v:br: --> :div:br:
212      *
213      *@param where Description of Parameter
214      *@param bestMatch Description of Parameter
215      *@return The ValidBestMatch value
216      */

217     private String JavaDoc getValidBestMatch(String JavaDoc where, String JavaDoc bestMatch) {
218         // bestMatch is null or empty
219
if (bestMatch == null || bestMatch.length() == 0) {
220             return bestMatch;
221         }
222         if (bestMatch.charAt(0) == ':') {
223             return bestMatch;
224         }
225         int position = where.indexOf(bestMatch);
226
227         String JavaDoc toAppend = "";
228         while (where.charAt(position) != ':') {
229             toAppend = toAppend + where.charAt(position);
230             position--;
231         }
232         logger.debug("[ Append ]" + toAppend);
233         return toAppend + bestMatch;
234     }
235
236
237     /**
238      * Gets the AllElementsFromBestMatch attribute of the
239      * StringTreeExtractorFilter object
240      *
241      *@param docString Description of Parameter
242      *@param bestMatch Description of Parameter
243      *@return The AllElementsFromBestMatch value
244      *@exception Exception Description of Exception
245      */

246     private Hashtable getAllTagsFromBestMatch(String JavaDoc docString, String JavaDoc bestMatch) throws Exception JavaDoc {
247         Hashtable tagList = new Hashtable();
248
249         //Retrieve the first elemeent
250
int indexBestMatch = docString.indexOf(bestMatch);
251         logger.debug("[Best match is " + bestMatch + " ]");
252         String JavaDoc key = docString.substring(0, indexBestMatch);
253         Tag firtsTag = getTagFromStringKey(key);
254
255         // add the first ele to the list
256
if (firtsTag == null) {
257             logger.error("[No element whith key " + key + " found]");
258         }
259         else {
260             logger.debug("[Element --" + firtsTag.getTagName() + "-- whith key " + key + " added]");
261             tagList.put(key, firtsTag);
262         }
263
264         //Retreive and add of the rest of element
265
String JavaDoc[] nameEleArray = bestMatch.split(":");
266         //key = key_of_element_just_before + ":"+name_elemet_just_before;
267
//Warning: the first element has been already processed;
268
String JavaDoc keyElementJustBefore = key;
269         for (int i = 2; i < nameEleArray.length; i++) {
270             logger.debug("[Process: " + nameEleArray[i] + " ]");
271             //build key
272
String JavaDoc nameElementJustBefore = nameEleArray[i - 1];
273             String JavaDoc newKey = keyElementJustBefore + ":" + nameElementJustBefore;
274
275             //add element
276
Tag currentEle = getTagFromStringKey(newKey);
277             if (currentEle == null) {
278                 logger.error("[No element whith key " + newKey + " found]");
279             }
280             else {
281                 logger.debug("[Element <" + currentEle.getTagName() + "> whith key " + newKey + " added]");
282                 tagList.put(newKey, currentEle);
283
284             }
285
286             //update
287
keyElementJustBefore = newKey;
288
289         }
290         return tagList;
291     }
292
293
294     /**
295      * Gets the RealSelectedPart attribute of the StringTreeExtractorFilter
296      * object
297      *
298      *@param originalSelectedPart Description of Parameter
299      *@return The RealSelectedPart value
300      */

301     private String JavaDoc refactorKey(String JavaDoc originalSelectedPart) {
302         String JavaDoc realSelectedPart = originalSelectedPart;
303         realSelectedPart = originalSelectedPart.replaceAll(":TBODY", "");
304         return realSelectedPart;
305     }
306
307
308
309     /**
310      * Adds a feature to the ElemenWhitKey attribute of the StringTreeHTMLFilter
311      * object
312      *
313      *@param key The feature to be added to the ElemenWhitKey attribute
314      *@param ele The feature to be added to the ElemenWhitKey attribute
315      */

316     private void addElemenWhitKey(String JavaDoc key, Tag ele) {
317         getStringTagHash().put(key, ele);
318         //logger.debug("Tag " + ele + "whith key " + key + " added");
319
}
320
321
322     /**
323      * Extract the selected part of the document
324      *
325      *@param htmlDoc Description of Parameter
326      *@param action Description of Parameter
327      *@exception Exception Description of Exception
328      */

329
330     private void processFiltering(HTMLDocument htmlDoc, int action) throws Exception JavaDoc {
331         //build key of the html document
332
String JavaDoc html = "";
333         if (action == ACTION_BUILD_KEY_PART) {
334             // don't update parameters values ang get selected part
335
html = htmlDoc.getTransformedDocumentAsString();
336         }
337         else if (action == ACTION_RETRIEVE_SELECTED_PART) {
338             // update parameters values ang get selected part
339
html = htmlDoc.getUserDocumentAsString();
340         }
341
342         String JavaDoc charSet = "";
343         Parser parser = Parser.createParser(html, "");
344         BuildKeyVisitor visitor = new BuildKeyVisitor();
345         String JavaDoc docString = "";
346         try {
347             parser.visitAllNodesWith(visitor);
348             docString = visitor.getBuildedKey();
349             logger.debug("pattern of the documnet part is: " + docString);
350         }
351         catch (ParserException ex) {
352             ex.printStackTrace();
353             logger.error("buldKeySelectedPart error " + ex.getMessage());
354             throw new WebClippingException("Select part", ex);
355         }
356
357         //Compute best match
358
String JavaDoc bestMatch = StringUtilities.getBestMatchString(docString, getKeyPart(), ':');
359         bestMatch = getValidBestMatch(docString, bestMatch);
360
361         logger.debug("[KeyPart is : " + getKeyPart() + " ]");
362         logger.debug("[BestPart is : " + bestMatch + " ]");
363
364         //build hashtable
365

366         //Build the supposed selected part
367
int mode = getMode();
368         if (mode == MODE_WHITOUT_CSS) {
369             processWhithViewMode(parser, visitor, docString, bestMatch, htmlDoc);
370         }
371         else {
372             processWhithCssMode(parser, visitor, docString, bestMatch, htmlDoc);
373         }
374     }
375
376
377     /**
378      * Description of the Method
379      *
380      *@param parser Description of Parameter
381      *@param visitor Description of Parameter
382      *@param docString Description of Parameter
383      *@param bestMatch Description of Parameter
384      *@param doc Description of Parameter
385      *@exception Exception Description of Exception
386      */

387     private void processWhithViewMode(Parser parser, BuildKeyVisitor visitor, String JavaDoc docString, String JavaDoc bestMatch, HTMLDocument doc) throws Exception JavaDoc {
388         BuildSelectedPartVisitorViewMode bspv = new BuildSelectedPartVisitorViewMode(docString, bestMatch);
389         try {
390             parser.reset();
391             parser.visitAllNodesWith(bspv);
392             docString = visitor.getBuildedKey();
393             logger.debug("pattern of the documnet part is: " + docString);
394             String JavaDoc selectedPart = bspv.getSelectedPart();
395             String JavaDoc encoded = selectedPart;
396
397             //Rebuild the document
398
setSelectedPart(encoded);
399         }
400         catch (Exception JavaDoc ex) {
401             ex.printStackTrace();
402             logger.error("Selectet part error " + ex.getMessage());
403             throw new WebClippingException("Select part", ex);
404         }
405     }
406
407
408     /**
409      * Description of the Method
410      *
411      *@param parser Description of Parameter
412      *@param visitor Description of Parameter
413      *@param docString Description of Parameter
414      *@param bestMatch Description of Parameter
415      *@param doc Description of Parameter
416      *@exception Exception Description of Exception
417      */

418     private void processWhithCssMode(Parser parser, BuildKeyVisitor visitor, String JavaDoc docString, String JavaDoc bestMatch, HTMLDocument doc) throws Exception JavaDoc {
419         BuildSelectedPartVisitorCss bspv = new BuildSelectedPartVisitorCss(docString, bestMatch);
420         try {
421             parser.reset();
422             parser.visitAllNodesWith(bspv);
423             docString = visitor.getBuildedKey();
424             logger.debug("pattern of the documnet part is: " + docString);
425
426             String JavaDoc selectedPart = bspv.getSelectedPart();
427             String JavaDoc encoded = selectedPart;
428
429             //Rebuild the document
430
setSelectedPart(encoded);
431         }
432         catch (Exception JavaDoc ex) {
433             ex.printStackTrace();
434             logger.error("Selectet part error " + ex.getMessage());
435             throw new WebClippingException("Select part", ex);
436         }
437     }
438
439
440
441     /**
442      * Description of the Class
443      *
444      *@author Tlili Khaled
445      */

446     public class BuildKeyVisitor extends NodeVisitor {
447         private String JavaDoc key = "";
448
449
450         /**
451          * Constructor for the MyVisitor object
452          */

453         public BuildKeyVisitor() {
454             key = "";
455         }
456
457
458         /**
459          * Gets the BuildedKey attribute of the BuildKeyVisitor object
460          *
461          *@return The BuildedKey value
462          */

463         public String JavaDoc getBuildedKey() {
464             return key;
465         }
466
467
468         /**
469          * Description of the Method
470          *
471          *@param tag Description of Parameter
472          */

473         public void visitTag(Tag tag) {
474             key = key + ":" + tag.getTagName();
475             addElemenWhitKey(key, tag);
476             //logger.debug("added key: " + key);
477

478             //logger.debug("\n" + tag.getTagName() + tag.getStartPosition());
479
}
480
481
482         /**
483          * Description of the Method
484          *
485          *@param string Description of Parameter
486          */

487         public void visitStringNode(Text string) {
488             //logger.debug(string);
489
}
490
491     }
492
493
494     /**
495      * Description of the Class
496      *
497      *@author Tlili Khaled
498      */

499     public class BuildSelectedPartVisitorViewMode extends NodeVisitor {
500         private String JavaDoc key = "";
501         private String JavaDoc selectedPart = "";
502         private Hashtable tagHash;
503         private Set addedTagSet = new HashSet();
504
505
506         /**
507          * Constructor for the MyVisitor object
508          *
509          *@param docString Description of Parameter
510          *@param bestMatch Description of Parameter
511          *@exception Exception Description of Exception
512          */

513         public BuildSelectedPartVisitorViewMode(String JavaDoc docString, String JavaDoc bestMatch) throws Exception JavaDoc {
514             key = "";
515             tagHash = getAllTagsFromBestMatch(docString, bestMatch);
516
517         }
518
519
520         /**
521          * Gets the BuildedKey attribute of the BuildKeyVisitor object
522          *
523          *@return The BuildedKey value
524          */

525         public String JavaDoc getBuildedKey() {
526             return key;
527         }
528
529
530         /**
531          * Gets the SelectedPart attribute of the BuildSelectedPartVisitor object
532          *
533          *@return The SelectedPart value
534          */

535         public String JavaDoc getSelectedPart() {
536             return selectedPart;
537         }
538
539
540         /**
541          * Description of the Method
542          *
543          *@param tag Description of Parameter
544          */

545         public void visitTag(Tag tag) {
546             logger.debug(tag.getTagName());
547             //add style and script
548
if (tag instanceof StyleTag || tag instanceof ScriptTag) {
549                 logger.debug("Style or Script tag added");
550                 selectedPart = selectedPart + tag.toHtml();
551             }
552             processSelectedTag(tag);
553
554         }
555
556
557
558         /**
559          * Description of the Method
560          *
561          *@param string Description of Parameter
562          */

563         public void visitStringNode(Text string) {
564             // do nothing
565
}
566
567
568         /**
569          * Gets the SelectedTagEmbeddedInFormTag attribute of the
570          * BuildSelectedPartVisitor object
571          *
572          *@param currentTag Description of Parameter
573          *@param pTag Description of Parameter
574          *@return The SelectedTagEmbeddedInFormTag value
575          */

576         private FormTag getSelectedTagEmbeddedInFormTag(
577                 Tag currentTag, FormTag pTag) {
578             Hashtable atts = pTag.getAttributes();
579             pTag = new FormTag();
580             pTag.setAttributes(atts);
581             FormTag endTag = new FormTag();
582             endTag.setTagName("/form");
583             pTag.setEndTag(endTag);
584             NodeList children = new NodeList();
585             children.add(currentTag);
586             pTag.setChildren(children);
587             return pTag;
588         }
589
590
591         /**
592          * Description of the Method
593          *
594          *@param tag Description of Parameter
595          */

596         private void processSelectedTag(Tag tag) {
597             // add the selected tags
598
key = key + ":" + tag.getTagName();
599             Tag currentTag = (Tag) tagHash.get(key);
600
601             if (currentTag != null) {
602                 logger.debug("This tag is selected");
603                 addedTagSet.add(currentTag);
604                 Tag pTag = (Tag) currentTag.getParent();
605
606                 if (addedTagSet.contains(pTag)) {
607                     logger.debug("This tag is already HttpServleted");
608                     return;
609                 }
610                 //process form parent tag
611
if (pTag instanceof FormTag) {
612                     // logger.debug("Tag added whith form parent");
613
// FormTag fTag = getSelectedTagEmbeddedInFormTag(currentTag, (FormTag) pTag);
614
// selectedPart = selectedPart + fTag.toHtml();
615
}
616                 else {
617                     //Tag is selected
618
logger.debug("Tag added");
619                     updateCssHeritance(currentTag, pTag);
620                     selectedPart = selectedPart + currentTag.toHtml();
621                 }
622             }
623         }
624
625
626         /**
627          * Description of the Method
628          *
629          *@param tag Description of Parameter
630          *@param parentNode Description of Parameter
631          */

632         private void updateCssHeritance(Tag tag, Node parentNode) {
633             if (parentNode instanceof Tag) {
634                 Tag pTag = (Tag) parentNode;
635                 Node ppNode = parentNode.getParent();
636                 if (ppNode != null) {
637                     updateCssHeritance(pTag, ppNode);
638                 }
639                 //Style att
640
String JavaDoc pStyleAtt = pTag.getAttribute(HTML.Attribute.STYLE.toString());
641                 if (pStyleAtt != null && !pStyleAtt.equalsIgnoreCase("")) {
642                     logger.debug("update style att");
643                     String JavaDoc styleAtt = tag.getAttribute(HTML.Attribute.STYLE.toString());
644                     if (styleAtt == null) {
645                         tag.setAttribute(HTML.Attribute.STYLE.toString(), pStyleAtt);
646                     }
647                     else {
648                         tag.setAttribute(HTML.Attribute.STYLE.toString(), styleAtt + " " + pStyleAtt);
649                     }
650
651                 }
652
653                 //class att
654
String JavaDoc pClassAtt = pTag.getAttribute(HTML.Attribute.CLASS.toString());
655                 if (pClassAtt != null && !pClassAtt.equalsIgnoreCase("")) {
656                     logger.debug("update class att");
657                     String JavaDoc classAtt = tag.getAttribute(HTML.Attribute.CLASS.toString());
658                     if (classAtt == null) {
659                         tag.setAttribute(HTML.Attribute.CLASS.toString(), pClassAtt);
660
661                     }
662                     else {
663                         tag.setAttribute(HTML.Attribute.CLASS.toString(), classAtt + " " + pClassAtt);
664                     }
665                 }
666
667             }
668             else {
669                 logger.debug("parent node is not a tag");
670             }
671         }
672
673     }
674
675
676
677     /**
678      * Description of the Class
679      *
680      *@author Tlili Khaled
681      */

682     public class BuildSelectedPartVisitorCss extends VisibilityPositionVisitor {
683         private String JavaDoc key = "";
684         private Hashtable tagHash;
685         private String JavaDoc selectedPart = "";
686         private Set addedTagSet = new HashSet();
687         private int count = 0;
688
689
690         /**
691          * Constructor for the MyVisitor object
692          *
693          *@param docString Description of Parameter
694          *@param bestMatch Description of Parameter
695          *@exception Exception Description of Exception
696          */

697         public BuildSelectedPartVisitorCss(String JavaDoc docString, String JavaDoc bestMatch) throws Exception JavaDoc {
698             key = "";
699             tagHash = getAllTagsFromBestMatch(docString, bestMatch);
700
701         }
702
703
704         /**
705          * Gets the BuildedKey attribute of the BuildKeyVisitor object
706          *
707          *@return The BuildedKey value
708          */

709         public String JavaDoc getBuildedKey() {
710             return key;
711         }
712
713
714         /**
715          * Gets the SelectedPart attribute of the BuildSelectedPartVisitor object
716          *
717          *@return The SelectedPart value
718          */

719         public String JavaDoc getSelectedPart() {
720             return selectedPart;
721         }
722
723
724         /**
725          * Description of the Method
726          *
727          *@param tag Description of Parameter
728          */

729         public void visitTag(Tag tag) {
730             logger.debug(tag.getTagName());
731             //add style and script
732
buildSelectedPArtAsString(tag);
733
734             count++;
735
736         }
737
738
739
740         /**
741          * Description of the Method
742          *
743          *@param string Description of Parameter
744          */

745         public void visitStringNode(Text string) {
746             // do nothing
747
}
748
749
750         /**
751          * Description of the Method
752          *
753          *@param tag Description of Parameter
754          */

755         private void buildSelectedPArtAsString(Tag tag) {
756             // add the selected tags
757
key = key + ":" + tag.getTagName();
758             Tag currentTag = (Tag) tagHash.get(key);
759
760             //tag is selected
761
if (currentTag != null) {
762                 logger.debug("This tag is selected");
763                 addedTagSet.add(currentTag);
764                 Tag pTag = (Tag) currentTag.getParent();
765                 if (!addedTagSet.contains(pTag)) {
766                     logger.debug("Set position");
767                     selectedPart = selectedPart + currentTag.toHtml();
768
769                 }
770
771             }
772         }
773
774     }
775
776 }
777
Popular Tags