1 package org.jahia.clipbuilder.html.web.html.Impl.ExtractorFilter; 2 3 import java.util.*; 4 5 import javax.swing.text.html.*; 6 7 import org.jahia.clipbuilder.html.util.*; 8 import org.jahia.clipbuilder.html.web.Constant.*; 9 import org.jahia.clipbuilder.html.web.html.*; 10 import org.jahia.clipbuilder.html.web.html.HTMLDocument; 11 import org.htmlparser.*; 12 import org.htmlparser.Parser; 13 import org.htmlparser.Tag; 14 import org.htmlparser.tags.*; 15 import org.htmlparser.util.*; 16 import org.htmlparser.visitors.*; 17 import org.jahia.clipbuilder.html.web.html.Impl.ExtractorFilter.util.*; 18 19 24 public class StringTreeExtractorFilter extends AbsctractExtractoreFilter { 25 31 private String selectedPart = "<p> Can't retrieve the selected part </p>"; 32 private final String KEY_SELECTED_PART = "keyPart"; 33 private Hashtable stringTagHash = new Hashtable(); 34 private final String NAME = "StringTreeExtractorFilter"; 35 private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(StringTreeExtractorFilter.class); 36 37 38 41 public StringTreeExtractorFilter() { 42 super("StringTreeExtractorFilter"); 43 } 44 45 46 47 52 public StringTreeExtractorFilter(String originalSelectedPart) { 53 this(); 54 buildKeySelectedPart(originalSelectedPart); 55 } 56 57 58 64 public StringTreeExtractorFilter(HTMLDocument doc, String originalSelectedPart) { 65 this(originalSelectedPart); 66 buildKeySelectedPart(originalSelectedPart); 67 } 68 69 70 71 76 public void setStringTagHash(Hashtable stringTagHash) { 77 this.stringTagHash = stringTagHash; 78 } 79 80 81 86 public void setSelectedPart(String selectedPart) { 87 this.selectedPart = selectedPart; 88 } 89 90 91 96 public String getSelectedPart() { 97 return this.selectedPart; 98 } 99 100 101 102 107 public String getName() { 108 return NAME.toString(); 109 } 110 111 112 113 118 public String getKeyPart() { 119 String key = (String ) getKeyMap().get(KEY_SELECTED_PART); 120 return key; 121 } 122 123 124 129 public Hashtable getStringTagHash() { 130 return stringTagHash; 131 } 132 133 134 135 144 public String getSelectedPart(HTMLDocument doc, int action) throws Exception { 145 if (action == ACTION_BUILD_KEY_PART) { 146 doc.getTransformedDocumentAsString(); 148 } 149 else if (action == ACTION_RETRIEVE_SELECTED_PART) { 150 doc.getUserDocumentAsString(); 152 } 153 154 processFiltering(doc, action); 156 return getSelectedPart(); 157 } 158 159 160 165 public void addKeyPart(String keyPart) { 166 getKeyMap().put(this.KEY_SELECTED_PART, keyPart); 167 } 168 169 170 171 176 public void buildKeySelectedPart(String originalSelectedPart) { 177 178 String charSet = ""; 180 Parser parser = Parser.createParser(originalSelectedPart, charSet); 181 BuildKeyVisitor visitor = new BuildKeyVisitor(); 182 try { 183 parser.visitAllNodesWith(visitor); 184 logger.debug("key part is: " + visitor.getBuildedKey()); 185 } 186 catch (Exception ex) { 187 ex.printStackTrace(); 188 logger.error("buldKeySelectedPart error " + ex.getMessage()); 189 } 190 191 String key = visitor.getBuildedKey(); 192 key = refactorKey(key); 193 addKeyPart(key); 194 195 } 196 197 198 204 private Tag getTagFromStringKey(String key) { 205 return (Tag) getStringTagHash().get(key); 206 } 207 208 209 210 217 private String getValidBestMatch(String where, String bestMatch) { 218 if (bestMatch == null || bestMatch.length() == 0) { 220 return bestMatch; 221 } 222 if (bestMatch.charAt(0) == ':') { 223 return bestMatch; 224 } 225 int position = where.indexOf(bestMatch); 226 227 String toAppend = ""; 228 while (where.charAt(position) != ':') { 229 toAppend = toAppend + where.charAt(position); 230 position--; 231 } 232 logger.debug("[ Append ]" + toAppend); 233 return toAppend + bestMatch; 234 } 235 236 237 246 private Hashtable getAllTagsFromBestMatch(String docString, String bestMatch) throws Exception { 247 Hashtable tagList = new Hashtable(); 248 249 int indexBestMatch = docString.indexOf(bestMatch); 251 logger.debug("[Best match is " + bestMatch + " ]"); 252 String key = docString.substring(0, indexBestMatch); 253 Tag firtsTag = getTagFromStringKey(key); 254 255 if (firtsTag == null) { 257 logger.error("[No element whith key " + key + " found]"); 258 } 259 else { 260 logger.debug("[Element --" + firtsTag.getTagName() + "-- whith key " + key + " added]"); 261 tagList.put(key, firtsTag); 262 } 263 264 String [] nameEleArray = bestMatch.split(":"); 266 String keyElementJustBefore = key; 269 for (int i = 2; i < nameEleArray.length; i++) { 270 logger.debug("[Process: " + nameEleArray[i] + " ]"); 271 String nameElementJustBefore = nameEleArray[i - 1]; 273 String newKey = keyElementJustBefore + ":" + nameElementJustBefore; 274 275 Tag currentEle = getTagFromStringKey(newKey); 277 if (currentEle == null) { 278 logger.error("[No element whith key " + newKey + " found]"); 279 } 280 else { 281 logger.debug("[Element <" + currentEle.getTagName() + "> whith key " + newKey + " added]"); 282 tagList.put(newKey, currentEle); 283 284 } 285 286 keyElementJustBefore = newKey; 288 289 } 290 return tagList; 291 } 292 293 294 301 private String refactorKey(String originalSelectedPart) { 302 String realSelectedPart = originalSelectedPart; 303 realSelectedPart = originalSelectedPart.replaceAll(":TBODY", ""); 304 return realSelectedPart; 305 } 306 307 308 309 316 private void addElemenWhitKey(String key, Tag ele) { 317 getStringTagHash().put(key, ele); 318 } 320 321 322 329 330 private void processFiltering(HTMLDocument htmlDoc, int action) throws Exception { 331 String html = ""; 333 if (action == ACTION_BUILD_KEY_PART) { 334 html = htmlDoc.getTransformedDocumentAsString(); 336 } 337 else if (action == ACTION_RETRIEVE_SELECTED_PART) { 338 html = htmlDoc.getUserDocumentAsString(); 340 } 341 342 String charSet = ""; 343 Parser parser = Parser.createParser(html, ""); 344 BuildKeyVisitor visitor = new BuildKeyVisitor(); 345 String docString = ""; 346 try { 347 parser.visitAllNodesWith(visitor); 348 docString = visitor.getBuildedKey(); 349 logger.debug("pattern of the documnet part is: " + docString); 350 } 351 catch (ParserException ex) { 352 ex.printStackTrace(); 353 logger.error("buldKeySelectedPart error " + ex.getMessage()); 354 throw new WebClippingException("Select part", ex); 355 } 356 357 String bestMatch = StringUtilities.getBestMatchString(docString, getKeyPart(), ':'); 359 bestMatch = getValidBestMatch(docString, bestMatch); 360 361 logger.debug("[KeyPart is : " + getKeyPart() + " ]"); 362 logger.debug("[BestPart is : " + bestMatch + " ]"); 363 364 366 int mode = getMode(); 368 if (mode == MODE_WHITOUT_CSS) { 369 processWhithViewMode(parser, visitor, docString, bestMatch, htmlDoc); 370 } 371 else { 372 processWhithCssMode(parser, visitor, docString, bestMatch, htmlDoc); 373 } 374 } 375 376 377 387 private void processWhithViewMode(Parser parser, BuildKeyVisitor visitor, String docString, String bestMatch, HTMLDocument doc) throws Exception { 388 BuildSelectedPartVisitorViewMode bspv = new BuildSelectedPartVisitorViewMode(docString, bestMatch); 389 try { 390 parser.reset(); 391 parser.visitAllNodesWith(bspv); 392 docString = visitor.getBuildedKey(); 393 logger.debug("pattern of the documnet part is: " + docString); 394 String selectedPart = bspv.getSelectedPart(); 395 String encoded = selectedPart; 396 397 setSelectedPart(encoded); 399 } 400 catch (Exception ex) { 401 ex.printStackTrace(); 402 logger.error("Selectet part error " + ex.getMessage()); 403 throw new WebClippingException("Select part", ex); 404 } 405 } 406 407 408 418 private void processWhithCssMode(Parser parser, BuildKeyVisitor visitor, String docString, String bestMatch, HTMLDocument doc) throws Exception { 419 BuildSelectedPartVisitorCss bspv = new BuildSelectedPartVisitorCss(docString, bestMatch); 420 try { 421 parser.reset(); 422 parser.visitAllNodesWith(bspv); 423 docString = visitor.getBuildedKey(); 424 logger.debug("pattern of the documnet part is: " + docString); 425 426 String selectedPart = bspv.getSelectedPart(); 427 String encoded = selectedPart; 428 429 setSelectedPart(encoded); 431 } 432 catch (Exception ex) { 433 ex.printStackTrace(); 434 logger.error("Selectet part error " + ex.getMessage()); 435 throw new WebClippingException("Select part", ex); 436 } 437 } 438 439 440 441 446 public class BuildKeyVisitor extends NodeVisitor { 447 private String key = ""; 448 449 450 453 public BuildKeyVisitor() { 454 key = ""; 455 } 456 457 458 463 public String getBuildedKey() { 464 return key; 465 } 466 467 468 473 public void visitTag(Tag tag) { 474 key = key + ":" + tag.getTagName(); 475 addElemenWhitKey(key, tag); 476 478 } 480 481 482 487 public void visitStringNode(Text string) { 488 } 490 491 } 492 493 494 499 public class BuildSelectedPartVisitorViewMode extends NodeVisitor { 500 private String key = ""; 501 private String selectedPart = ""; 502 private Hashtable tagHash; 503 private Set addedTagSet = new HashSet(); 504 505 506 513 public BuildSelectedPartVisitorViewMode(String docString, String bestMatch) throws Exception { 514 key = ""; 515 tagHash = getAllTagsFromBestMatch(docString, bestMatch); 516 517 } 518 519 520 525 public String getBuildedKey() { 526 return key; 527 } 528 529 530 535 public String getSelectedPart() { 536 return selectedPart; 537 } 538 539 540 545 public void visitTag(Tag tag) { 546 logger.debug(tag.getTagName()); 547 if (tag instanceof StyleTag || tag instanceof ScriptTag) { 549 logger.debug("Style or Script tag added"); 550 selectedPart = selectedPart + tag.toHtml(); 551 } 552 processSelectedTag(tag); 553 554 } 555 556 557 558 563 public void visitStringNode(Text string) { 564 } 566 567 568 576 private FormTag getSelectedTagEmbeddedInFormTag( 577 Tag currentTag, FormTag pTag) { 578 Hashtable atts = pTag.getAttributes(); 579 pTag = new FormTag(); 580 pTag.setAttributes(atts); 581 FormTag endTag = new FormTag(); 582 endTag.setTagName("/form"); 583 pTag.setEndTag(endTag); 584 NodeList children = new NodeList(); 585 children.add(currentTag); 586 pTag.setChildren(children); 587 return pTag; 588 } 589 590 591 596 private void processSelectedTag(Tag tag) { 597 key = key + ":" + tag.getTagName(); 599 Tag currentTag = (Tag) tagHash.get(key); 600 601 if (currentTag != null) { 602 logger.debug("This tag is selected"); 603 addedTagSet.add(currentTag); 604 Tag pTag = (Tag) currentTag.getParent(); 605 606 if (addedTagSet.contains(pTag)) { 607 logger.debug("This tag is already HttpServleted"); 608 return; 609 } 610 if (pTag instanceof FormTag) { 612 } 616 else { 617 logger.debug("Tag added"); 619 updateCssHeritance(currentTag, pTag); 620 selectedPart = selectedPart + currentTag.toHtml(); 621 } 622 } 623 } 624 625 626 632 private void updateCssHeritance(Tag tag, Node parentNode) { 633 if (parentNode instanceof Tag) { 634 Tag pTag = (Tag) parentNode; 635 Node ppNode = parentNode.getParent(); 636 if (ppNode != null) { 637 updateCssHeritance(pTag, ppNode); 638 } 639 String pStyleAtt = pTag.getAttribute(HTML.Attribute.STYLE.toString()); 641 if (pStyleAtt != null && !pStyleAtt.equalsIgnoreCase("")) { 642 logger.debug("update style att"); 643 String styleAtt = tag.getAttribute(HTML.Attribute.STYLE.toString()); 644 if (styleAtt == null) { 645 tag.setAttribute(HTML.Attribute.STYLE.toString(), pStyleAtt); 646 } 647 else { 648 tag.setAttribute(HTML.Attribute.STYLE.toString(), styleAtt + " " + pStyleAtt); 649 } 650 651 } 652 653 String pClassAtt = pTag.getAttribute(HTML.Attribute.CLASS.toString()); 655 if (pClassAtt != null && !pClassAtt.equalsIgnoreCase("")) { 656 logger.debug("update class att"); 657 String classAtt = tag.getAttribute(HTML.Attribute.CLASS.toString()); 658 if (classAtt == null) { 659 tag.setAttribute(HTML.Attribute.CLASS.toString(), pClassAtt); 660 661 } 662 else { 663 tag.setAttribute(HTML.Attribute.CLASS.toString(), classAtt + " " + pClassAtt); 664 } 665 } 666 667 } 668 else { 669 logger.debug("parent node is not a tag"); 670 } 671 } 672 673 } 674 675 676 677 682 public class BuildSelectedPartVisitorCss extends VisibilityPositionVisitor { 683 private String key = ""; 684 private Hashtable tagHash; 685 private String selectedPart = ""; 686 private Set addedTagSet = new HashSet(); 687 private int count = 0; 688 689 690 697 public BuildSelectedPartVisitorCss(String docString, String bestMatch) throws Exception { 698 key = ""; 699 tagHash = getAllTagsFromBestMatch(docString, bestMatch); 700 701 } 702 703 704 709 public String getBuildedKey() { 710 return key; 711 } 712 713 714 719 public String getSelectedPart() { 720 return selectedPart; 721 } 722 723 724 729 public void visitTag(Tag tag) { 730 logger.debug(tag.getTagName()); 731 buildSelectedPArtAsString(tag); 733 734 count++; 735 736 } 737 738 739 740 745 public void visitStringNode(Text string) { 746 } 748 749 750 755 private void buildSelectedPArtAsString(Tag tag) { 756 key = key + ":" + tag.getTagName(); 758 Tag currentTag = (Tag) tagHash.get(key); 759 760 if (currentTag != null) { 762 logger.debug("This tag is selected"); 763 addedTagSet.add(currentTag); 764 Tag pTag = (Tag) currentTag.getParent(); 765 if (!addedTagSet.contains(pTag)) { 766 logger.debug("Set position"); 767 selectedPart = selectedPart + currentTag.toHtml(); 768 769 } 770 771 } 772 } 773 774 } 775 776 } 777 | Popular Tags |