1 27 package org.htmlparser.visitors; 28 29 import org.htmlparser.Text; 30 import org.htmlparser.Tag; 31 import org.htmlparser.util.Translate; 32 33 34 44 public class TextExtractingVisitor extends NodeVisitor { 45 private StringBuffer textAccumulator; 46 private boolean preTagBeingProcessed; 47 48 public TextExtractingVisitor() { 49 textAccumulator = new StringBuffer (); 50 preTagBeingProcessed = false; 51 } 52 53 public String getExtractedText() { 54 return textAccumulator.toString(); 55 } 56 57 public void visitStringNode(Text stringNode) { 58 String text = stringNode.getText(); 59 if (!preTagBeingProcessed) { 60 text = Translate.decode(text); 61 text = replaceNonBreakingSpaceWithOrdinarySpace(text); 62 } 63 textAccumulator.append(text); 64 } 65 66 private String replaceNonBreakingSpaceWithOrdinarySpace(String text) { 67 return text.replace('\u00a0',' '); 68 } 69 70 public void visitTag(Tag tag) 71 { 72 if (isPreTag(tag)) 73 preTagBeingProcessed = true; 74 } 75 76 public void visitEndTag(Tag tag) 77 { 78 if (isPreTag(tag)) 79 preTagBeingProcessed = false; 80 } 81 82 private boolean isPreTag(Tag tag) { 83 return tag.getTagName().equals("PRE"); 84 } 85 86 } 87 | Popular Tags |