1 19 20 33 package org.htmlparser.parserHelper; 34 35 import org.htmlparser.Node; 36 import org.htmlparser.NodeReader; 37 import org.htmlparser.scanners.CompositeTagScanner; 38 import org.htmlparser.tags.CompositeTag; 39 import org.htmlparser.tags.EndTag; 40 import org.htmlparser.tags.Tag; 41 import org.htmlparser.tags.data.CompositeTagData; 42 import org.htmlparser.tags.data.TagData; 43 import org.htmlparser.util.NodeList; 44 import org.htmlparser.util.ParserException; 45 46 public class CompositeTagScannerHelper 47 { 48 private CompositeTagScanner scanner; 49 private Tag tag; 50 private String url; 51 private NodeReader reader; 52 private String currLine; 53 private Tag endTag; 54 private NodeList nodeList; 55 private boolean endTagFound; 56 private int startingLineNumber; 57 private int endingLineNumber; 58 private boolean balance_quotes; 59 60 public CompositeTagScannerHelper( 61 CompositeTagScanner scanner, 62 Tag tag, 63 String url, 64 NodeReader reader, 65 String currLine, 66 boolean balance_quotes) 67 { 68 69 this.scanner = scanner; 70 this.tag = tag; 71 this.url = url; 72 this.reader = reader; 73 this.currLine = currLine; 74 this.endTag = null; 75 this.nodeList = new NodeList(); 76 this.endTagFound = false; 77 this.balance_quotes = balance_quotes; 78 } 79 80 public Tag scan() throws ParserException 81 { 82 this.startingLineNumber = reader.getLastLineNumber(); 83 if (shouldCreateEndTagAndExit()) 84 { 85 return createEndTagAndRepositionReader(); 86 } 87 scanner.beforeScanningStarts(); 88 Node currentNode = tag; 89 90 doEmptyXmlTagCheckOn(currentNode); 91 if (!endTagFound) 92 { 93 do 94 { 95 currentNode = reader.readElement(balance_quotes); 96 if (currentNode == null) 97 continue; 98 currLine = reader.getCurrentLine(); 99 if (currentNode instanceof Tag) 100 doForceCorrectionCheckOn((Tag) currentNode); 101 102 doEmptyXmlTagCheckOn(currentNode); 103 if (!endTagFound) 104 doChildAndEndTagCheckOn(currentNode); 105 } 106 while (currentNode != null && !endTagFound); 107 } 108 if (endTag == null) 109 { 110 createCorrectionEndTagBefore(reader.getLastReadPosition() + 1); 111 } 112 113 this.endingLineNumber = reader.getLastLineNumber(); 114 return createTag(); 115 } 116 117 private boolean shouldCreateEndTagAndExit() 118 { 119 return scanner.shouldCreateEndTagAndExit(); 120 } 121 122 private Tag createEndTagAndRepositionReader() 123 { 124 createCorrectionEndTagBefore(tag.elementBegin()); 125 reader.setPosInLine(tag.elementBegin()); 126 reader.setDontReadNextLine(true); 127 return endTag; 128 } 129 130 private void createCorrectionEndTagBefore(int pos) 131 { 132 String endTagName = tag.getTagName(); 133 int endTagBegin = pos; 134 int endTagEnd = endTagBegin + endTagName.length() + 2; 135 endTag = 136 new EndTag( 137 new TagData(endTagBegin, endTagEnd, endTagName, currLine)); 138 } 139 140 private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) 141 { 142 String endTagName = tag.getTagName(); 143 int endTagBegin = possibleEndTagCauser.elementBegin(); 144 int endTagEnd = endTagBegin + endTagName.length() + 2; 145 possibleEndTagCauser.setTagBegin(endTagEnd + 1); 146 reader.addNextParsedNode(possibleEndTagCauser); 147 endTag = 148 new EndTag( 149 new TagData(endTagBegin, endTagEnd, endTagName, currLine)); 150 } 151 152 private StringBuffer createModifiedLine(String endTagName, int endTagBegin) 153 { 154 StringBuffer newLine = new StringBuffer (); 155 newLine.append(currLine.substring(0, endTagBegin)); 156 newLine.append("</"); 157 newLine.append(endTagName); 158 newLine.append(">"); 159 newLine.append(currLine.substring(endTagBegin, currLine.length())); 160 return newLine; 161 } 162 163 private Tag createTag() throws ParserException 164 { 165 CompositeTag newTag = 166 (CompositeTag) scanner.createTag( 167 new TagData( 168 tag.elementBegin(), 169 endTag.elementEnd(), 170 startingLineNumber, 171 endingLineNumber, 172 tag.getText(), 173 currLine, 174 url, 175 tag.isEmptyXmlTag()), 176 new CompositeTagData(tag, endTag, nodeList)); 177 for (int i = 0; i < newTag.getChildCount(); i++) 178 { 179 Node child = newTag.childAt(i); 180 child.setParent(newTag); 181 } 182 return newTag; 183 } 184 185 private void doChildAndEndTagCheckOn(Node currentNode) 186 { 187 if (currentNode instanceof EndTag) 188 { 189 EndTag possibleEndTag = (EndTag) currentNode; 190 if (isExpectedEndTag(possibleEndTag)) 191 { 192 endTagFound = true; 193 endTag = possibleEndTag; 194 return; 195 } 196 } 197 nodeList.add(currentNode); 198 scanner.childNodeEncountered(currentNode); 199 } 200 201 private boolean isExpectedEndTag(EndTag possibleEndTag) 202 { 203 return possibleEndTag.getTagName().equals(tag.getTagName()); 204 } 205 206 private void doEmptyXmlTagCheckOn(Node currentNode) 207 { 208 if (currentNode instanceof Tag) 209 { 210 Tag possibleEndTag = (Tag) currentNode; 211 if (isXmlEndTag(tag)) 212 { 213 endTag = possibleEndTag; 214 endTagFound = true; 215 } 216 } 217 } 218 219 private void doForceCorrectionCheckOn(Tag possibleEndTagCauser) 220 { 221 if (isEndTagMissing(possibleEndTagCauser)) 222 { 223 createCorrectionEndTagBefore(possibleEndTagCauser); 224 225 endTagFound = true; 226 } 227 } 228 229 private boolean isEndTagMissing(Tag possibleEndTag) 230 { 231 return scanner.isTagToBeEndedFor(possibleEndTag) 232 || isSelfChildTagRecievedIncorrectly(possibleEndTag); 233 } 234 235 private boolean isSelfChildTagRecievedIncorrectly(Tag possibleEndTag) 236 { 237 return ( 238 !(possibleEndTag instanceof EndTag) 239 && !scanner.isAllowSelfChildren() 240 && possibleEndTag.getTagName().equals(tag.getTagName())); 241 } 242 243 public boolean isXmlEndTag(Tag tag) 244 { 245 String tagText = tag.getText(); 246 int lastSlash = tagText.lastIndexOf("/"); 247 return (lastSlash == tagText.length() - 1 || tag.isEmptyXmlTag()) 248 && tag.getText().indexOf("://") == -1; 249 } 250 } 251 | Popular Tags |