1 19 20 33 package org.htmlparser.tags; 34 35 import java.util.Enumeration ; 36 import java.util.HashSet ; 37 import java.util.Hashtable ; 38 import java.util.Map ; 39 40 import org.htmlparser.Node; 41 import org.htmlparser.NodeReader; 42 import org.htmlparser.parserHelper.AttributeParser; 43 import org.htmlparser.parserHelper.TagParser; 44 import org.htmlparser.scanners.TagScanner; 45 import org.htmlparser.tags.data.TagData; 46 import org.htmlparser.util.NodeList; 47 import org.htmlparser.util.ParserException; 48 import org.htmlparser.visitors.NodeVisitor; 49 50 57 public class Tag extends Node 58 { 59 public static final String TYPE = "TAG"; 60 64 public final static String TAGNAME = "$<TAGNAME>$"; 65 public final static String EMPTYTAG = "$<EMPTYTAG>$"; 66 private final static int TAG_BEFORE_PARSING_STATE = 1; 67 private final static int TAG_BEGIN_PARSING_STATE = 2; 68 private final static int TAG_FINISHED_PARSING_STATE = 3; 69 private final static int TAG_ILLEGAL_STATE = 4; 70 private final static int TAG_IGNORE_DATA_STATE = 5; 71 private final static int TAG_IGNORE_BEGIN_TAG_STATE = 6; 72 private final static String EMPTY_STRING = ""; 73 74 private static AttributeParser paramParser = new AttributeParser(); 75 private static TagParser tagParser; 76 79 protected StringBuffer tagContents; 80 private boolean emptyXmlTag = false; 81 86 protected Hashtable attributes = null; 87 88 92 protected TagScanner thisScanner = null; 93 private java.lang.String tagLine; 94 95 98 private String [] tagLines; 99 100 103 private int startLine; 104 105 108 protected static HashSet breakTags; 109 static { 110 breakTags = new HashSet (30); 111 breakTags.add("BLOCKQUOTE"); 112 breakTags.add("BODY"); 113 breakTags.add("BR"); 114 breakTags.add("CENTER"); 115 breakTags.add("DD"); 116 breakTags.add("DIR"); 117 breakTags.add("DIV"); 118 breakTags.add("DL"); 119 breakTags.add("DT"); 120 breakTags.add("FORM"); 121 breakTags.add("H1"); 122 breakTags.add("H2"); 123 breakTags.add("H3"); 124 breakTags.add("H4"); 125 breakTags.add("H5"); 126 breakTags.add("H6"); 127 breakTags.add("HEAD"); 128 breakTags.add("HR"); 129 breakTags.add("HTML"); 130 breakTags.add("ISINDEX"); 131 breakTags.add("LI"); 132 breakTags.add("MENU"); 133 breakTags.add("NOFRAMES"); 134 breakTags.add("OL"); 135 breakTags.add("P"); 136 breakTags.add("PRE"); 137 breakTags.add("TD"); 138 breakTags.add("TH"); 139 breakTags.add("TITLE"); 140 breakTags.add("UL"); 141 } 142 143 148 public Tag(TagData tagData) 149 { 150 super(tagData.getTagBegin(), tagData.getTagEnd()); 151 this.startLine = tagData.getStartLine(); 152 this.tagContents = new StringBuffer (); 153 this.tagContents.append(tagData.getTagContents()); 154 this.tagLine = tagData.getTagLine(); 155 this.tagLines = new String [] { tagData.getTagLine()}; 156 this.emptyXmlTag = tagData.isEmptyXmlTag(); 157 } 158 159 public void append(char ch) 160 { 161 tagContents.append(ch); 162 } 163 164 public void append(String ch) 165 { 166 tagContents.append(ch); 167 } 168 169 175 public static Tag find(NodeReader reader, String input, int position) 176 { 177 return tagParser.find(reader, input, position); 178 } 179 180 189 private Hashtable parseAttributes() 190 { 191 return paramParser.parseAttributes(this); 192 } 193 194 199 public String getAttribute(String name) 200 { 201 return (String ) getAttributes().get(name.toUpperCase()); 202 } 203 204 209 public void setAttribute(String key, String value) 210 { 211 attributes.put(key, value); 212 } 213 214 220 public String getParameter(String name) 221 { 222 return (String ) getAttributes().get(name.toUpperCase()); 223 } 224 225 229 public Hashtable getAttributes() 230 { 231 if (attributes == null) 232 { 233 attributes = parseAttributes(); 234 } 235 return attributes; 236 } 237 238 public String getTagName() 239 { 240 return (String ) getAttributes().get(TAGNAME); 241 } 242 243 247 public String getTagLine() 248 { 249 return tagLine; 250 } 251 252 256 public String [] getTagLines() 257 { 258 return tagLines; 259 } 260 261 264 public String getText() 265 { 266 return tagContents.toString(); 267 } 268 269 272 public TagScanner getThisScanner() 273 { 274 return thisScanner; 275 } 276 277 283 public static String extractWord(String s) 284 { 285 int length; 286 boolean parse; 287 char ch; 288 StringBuffer ret; 289 290 length = s.length(); 291 ret = new StringBuffer (length); 292 parse = true; 293 for (int i = 0; i < length && parse; i++) 294 { 295 ch = s.charAt(i); 296 if (Character.isWhitespace(ch) || ch == '=') 297 parse = false; 298 else 299 ret.append(Character.toUpperCase(ch)); 300 } 301 302 return (ret.toString()); 303 } 304 305 310 public Node scan(Map scanners, String url, NodeReader reader) 311 throws ParserException 312 { 313 if (tagContents.length() == 0) 314 return this; 315 try 316 { 317 boolean found = false; 318 Node retVal = null; 319 String firstWord = extractWord(tagContents.toString()); 321 TagScanner scanner = (TagScanner) scanners.get(firstWord); 323 324 if (scanner != null 326 && scanner.evaluate( 327 tagContents.toString(), 328 reader.getPreviousOpenScanner())) 329 { 330 found = true; 331 TagScanner save; 332 save = reader.getPreviousOpenScanner(); 333 reader.setPreviousOpenScanner(scanner); 334 retVal = scanner.createScannedNode(this, url, reader, tagLine); 335 reader.setPreviousOpenScanner(save); 336 } 337 338 if (!found) 339 return this; 340 else 341 { 342 return retVal; 343 } 344 } 345 catch (Exception e) 346 { 347 String errorMsg; 348 if (tagContents != null) 349 errorMsg = tagContents.toString(); 350 else 351 errorMsg = "null"; 352 throw new ParserException( 353 "Tag.scan() : Error while scanning tag, tag contents = " 354 + errorMsg 355 + ", tagLine = " 356 + tagLine, 357 e); 358 } 359 } 360 361 365 public void setAttributes(Hashtable attributes) 366 { 367 this.attributes = attributes; 368 } 369 370 374 public void setTagBegin(int tagBegin) 375 { 376 this.nodeBegin = tagBegin; 377 } 378 379 383 public int getTagBegin() 384 { 385 return (nodeBegin); 386 } 387 388 392 public void setTagEnd(int tagEnd) 393 { 394 this.nodeEnd = tagEnd; 395 } 396 397 401 public int getTagEnd() 402 { 403 return (nodeEnd); 404 } 405 406 410 public int getTagStartLine() 411 { 412 return startLine; 413 } 414 415 419 public int getTagEndLine() 420 { 421 return startLine + tagLines.length - 1; 422 } 423 424 public void setTagLine(java.lang.String newTagLine) 425 { 426 tagLine = newTagLine; 427 428 String [] newTagLines = new String [tagLines.length + 1]; 432 for (int i = 0; i < tagLines.length; i++) 433 newTagLines[i] = tagLines[i]; 434 newTagLines[tagLines.length] = newTagLine; 435 tagLines = newTagLines; 436 } 437 438 public void setText(String text) 439 { 440 tagContents = new StringBuffer (text); 441 } 442 443 public void setThisScanner(TagScanner scanner) 444 { 445 thisScanner = scanner; 446 } 447 448 public String toPlainTextString() 449 { 450 return EMPTY_STRING; 451 } 452 453 459 public String toHtml() 460 { 461 StringBuffer sb = new StringBuffer (); 462 sb.append("<"); 463 sb.append(getTagName()); 464 if (containsMoreThanOneKey()) 465 sb.append(" "); 466 String key, value; 467 String empty = null; 468 int i = 0; 469 for (Enumeration e = attributes.keys(); e.hasMoreElements();) 470 { 471 key = (String ) e.nextElement(); 472 i++; 473 if (!key.equals(TAGNAME)) 474 { 475 if (key.equals(EMPTYTAG)) 476 { 477 empty = "/"; 478 } 479 else 480 { 481 value = getAttribute(key); 482 sb.append(key + "=\"" + value + "\""); 483 if (i < attributes.size()) 484 sb.append(" "); 485 } 486 } 487 } 488 if (empty != null) 489 sb.append(empty); 490 if (isEmptyXmlTag()) 491 sb.append("/"); 492 sb.append(">"); 493 return sb.toString(); 494 } 495 496 private boolean containsMoreThanOneKey() 497 { 498 return attributes.keySet().size() > 1; 499 } 500 501 504 public String toString() 505 { 506 return "Begin Tag : " 507 + tagContents 508 + "; begins at : " 509 + elementBegin() 510 + "; ends at : " 511 + elementEnd(); 512 } 513 514 518 public static void setTagParser(TagParser tagParser) 519 { 520 Tag.tagParser = tagParser; 521 } 522 523 528 public boolean breaksFlow() 529 { 530 return (breakTags.contains(getText().toUpperCase())); 531 } 532 533 540 public void collectInto(NodeList collectionList, String filter) 541 { 542 if (thisScanner != null && thisScanner.getFilter().equals(filter)) 543 collectionList.add(this); 544 } 545 546 551 public Hashtable getParsed() 552 { 553 return attributes; 554 } 555 556 565 public Hashtable redoParseAttributes() 566 { 567 return parseAttributes(); 568 } 569 570 public void accept(NodeVisitor visitor) 571 { 572 visitor.visitTag(this); 573 } 574 575 public String getType() 576 { 577 return TYPE; 578 } 579 580 585 public boolean isEmptyXmlTag() 586 { 587 return emptyXmlTag; 588 } 589 590 public void setEmptyXmlTag(boolean emptyXmlTag) 591 { 592 this.emptyXmlTag = emptyXmlTag; 593 } 594 595 } 596 | Popular Tags |