1 26 package org.htmlparser.tests.lexerTests; 27 28 import java.io.IOException ; 29 import java.net.URL ; 30 import java.util.Vector ; 31 import javax.swing.text.BadLocationException ; 32 import javax.swing.text.MutableAttributeSet ; 33 import javax.swing.text.html.HTML ; 34 import javax.swing.text.html.HTMLEditorKit ; 35 import javax.swing.text.html.HTMLEditorKit.Parser; 36 import javax.swing.text.html.HTMLEditorKit.ParserCallback; 37 38 import org.htmlparser.Attribute; 39 import org.htmlparser.Node; 40 import org.htmlparser.Tag; 41 import org.htmlparser.nodes.AbstractNode; 42 import org.htmlparser.lexer.Cursor; 43 import org.htmlparser.lexer.Lexer; 44 import org.htmlparser.util.ParserException; 45 import org.htmlparser.util.Translate; 46 47 60 public class KitTest extends ParserCallback 61 { 62 Vector mNodes; 63 int mIndex; 64 65 69 public KitTest (Vector nodes) 70 { 71 mNodes = nodes; 72 mIndex = 0; 73 } 74 75 80 String snowhite (String s) 81 { 82 int length; 83 char ch; 84 StringBuffer ret; 85 86 length = s.length (); 87 ret = new StringBuffer (length); 88 for (int i = 0; i < length; i++) 89 { 90 ch = s.charAt (i); 91 if (!Character.isWhitespace (ch) && !(160 == ch)) 92 ret.append (ch); 93 } 94 95 return (ret.toString ()); 96 } 97 98 104 boolean match (String s1, String s2) 105 { 106 s1 = snowhite (Translate.decode (s1)); 107 s2 = snowhite (Translate.decode (s2)); 108 return (s1.equalsIgnoreCase (s2)); 109 } 110 111 119 public void handleText (char[] data, int pos) 120 { 121 StringBuffer sb; 122 String theirs; 123 Node node; 124 int match; 125 String ours; 126 127 sb = new StringBuffer (data.length); 128 for (int i = 0; i < data.length; i++) 129 { 130 if (160 == data[i]) 131 sb.append (" "); 132 else 133 sb.append (data[i]); 134 } 135 theirs = sb.toString (); 136 match = -1; 137 for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) 138 { 139 node = (Node)mNodes.elementAt (i); 140 ours = node.getText (); 141 if (match (theirs, ours)) 142 { 143 match = i; 144 break; 145 } 146 } 147 if (-1 == match) 148 { 149 node = (Node)mNodes.elementAt (mIndex); 150 ours = node.getText (); 151 System.out.println ("theirs: " + theirs); 152 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 153 System.out.println ("ours " + cursor + ": " + ours); 154 } 155 else 156 { 157 boolean skipped = false; 158 for (int i = mIndex; i < match; i++) 159 { 160 ours = ((Node)mNodes.elementAt (i)).toHtml (); 161 if (0 != ours.trim ().length ()) 162 { 163 if (!skipped) 164 System.out.println ("skipping:"); 165 System.out.println (ours); 166 skipped = true; 167 } 168 } 169 if (skipped) 170 { 171 System.out.println ("to match:"); 172 node = (Node)mNodes.elementAt (match); 173 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 174 System.out.println ("@" + cursor + ": " + node.toHtml ()); 175 } 176 mIndex = match + 1; 178 } 179 } 180 181 189 public void handleComment (char[] data, int pos) 190 { 191 StringBuffer sb; 192 String theirs; 193 Node node; 194 int match; 195 String ours; 196 197 sb = new StringBuffer (data.length); 198 sb.append (data); 199 theirs = sb.toString (); 200 match = -1; 201 for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) 202 { 203 node = (Node)mNodes.elementAt (i); 204 ours = node.getText (); 205 if (match (theirs, ours)) 206 { 207 match = i; 208 break; 209 } 210 } 211 if (-1 == match) 212 { 213 node = (Node)mNodes.elementAt (mIndex); 214 ours = node.getText (); 215 System.out.println ("theirs: " + theirs); 216 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 217 System.out.println ("ours " + cursor + ": " + ours); 218 } 219 else 220 { 221 boolean skipped = false; 222 for (int i = mIndex; i < match; i++) 223 { 224 ours = ((Node)mNodes.elementAt (i)).toHtml (); 225 if (0 != ours.trim ().length ()) 226 { 227 if (!skipped) 228 System.out.println ("skipping:"); 229 System.out.println (ours); 230 skipped = true; 231 } 232 } 233 if (skipped) 234 { 235 System.out.println ("to match:"); 236 node = (Node)mNodes.elementAt (match); 237 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 238 System.out.println ("@" + cursor + ": " + node.toHtml ()); 239 } 240 mIndex = match + 1; 242 } 243 } 244 245 254 public void handleStartTag (HTML.Tag t, MutableAttributeSet a, int pos) 255 { 256 String theirs; 257 Node node; 258 int match; 259 String ours; 260 261 theirs = t.toString (); 262 match = -1; 263 for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) 264 { 265 node = (Node)mNodes.elementAt (i); 266 if (node instanceof Tag) 267 { 268 ours = ((Attribute)(((Tag)node).getAttributesEx ().elementAt (0))).getName (); 269 if (match (theirs, ours)) 270 { 271 match = i; 272 break; 273 } 274 } 275 } 276 if (-1 == match) 277 { 278 node = (Node)mNodes.elementAt (mIndex); 279 ours = node.getText (); 280 System.out.println ("theirs: " + theirs); 281 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 282 System.out.println ("ours " + cursor + ": " + ours); 283 } 284 else 285 { 286 boolean skipped = false; 287 for (int i = mIndex; i < match; i++) 288 { 289 ours = ((Node)mNodes.elementAt (i)).toHtml (); 290 if (0 != ours.trim ().length ()) 291 { 292 if (!skipped) 293 System.out.println ("skipping:"); 294 System.out.println (ours); 295 skipped = true; 296 } 297 } 298 if (skipped) 299 { 300 System.out.println ("to match:"); 301 node = (Node)mNodes.elementAt (match); 302 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 303 System.out.println ("@" + cursor + ": " + node.toHtml ()); 304 } 305 mIndex = match + 1; 307 } 308 } 309 310 318 public void handleEndTag (HTML.Tag t, int pos) 319 { 320 String theirs; 321 Node node; 322 int match; 323 String ours; 324 325 theirs = t.toString (); 326 match = -1; 327 for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) 328 { 329 node = (Node)mNodes.elementAt (i); 330 if (node instanceof Tag) 331 { 332 ours = ((Attribute)(((Tag)node).getAttributesEx ().elementAt (0))).getName ().substring (1); 333 if (match (theirs, ours)) 334 { 335 match = i; 336 break; 337 } 338 } 339 } 340 if (-1 == match) 341 { 342 node = (Node)mNodes.elementAt (mIndex); 343 ours = node.getText (); 344 System.out.println ("theirs: " + theirs); 345 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 346 System.out.println ("ours " + cursor + ": " + ours); 347 } 348 else 349 { 350 boolean skipped = false; 351 for (int i = mIndex; i < match; i++) 352 { 353 ours = ((Node)mNodes.elementAt (i)).toHtml (); 354 if (0 != ours.trim ().length ()) 355 { 356 if (!skipped) 357 System.out.println ("skipping:"); 358 System.out.println (ours); 359 skipped = true; 360 } 361 } 362 if (skipped) 363 { 364 System.out.println ("to match:"); 365 node = (Node)mNodes.elementAt (match); 366 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 367 System.out.println ("@" + cursor + ": " + node.toHtml ()); 368 } 369 mIndex = match + 1; 371 } 372 } 373 374 383 public void handleSimpleTag (HTML.Tag t, MutableAttributeSet a, int pos) 384 { 385 String theirs; 386 Node node; 387 int match; 388 String ours; 389 390 theirs = t.toString (); 391 match = -1; 392 for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) 393 { 394 node = (Node)mNodes.elementAt (i); 395 if (node instanceof Tag) 396 { 397 ours = ((Attribute)(((Tag)node).getAttributesEx ().elementAt (0))).getName (); 398 if (match (theirs, ours)) 399 { 400 match = i; 401 break; 402 } 403 if (match (theirs, ours)) 404 { 405 match = i; 406 break; 407 } 408 } 409 } 410 if (-1 == match) 411 { 412 node = (Node)mNodes.elementAt (mIndex); 413 ours = node.getText (); 414 System.out.println ("theirs: " + theirs); 415 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 416 System.out.println ("ours " + cursor + ": " + ours); 417 } 418 else 419 { 420 boolean skipped = false; 421 for (int i = mIndex; i < match; i++) 422 { 423 ours = ((Node)mNodes.elementAt (i)).toHtml (); 424 if (0 != ours.trim ().length ()) 425 { 426 if (!skipped) 427 System.out.println ("skipping:"); 428 System.out.println (ours); 429 skipped = true; 430 } 431 } 432 if (skipped) 433 { 434 System.out.println ("to match:"); 435 node = (Node)mNodes.elementAt (match); 436 Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.elementBegin ()); 437 System.out.println ("@" + cursor + ": " + node.toHtml ()); 438 } 439 mIndex = match + 1; 441 } 442 } 443 444 445 453 public void handleError (String errorMsg, int pos) 454 { 455 System.out.println ("******* error @" + pos + " ******** " + errorMsg); 456 } 457 458 461 public void flush () throws BadLocationException 462 { 463 } 464 465 473 public void handleEndOfLineString (String eol) 474 { 475 } 476 477 514 538 541 class MyKit extends HTMLEditorKit 542 { 543 public MyKit () 544 { 545 } 546 547 public HTMLEditorKit.Parser getParser () 548 { 549 return (super.getParser ()); 550 } 551 } 552 553 556 public MyKit getKit () 557 { 558 return (new MyKit ()); 559 } 560 561 566 public static void main (String [] args) throws ParserException, IOException 567 { 568 String link; 569 Lexer lexer; 570 Node node; 571 Vector nodes; 572 KitTest test; 573 MyKit kit; 574 Parser parser; 575 576 577 if (0 == args.length) 578 link = "http://sourceforge.net/projects/htmlparser"; 579 else 580 link = args[0]; 581 URL url = new URL (link); 583 lexer = new Lexer (url.openConnection ()); 584 nodes = new Vector (); 585 while (null != (node = lexer.nextNode ())) 586 nodes.addElement (node); 587 588 lexer.getPage ().getSource ().reset (); 590 test = new KitTest (nodes); 591 kit = test.getKit (); 592 parser = kit.getParser (); 593 parser.parse (lexer.getPage ().getSource (), test, true); 594 } 595 } 596 597 | Popular Tags |