1 16 17 131 package org.apache.jetspeed.util; 132 133 import java.io.Reader ; 134 import java.io.StringWriter ; 135 import java.net.MalformedURLException ; 136 import java.net.URL ; 137 import java.util.Enumeration ; 138 import javax.swing.text.html.HTML ; 139 import javax.swing.text.html.HTMLEditorKit ; 140 import javax.swing.text.MutableAttributeSet ; 141 142 import org.apache.jetspeed.services.logging.JetspeedLogFactoryService; 144 import org.apache.jetspeed.services.logging.JetspeedLogger; 145 146 153 154 public class HTMLRewriter 155 { 156 159 private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(HTMLRewriter.class.getName()); 160 161 private HTMLRewriter.Callback cb = new HTMLRewriter.Callback(); 162 163 173 public HTMLRewriter(boolean removeScript, 174 boolean removeStyle, 175 boolean removeNoScript, 176 boolean removeMeta, 177 boolean removeApplet, 178 boolean removeObject, 179 boolean removeHead, 180 boolean removeOnSomething) { 181 init ( removeScript, 182 removeStyle, 183 removeNoScript, 184 removeMeta, 185 removeApplet, 186 removeObject, 187 removeHead, 188 removeOnSomething, 189 false); 190 } 191 192 203 public HTMLRewriter(boolean removeScript, 204 boolean removeStyle, 205 boolean removeNoScript, 206 boolean removeMeta, 207 boolean removeApplet, 208 boolean removeObject, 209 boolean removeHead, 210 boolean removeOnSomething, 211 boolean openInNewWindow ) { 212 init ( removeScript, 213 removeStyle, 214 removeNoScript, 215 removeMeta, 216 removeApplet, 217 removeObject, 218 removeHead, 219 removeOnSomething, 220 openInNewWindow ); 221 } 222 223 236 private void init (boolean removeScript, 237 boolean removeStyle, 238 boolean removeNoScript, 239 boolean removeMeta, 240 boolean removeApplet, 241 boolean removeObject, 242 boolean removeHead, 243 boolean removeOnSomething, 244 boolean openInNewWindow ) 245 { 246 cb.removeScript = removeScript; 247 cb.removeStyle = removeStyle; 248 cb.removeNoScript = removeNoScript; 249 cb.removeMeta = removeMeta; 250 cb.removeApplet = removeApplet; 251 cb.removeObject = removeObject; 252 cb.removeHead = removeHead; 253 cb.removeOnSomething = removeOnSomething; 254 cb.openInNewWindow = openInNewWindow; 255 } 256 257 268 public synchronized String convertURLs(Reader HTMLrdr, String BaseUrl) throws MalformedURLException 269 { 270 HTMLEditorKit.Parser parse = new HTMLRewriter.ParserGetter().getParser(); 271 String res =""; 272 try { 273 if (cb.result != null) { 274 cb.result = null; 275 cb.result = new StringWriter (); 276 } 277 cb.baseUrl = new URL (BaseUrl); 278 parse.parse(HTMLrdr,cb,true); 279 res = cb.getResult(); 280 } catch (Exception e) 281 { 282 logger.error( "Unable to convertURLS", e ); 283 throw new MalformedURLException (e.toString()); 284 } 285 return res; 286 } 287 288 289 292 class ParserGetter extends HTMLEditorKit { 293 296 public HTMLEditorKit.Parser getParser(){ 297 return super.getParser(); 298 } 299 } 300 301 302 class Callback extends HTMLEditorKit.ParserCallback { 303 304 private URL baseUrl; 306 307 private boolean inForm = false; 310 311 312 private int ignoreLevel = 0; 316 317 private boolean removeScript = true; 318 private boolean removeStyle = true; 319 private boolean removeNoScript = true; 320 private boolean removeMeta = true; 321 private boolean removeApplet = true; 322 private boolean removeObject = true; 323 private boolean removeHead = true; 324 private boolean openInNewWindow = false; 325 326 private boolean removeOnSomething = true; 328 329 private boolean inScript = false; 330 private boolean inStyle = false; 331 332 private StringWriter result = new StringWriter (); 333 334 private Callback () { 335 } 336 337 338 private Callback addToResult(Object txt) 339 { 340 if (ignoreLevel > 0) return this; 343 344 try { 345 result.write(txt.toString()); 346 } catch (Exception e) { } 347 return this; 348 } 349 350 private Callback addToResult(char[] txt) 351 { 352 if (ignoreLevel > 0) return this; 353 354 try { 355 result.write(txt); 356 } catch (Exception e) { } 357 return this; 358 } 359 360 363 public String getResult() { 364 try { 365 result.flush(); 366 } catch (Exception e) { } 367 368 String res = " " + result.toString(); 370 371 return res; 372 } 373 374 375 public void flush() throws javax.swing.text.BadLocationException { 376 } 378 379 383 public void handleComment(char[] values,int param) { 384 if ( !( inStyle || inScript)) 385 return; 386 387 try { 388 result.write("<!--"); 389 result.write(values); 390 result.write("-->"); 391 } catch (Exception e) { } 392 } 394 395 public void handleEndOfLineString(java.lang.String str) { 396 addToResult("\n"); 397 } 398 399 public void handleError(java.lang.String str,int param) { 400 } 402 403 public void handleSimpleTag(HTML.Tag tag,MutableAttributeSet attrs,int param) { 404 if (removeMeta && (tag == HTML.Tag.META)) { 405 return; 406 } 407 appendTagToResult(tag,attrs); 408 } 409 410 public void handleStartTag(HTML.Tag tag, MutableAttributeSet attrs, int position) { 411 appendTagToResult(tag,attrs); 412 } 413 414 public void handleEndTag(HTML.Tag tag, int position) { 415 if ((tag ==HTML.Tag.FORM) && (inForm)) { 416 addToResult("</").addToResult(tag).addToResult(">"); 418 inForm = false; 419 } else if (tag == HTML.Tag.FORM) { 420 } else { 423 addToResult("</").addToResult(tag).addToResult(">"); 424 } 425 426 427 if ( (removeScript == false) && (tag == HTML.Tag.SCRIPT)) { 428 inScript = false; 429 } else if ( (removeStyle == false) && (tag == HTML.Tag.STYLE)) { 430 inStyle = false; 431 } 432 433 if ( removeScript && (tag == HTML.Tag.SCRIPT)) { 434 ignoreLevel --; 435 } else if ( removeStyle && (tag == HTML.Tag.STYLE)) { 436 ignoreLevel --; 437 } else if ( removeHead && (tag == HTML.Tag.HEAD)) { 438 ignoreLevel --; 439 } else if ( removeApplet && (tag == HTML.Tag.APPLET)) { 440 ignoreLevel --; 441 } else if ( removeObject && (tag == HTML.Tag.OBJECT)) { 442 ignoreLevel --; 443 } else if ( removeNoScript && (tag.toString().equalsIgnoreCase("NOSCRIPT"))) { 444 ignoreLevel --; 445 } 446 } 447 448 private void appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs) { 449 450 if (tag.toString().equalsIgnoreCase("__ENDOFLINETAG__")) { 451 return; 454 } 455 456 if (tag.toString().equalsIgnoreCase("__IMPLIED__")) { 457 return; 460 } 461 462 convertURLS(tag,attrs); 463 Enumeration e = attrs.getAttributeNames(); 464 if (tag == HTML.Tag.BASE) 465 return; 466 467 addToResult("<").addToResult(tag); 468 while (e.hasMoreElements()) { 469 Object attr = e.nextElement(); 470 String attrName = attr.toString(); 471 String value = attrs.getAttribute(attr).toString(); 472 473 if (!(removeOnSomething 475 && attrName.toLowerCase().startsWith("on") 476 && (attrName.length() > 2))) { 477 addToResult(" ").addToResult(attr).addToResult("=\"") 479 .addToResult(value).addToResult("\""); 480 } 481 } 482 addToResult(">"); 483 } 484 485 491 492 private void convertURLS( HTML.Tag tag, MutableAttributeSet attrs ) { 493 494 496 if (tag == HTML.Tag.A) { 497 if (attrs.getAttribute(HTML.Attribute.HREF) != null) { 498 addConvertedAttribute( HTML.Attribute.HREF, 500 attrs ); 501 } 502 if ((attrs.getAttribute(HTML.Attribute.TARGET) == null) && cb.openInNewWindow) { 503 attrs.addAttribute(HTML.Attribute.TARGET, "_BLANK"); 504 } 505 } else if (tag == HTML.Tag.AREA) { 506 if (attrs.getAttribute(HTML.Attribute.HREF) != null) { 507 addConvertedAttribute( HTML.Attribute.HREF, 509 attrs ); 510 } 511 if ((attrs.getAttribute(HTML.Attribute.TARGET) == null) && cb.openInNewWindow) { 512 attrs.addAttribute(HTML.Attribute.TARGET, "_BLANK"); 513 } 514 } else if (((tag == HTML.Tag.IMG) || (tag == HTML.Tag.INPUT) || (tag == HTML.Tag.SCRIPT)) 515 && (attrs.getAttribute(HTML.Attribute.SRC) != null)) { 516 addConvertedAttribute( HTML.Attribute.SRC, 518 attrs ); 519 } else if (tag == HTML.Tag.LINK) { 520 if (attrs.getAttribute(HTML.Attribute.HREF) != null) { 521 addConvertedAttribute( HTML.Attribute.HREF, 523 attrs ); 524 } 525 } else if ( tag == HTML.Tag.APPLET ) { 526 if (attrs.getAttribute(HTML.Attribute.CODEBASE) == null) { 528 int endOfPath = baseUrl.toString().lastIndexOf("/"); 529 attrs.addAttribute(HTML.Attribute.CODEBASE, 530 baseUrl.toString().substring(0,endOfPath +1)); 531 } else { 532 addConvertedAttribute( HTML.Attribute.CODEBASE, attrs ); 533 } 534 } else if (tag == HTML.Tag.OBJECT) { 535 if (attrs.getAttribute(HTML.Attribute.CODEBASE) == null) { 537 int endOfPath = baseUrl.toString().lastIndexOf("/"); 538 attrs.addAttribute(HTML.Attribute.CODEBASE, 539 baseUrl.toString().substring(0,endOfPath +1)); 540 } else { 541 addConvertedAttribute( HTML.Attribute.CODEBASE, attrs ); 542 } 543 } else if (tag == HTML.Tag.BODY) { 544 if (attrs.getAttribute(HTML.Attribute.BACKGROUND) != null) { 545 attrs.removeAttribute( HTML.Attribute.BACKGROUND); 547 } 548 } else if (tag == HTML.Tag.BASE) { 549 if (attrs.getAttribute(HTML.Attribute.HREF) != null) { 550 try { 551 baseUrl = new URL (attrs.getAttribute(HTML.Attribute.HREF).toString()); 552 } catch (Throwable t) { 553 logger.error( "HTMLRewriter: Setting BASE=" 554 + attrs.getAttribute(HTML.Attribute.HREF).toString() 555 + t.getMessage()); 556 } 557 attrs.removeAttribute(HTML.Attribute.HREF); 558 } 559 } else if (tag == HTML.Tag.FORM) { 560 inForm = true; if (attrs.getAttribute(HTML.Attribute.ACTION) == null) { 563 attrs.addAttribute(HTML.Attribute.ACTION, 565 baseUrl.toString()); 566 } else { 567 addConvertedAttribute( HTML.Attribute.ACTION, 568 attrs ); 569 } 570 } else if (tag == HTML.Tag.TD) { 571 if (! (attrs.getAttribute(HTML.Attribute.BACKGROUND) == null)) { 573 addConvertedAttribute( HTML.Attribute.BACKGROUND, 574 attrs ); 575 } 576 } 577 578 579 583 586 if ( (removeScript == false) && (tag == HTML.Tag.SCRIPT)) { 587 inScript = true; 588 } else if ( (removeStyle == false) && (tag == HTML.Tag.STYLE)) { 589 inStyle = true; 590 } 591 592 if ( removeScript && (tag == HTML.Tag.SCRIPT)) { 593 ignoreLevel ++; 594 } else if ( removeStyle && (tag == HTML.Tag.STYLE)) { 595 ignoreLevel ++; 596 } else if ( removeHead && (tag == HTML.Tag.HEAD)) { 597 ignoreLevel ++; 598 } else if ( removeApplet && (tag == HTML.Tag.APPLET)) { 599 ignoreLevel ++; 600 } else if ( removeObject && (tag == HTML.Tag.OBJECT)) { 601 ignoreLevel ++; 602 } else if (removeNoScript && (tag.toString().equalsIgnoreCase("NOSCRIPT"))) { 603 ignoreLevel ++; 604 } 605 } 606 607 612 private void addConvertedAttribute( HTML.Attribute attr, 613 MutableAttributeSet attrs ) { 614 if( attrs.getAttribute( attr ) != null ) { 615 String attrSource = attrs.getAttribute( attr ).toString(); 616 attrs.addAttribute( attr, 617 generateNewUrl( attrSource ) ); 618 } 619 } 620 621 622 private String generateNewUrl(String oldURL) { 623 try { 624 URL x = new URL (baseUrl,oldURL); 625 return x.toString(); 626 } catch (Throwable t) { 627 if (oldURL.toLowerCase().startsWith("javascript:")) { 628 return oldURL; 629 } 630 logger.error( "HTMLRewriter: Setting BASE=" 631 + baseUrl 632 + " Old = " 633 + oldURL 634 + t.getMessage()); 635 return oldURL; } 637 } 638 639 public void handleText(char[] values,int param) { 640 addToResult(values); 641 } 642 } 643 } 644 | Popular Tags |