package org.apache.cocoon.bean;

import org.apache.cocoon.Constants;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.bean.helpers.Crawler;
import org.apache.cocoon.bean.helpers.DelayedOutputStream;
import org.apache.cocoon.components.notification.SimpleNotifyingBean;
import org.apache.cocoon.components.notification.Notifier;
import org.apache.cocoon.components.notification.DefaultNotifyingBuilder;
import org.apache.cocoon.components.notification.Notifying;
import org.apache.cocoon.matching.helpers.WildcardHelper;
import org.apache.commons.lang.SystemUtils;

import org.apache.excalibur.source.ModifiableSource;
import org.apache.excalibur.source.SourceResolver;
import org.apache.excalibur.source.Source;
import org.apache.excalibur.source.SourceNotFoundException;
import org.apache.excalibur.source.SourceUtil;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Drives a {@link Crawler} over a set of {@link Target}s: each target is
 * rendered through the inherited <code>getPage</code> method and the result is
 * written to the {@link ModifiableSource} resolved from the target's
 * destination URI. Registered {@link BeanListener}s are told about generated
 * pages, skipped pages, warnings and broken links.
 *
 * <p>Optionally a checksum file can be maintained (see
 * {@link #setChecksumURI(String)}) so that pages whose content did not change
 * are not rewritten.</p>
 */
public class CocoonBean extends CocoonWrapper {

    // User supplied parameters. Targets copy these values when they are added,
    // so a setter only affects targets added after it has been called.
    private boolean followLinks = true;
    private boolean precompileOnly = false;
    private boolean confirmExtension = true;
    private String defaultFilename = Constants.INDEX_URI;
    private boolean brokenLinkGenerate = false;
    private String brokenLinkExtension = "";
    /** Compiled (int[]) wildcard patterns of URIs that must not be processed. */
    private List excludePatterns = new ArrayList();
    /** Compiled (int[]) wildcard patterns; when non-empty a URI must match one. */
    private List includePatterns = new ArrayList();
    /** Extensions whose pages are crawled for links; null means "all". */
    private List includeLinkExtensions = null;

    // Internal state
    private boolean initialized;
    private List listeners = new ArrayList();
    private boolean verbose;
    SourceResolver sourceResolver;

    private Crawler crawler;
    /** URI of the checksum file, or null when checksums are not used. */
    private String checksumsURI = null;
    /** Maps a target's source URI to the digest string of its last content. */
    private Map checksums;

    /** Creates a bean with an empty crawler. */
    public CocoonBean() {
        this.crawler = new Crawler();
    }

    /**
     * Initializes the underlying wrapper and looks up the source resolver.
     * Does nothing when the bean has already been initialized.
     *
     * @throws Exception if the wrapper initialization or the lookup fails
     */
    public void initialize() throws Exception {
        if (this.initialized == false) {
            super.initialize();

            this.sourceResolver =
                (SourceResolver) getComponentManager().lookup(
                    SourceResolver.ROLE);

            initialized = true;
        }
    }

    /** Disposes the bean before it is garbage collected. */
    protected void finalize() throws Throwable {
        dispose();
        super.finalize();
    }

    //
    // GETTERS AND SETTERS FOR CONFIGURATION PROPERTIES
    //

    /** @param follow whether targets added from now on follow links */
    public void setFollowLinks(boolean follow) {
        followLinks = follow;
    }

    /** @param confirmExtension value handed to targets added from now on */
    public void setConfirmExtensions(boolean confirmExtension) {
        this.confirmExtension = confirmExtension;
    }

    /** @param precompileOnly when true, {@link #process()} generates no pages */
    public void setPrecompileOnly(boolean precompileOnly) {
        this.precompileOnly = precompileOnly;
    }

    /** @return the value set by {@link #setPrecompileOnly(boolean)} */
    public boolean isPrecompileOnly() {
        return precompileOnly;
    }

    /** @param verbose the verbose flag (stored only; not read in this class) */
    public void setVerbose(boolean verbose) {
        this.verbose = verbose;
    }

    /** @param filename default filename handed to targets added from now on */
    public void setDefaultFilename(String filename) {
        defaultFilename = filename;
    }

    /**
     * @param brokenLinkGenerate whether an error page is written for
     *        resources that could not be generated
     */
    public void setBrokenLinkGenerate(boolean brokenLinkGenerate) {
        this.brokenLinkGenerate = brokenLinkGenerate;
    }

    /**
     * @param brokenLinkExtension extra extension set on the target of a
     *        generated broken-link page; ignored when null
     */
    public void setBrokenLinkExtension(String brokenLinkExtension) {
        this.brokenLinkExtension = brokenLinkExtension;
    }

    /** @param uri URI of the checksum file, or null to disable checksums */
    public void setChecksumURI(String uri) {
        this.checksumsURI = uri;
    }

    /** @return the current follow-links default */
    public boolean followLinks() {
        return followLinks;
    }

    /** @return the current confirm-extension default */
    public boolean confirmExtensions() {
        return confirmExtension;
    }

    /**
     * Applies this bean's current defaults (default filename, follow links,
     * confirm extension, logger) to the target and queues it on the crawler.
     */
    private void addWithBeanDefaults(Target target) {
        target.setDefaultFilename(this.defaultFilename);
        target.setFollowLinks(this.followLinks);
        target.setConfirmExtension(this.confirmExtension);
        target.setLogger(this.logger);
        crawler.addTarget(target);
    }

    /**
     * Adds a target for generation, using the bean's current defaults.
     *
     * @param type      passed through to the {@link Target} constructor
     * @param root      passed through to the {@link Target} constructor
     * @param sourceURI URI of the page to generate
     * @param destURI   URI the generated page is written to
     * @throws IllegalArgumentException declared for the Target constructor
     */
    public void addTarget(
        String type,
        String root,
        String sourceURI,
        String destURI)
        throws IllegalArgumentException {
        addWithBeanDefaults(new Target(type, root, sourceURI, destURI));
    }

    /**
     * Adds a target without a root, using the bean's current defaults.
     *
     * @param type      passed through to the {@link Target} constructor
     * @param sourceURI URI of the page to generate
     * @param destURI   URI the generated page is written to
     * @throws IllegalArgumentException declared for the Target constructor
     */
    public void addTarget(String type, String sourceURI, String destURI)
        throws IllegalArgumentException {
        addWithBeanDefaults(new Target(type, sourceURI, destURI));
    }

    /**
     * Adds a target without type and root, using the bean's current defaults.
     *
     * @param sourceURI URI of the page to generate
     * @param destURI   URI the generated page is written to
     * @throws IllegalArgumentException declared for the Target constructor
     */
    public void addTarget(String sourceURI, String destURI)
        throws IllegalArgumentException {
        addWithBeanDefaults(new Target(sourceURI, destURI));
    }

    /**
     * Adds one target per source URI, all sharing the same destination URI.
     *
     * @param uris    list of source URI strings
     * @param destURI URI the generated pages are written to
     * @throws IllegalArgumentException declared for the Target constructor
     */
    public void addTargets(List uris, String destURI)
        throws IllegalArgumentException {
        Iterator i = uris.iterator();
        while (i.hasNext()) {
            addWithBeanDefaults(new Target((String) i.next(), destURI));
        }
    }

    /**
     * Adds a target with explicit per-target settings. The Target constructor
     * is chosen by which of <code>type</code> and <code>root</code> are null;
     * note that a non-null root always selects the four argument constructor.
     *
     * @param type             target type, may be null
     * @param root             target root, may be null
     * @param sourceURI        URI of the page to generate
     * @param destURI          URI the generated page is written to
     * @param followLinks      whether this target follows links
     * @param confirmExtension confirm-extension setting for this target
     * @param logger           logger name handed to the target
     * @throws IllegalArgumentException declared for the Target constructor
     */
    public void addTarget(
        String type,
        String root,
        String sourceURI,
        String destURI,
        boolean followLinks,
        boolean confirmExtension,
        String logger)
        throws IllegalArgumentException {

        Target target;
        if (root == null && type == null) {
            target = new Target(sourceURI, destURI);
        } else if (root == null) {
            target = new Target(type, sourceURI, destURI);
        } else {
            target = new Target(type, root, sourceURI, destURI);
        }
        target.setDefaultFilename(this.defaultFilename);
        target.setFollowLinks(followLinks);
        target.setConfirmExtension(confirmExtension);
        target.setLogger(logger);
        crawler.addTarget(target);
    }

    /** @return the crawler's remaining target count */
    public int getTargetCount() {
        return crawler.getRemainingCount();
    }

    /** @param pattern wildcard pattern of URIs to exclude from processing */
    public void addExcludePattern(String pattern) {
        int preparedPattern[] = WildcardHelper.compilePattern(pattern);
        excludePatterns.add(preparedPattern);
    }

    /** @param pattern wildcard pattern of URIs to include in processing */
    public void addIncludePattern(String pattern) {
        int preparedPattern[] = WildcardHelper.compilePattern(pattern);
        includePatterns.add(preparedPattern);
    }

    /**
     * Restricts link crawling to pages with the given extension. As long as
     * no extension has been added, every page is considered crawlable.
     *
     * @param extension extension compared against {@link Target#getExtension()}
     */
    public void addIncludeLinkExtension(String extension) {
        if (includeLinkExtensions == null) {
            includeLinkExtensions = new ArrayList();
        }
        includeLinkExtensions.add(extension);
    }

    /** @param listener listener to notify about processing events */
    public void addListener(BeanListener listener) {
        this.listeners.add(listener);
    }

    /** Forwards a "page generated" event to every registered listener. */
    public void pageGenerated(String sourceURI,
                              String destURI,
                              int pageSize,
                              int linksInPage,
                              int newLinksInPage,
                              int pagesRemaining,
                              int pagesComplete,
                              long timeTaken) {
        Iterator i = listeners.iterator();
        while (i.hasNext()) {
            BeanListener l = (BeanListener) i.next();
            l.pageGenerated(sourceURI,
                            destURI,
                            pageSize,
                            linksInPage,
                            newLinksInPage,
                            pagesRemaining,
                            pagesComplete,
                            timeTaken);
        }
    }

    /** Forwards a message to every registered listener. */
    public void sendMessage(String msg) {
        Iterator i = listeners.iterator();
        while (i.hasNext()) {
            BeanListener l = (BeanListener) i.next();
            l.messageGenerated(msg);
        }
    }

    /** Forwards a warning about the given URI to every registered listener. */
    public void sendWarning(String uri, String warning) {
        Iterator i = listeners.iterator();
        while (i.hasNext()) {
            BeanListener l = (BeanListener) i.next();
            l.warningGenerated(uri, warning);
        }
    }

    /**
     * Reports a broken link to every registered listener. The second and
     * fourth listener arguments are always passed as "" and null.
     */
    public void sendBrokenLinkWarning(String uri, String warning) {
        Iterator i = listeners.iterator();
        while (i.hasNext()) {
            BeanListener l = (BeanListener) i.next();
            l.brokenLinkFound(uri, "", warning, null);
        }
    }

    /** Forwards a "page skipped" event to every registered listener. */
    public void pageSkipped(String uri, String message) {
        Iterator i = listeners.iterator();
        while (i.hasNext()) {
            BeanListener l = (BeanListener) i.next();
            l.pageSkipped(uri, message);
        }
    }

    /**
     * Releases the source resolver and disposes the underlying wrapper.
     * Does nothing when the bean was never initialized.
     */
    public void dispose() {
        if (this.initialized) {
            if (this.sourceResolver != null) {
                getComponentManager().release(this.sourceResolver);
                this.sourceResolver = null;
            }
            super.dispose();
        }
    }

    /**
     * Processes all queued targets, initializing the bean first if needed.
     * When a checksum URI is set, the checksum file is read before and
     * written after processing.
     *
     * @throws Exception if initialization or checksum file handling fails
     */
    public void process() throws Exception {

        if (!this.initialized) {
            this.initialize();
        }

        if (crawler.getRemainingCount() == 0 && !precompileOnly) {
            log.info("No targets for to be processed.");
            return;
        }

        if (this.checksumsURI != null) {
            readChecksumFile();
        }

        // NOTE(review): this guard looks always true for a non-negative count;
        // kept exactly as in the original.
        if (crawler.getRemainingCount()>=0) {
            Iterator iterator = crawler.iterator();
            while (iterator.hasNext()) {
                Target target = (Target) iterator.next();
                if (!precompileOnly) {
                    processTarget(crawler, target);
                }
            }
        }

        if (this.checksumsURI != null) {
            writeChecksumFile();
        }

        if (log.isInfoEnabled()) {
            log.info(
                " Memory used: "
                    + (Runtime.getRuntime().totalMemory()
                        - Runtime.getRuntime().freeMemory()));
            log.info(
                " Processed, Translated & Left: "
                    + crawler.getProcessedCount()
                    + ", "
                    + crawler.getTranslatedCount()
                    + ", "
                    + crawler.getRemainingCount());
        }
    }

    /**
     * Generates a single target and queues the links found in it.
     *
     * <p>When the target confirms extensions, its mime type is determined
     * first and, if it also follows links and is crawlable, its links are
     * gathered up front and handed to <code>getPage</code> as a map from
     * original to translated URI. When it does not confirm extensions, the
     * links are gathered by <code>getPage</code> itself while the page is
     * generated. In both cases links that fail the include/exclude rules or
     * have no derived target are reported as skipped.</p>
     *
     * <p>Failures while generating the page are logged and reported to the
     * listeners as broken links; they are not propagated.</p>
     *
     * @param crawler the crawler new link targets are added to
     * @param target  the target to generate
     * @throws Exception if determining the mime type or gathering the links
     *         of the target fails before page generation starts
     */
    private void processTarget(Crawler crawler, Target target) throws Exception {

        int status = 0;

        int linkCount = 0;
        int newLinkCount = 0;
        int pageSize = 0;
        long startTimeMillis = System.currentTimeMillis();

        if (target.confirmExtensions()) {
            if (!crawler.hasTranslatedLink(target)) {
                final String mimeType = getType(target.getDeparameterizedSourceURI(), target.getParameters());
                target.setMimeType(mimeType);
                crawler.addTranslatedLink(target);
            }
        }

        // Original link URI -> translated URI, handed to getPage below.
        final HashMap translatedLinks = new HashMap();
        if (target.followLinks() && target.confirmExtensions() && isCrawlablePage(target)) {
            final Iterator i =
                this.getLinks(target.getDeparameterizedSourceURI(), target.getParameters()).iterator();

            while (i.hasNext()) {
                String linkURI = (String) i.next();
                Target linkTarget = target.getDerivedTarget(linkURI);

                if (linkTarget == null) {
                    pageSkipped(linkURI, "link does not share same root as parent");
                    continue;
                }

                if (!isIncluded(linkTarget.getSourceURI())) {
                    pageSkipped(linkTarget.getSourceURI(), "matched include/exclude rules");
                    continue;
                }

                if (!crawler.hasTranslatedLink(linkTarget)) {
                    try {
                        final String mimeType =
                                getType(linkTarget.getDeparameterizedSourceURI(), linkTarget.getParameters());
                        linkTarget.setMimeType(mimeType);
                        crawler.addTranslatedLink(linkTarget);
                        log.info(" Link translated: " + linkTarget.getSourceURI());
                        if (crawler.addTarget(linkTarget)) {
                            newLinkCount++;
                        }
                    } catch (ProcessingException pe) {
                        this.sendBrokenLinkWarning(linkTarget.getSourceURI(), pe.getMessage());
                        if (this.brokenLinkGenerate) {
                            if (crawler.addTarget(linkTarget)) {
                                newLinkCount++;
                            }
                        }
                    }
                } else {
                    // Reuse the already translated target but keep the URI
                    // as it was written in this page.
                    String originalURI = linkTarget.getOriginalSourceURI();
                    linkTarget = crawler.getTranslatedLink(linkTarget);
                    linkTarget.setOriginalURI(originalURI);
                }

                translatedLinks.put(linkTarget.getOriginalSourceURI(), linkTarget.getTranslatedURI(target.getPath()));
            }

            linkCount = translatedLinks.size();
        }

        try {
            // Content is buffered so that the destination is only opened
            // once generation succeeded.
            DelayedOutputStream output = new DelayedOutputStream();
            try {
                List gatheredLinks;
                if (!target.confirmExtensions() && target.followLinks() && isCrawlablePage(target)) {
                    gatheredLinks = new ArrayList();
                } else {
                    gatheredLinks = null;
                }

                status =
                    getPage(
                        target.getDeparameterizedSourceURI(),
                        getLastModified(target),
                        target.getParameters(),
                        target.confirmExtensions() ? translatedLinks : null,
                        gatheredLinks,
                        output);

                if (status >= 400) {
                    throw new ProcessingException(
                        "Resource not found: " + status);
                }

                if (gatheredLinks != null) {
                    for (Iterator it = gatheredLinks.iterator();it.hasNext();) {
                        String linkURI = (String) it.next();
                        Target linkTarget = target.getDerivedTarget(linkURI);

                        if (linkTarget == null) {
                            pageSkipped(linkURI, "link does not share same root as parent");
                            continue;
                        }

                        if (!isIncluded(linkTarget.getSourceURI())) {
                            pageSkipped(linkTarget.getSourceURI(), "matched include/exclude rules");
                            continue;
                        }
                        if (crawler.addTarget(linkTarget)) {
                            newLinkCount++;
                        }
                    }
                    linkCount = gatheredLinks.size();
                }

            } catch (ProcessingException pe) {
                // Discard the buffered content; nulling output keeps the
                // finally block below from writing it.
                output.close();
                output = null;
                this.resourceUnavailable(target);
                this.sendBrokenLinkWarning(target.getSourceURI(),
                    DefaultNotifyingBuilder.getRootCause(pe).getMessage());
            } finally {
                // NOTE(review): presumably status -1 means "not modified since
                // getLastModified(target)" - confirm against getPage.
                if (output != null && status != -1) {

                    ModifiableSource source = getSource(target);
                    try {
                        pageSize = output.size();

                        if (this.checksumsURI == null || !isSameContent(output, target)) {
                            OutputStream stream = source.getOutputStream();
                            output.setFileOutputStream(stream);
                            output.flush();
                            output.close();
                            pageGenerated(target.getSourceURI(),
                                          target.getAuthlessDestURI(),
                                          pageSize,
                                          linkCount,
                                          newLinkCount,
                                          crawler.getRemainingCount(),
                                          crawler.getProcessedCount(),
                                          System.currentTimeMillis()- startTimeMillis);
                        } else {
                            output.close();
                            pageSkipped(target.getSourceURI(), "Page not changed");
                        }
                    } catch (IOException ioex) {
                        log.warn(ioex.toString());
                    } finally {
                        releaseSource(source);
                    }
                }
            }
        } catch (Exception rnfe) {
            // Hand the exception to the logger instead of dumping the stack
            // trace on System.err, so it ends up with the other diagnostics.
            log.warn("Could not process URI: " + target.getSourceURI(), rnfe);
            this.sendBrokenLinkWarning(target.getSourceURI(), "URI not found: "+rnfe.getMessage());
        }
    }

    /**
     * Writes a "resource not found" page to the target's destination when
     * broken link generation is enabled; otherwise does nothing.
     *
     * @param target the target whose resource could not be generated
     * @throws IOException if the destination cannot be resolved or written
     * @throws ProcessingException if the destination is not modifiable
     */
    private void resourceUnavailable(Target target)
        throws IOException, ProcessingException {
        if (brokenLinkGenerate) {
            if (brokenLinkExtension != null) {
                target.setExtraExtension(brokenLinkExtension);
            }
            SimpleNotifyingBean n = new SimpleNotifyingBean(this);
            n.setType("resource-not-found");
            n.setTitle("Resource not Found");
            n.setSource("Cocoon commandline (Main.java)");
            n.setMessage("Page Not Available.");
            n.setDescription("The requested resource couldn't be found.");
            n.addExtraDescription(Notifying.EXTRA_REQUESTURI, target.getSourceURI());
            n.addExtraDescription("missing-file", target.getSourceURI());

            ModifiableSource source = getSource(target);
            try {
                OutputStream stream = source.getOutputStream();

                PrintStream out = new PrintStream(stream);
                try {
                    Notifier.notify(n, out, "text/html");
                    out.flush();
                } finally {
                    // Close the stream even when the notifier fails.
                    out.close();
                }
            } finally {
                releaseSource(source);
            }
        }
    }

    /**
     * Resolves the destination of a target. The caller must hand the result
     * to {@link #releaseSource(Source)} when done.
     *
     * @param target the target whose destination URI is resolved
     * @return the modifiable source for the destination URI
     * @throws IOException if the URI cannot be resolved
     * @throws ProcessingException if the resolved source is not modifiable
     */
    public ModifiableSource getSource(Target target)
        throws IOException, ProcessingException {
        final String finalDestinationURI = target.getDestinationURI();
        Source src = sourceResolver.resolveURI(finalDestinationURI);
        if (!(src instanceof ModifiableSource)) {
            sourceResolver.release(src);
            throw new ProcessingException(
                "Source is not Modifiable: " + finalDestinationURI);
        }
        return (ModifiableSource) src;
    }

    /**
     * @param target the target whose destination is inspected
     * @return the last modification value reported by the destination source
     * @throws IOException if the destination cannot be resolved
     * @throws ProcessingException if the destination is not modifiable
     */
    public long getLastModified(Target target) throws IOException, ProcessingException {
        Source src = getSource(target);
        try {
            return src.getLastModified();
        } finally {
            // Release even when the source fails to report its timestamp.
            this.releaseSource(src);
        }
    }

    /** @param source a source obtained from this bean's resolver */
    public void releaseSource(Source source) {
        sourceResolver.release(source);
    }

    /**
     * Applies the include and exclude patterns to a URI. Without include
     * patterns every URI starts out included; a matching exclude pattern
     * always wins.
     */
    private boolean isIncluded(String uri) {
        boolean included;
        Iterator i;
        HashMap map = new HashMap();

        if (includePatterns.size() == 0) {
            included = true;
        } else {
            included = false;
            i = includePatterns.iterator();
            while (i.hasNext()){
                int pattern[] = (int[])i.next();
                if (WildcardHelper.match(map, uri, pattern)) {
                    included=true;
                    break;
                }
            }
        }
        if (excludePatterns.size() != 0) {
            i = excludePatterns.iterator();
            while (i.hasNext()) {
                int pattern[] = (int[])i.next();
                if (WildcardHelper.match(map, uri, pattern)) {
                    included=false;
                    break;
                }
            }
        }
        return included;
    }

    /**
     * @return true when no link extensions were registered, otherwise whether
     *         the target's extension is one of the registered ones
     */
    private boolean isCrawlablePage(Target target) {
        if (includeLinkExtensions == null) {
            return true;
        } else {
            return includeLinkExtensions.contains(target.getExtension());
        }
    }

    /**
     * Loads the checksum file into {@link #checksums}. Each line holds a
     * filename and a checksum separated by a tab; blank lines and lines
     * starting with '#' are ignored. A missing file leaves the map empty.
     *
     * @throws Exception if the file cannot be read or a line has no tab
     */
    private void readChecksumFile() throws Exception {
        checksums = new HashMap();

        Source checksumSource = null;
        try {
            checksumSource = sourceResolver.resolveURI(checksumsURI);
            BufferedReader reader = new BufferedReader(new InputStreamReader(checksumSource.getInputStream()));
            try {
                String line;
                int lineNo=0;
                while ((line = reader.readLine())!=null) {
                    lineNo++;
                    if (line.trim().startsWith("#") || line.trim().length()==0 ) {
                        continue;
                    }
                    int tab = line.indexOf("\t");
                    if (tab==-1) {
                        throw new ProcessingException("Missing tab at line "+lineNo+" of " + checksumsURI);
                    }
                    String filename = line.substring(0, tab);
                    String checksum = line.substring(tab+1);
                    checksums.put(filename, checksum);
                }
            } finally {
                // Close the reader on malformed input as well.
                reader.close();
            }
        } catch (SourceNotFoundException e) {
            // Deliberately ignored: there is no checksum file yet, so we
            // simply start with an empty map.
        } finally {
            if (checksumSource != null) {
                sourceResolver.release(checksumSource);
            }
        }
    }

    /**
     * Writes {@link #checksums} to the checksum file, one
     * "filename TAB checksum" line per entry.
     *
     * @throws Exception if the checksum URI cannot be resolved or written,
     *         or does not resolve to a modifiable source
     */
    private void writeChecksumFile() throws Exception {
        Source checksumSource = sourceResolver.resolveURI(checksumsURI);
        try {
            if (!(checksumSource instanceof ModifiableSource)) {
                throw new ProcessingException("Checksum file is not Modifiable:" + checksumSource);
            }
            ModifiableSource source = (ModifiableSource) checksumSource;
            PrintWriter writer = new PrintWriter(new OutputStreamWriter(source.getOutputStream()));
            try {
                Iterator i = checksums.entrySet().iterator();
                while (i.hasNext()){
                    Map.Entry entry = (Map.Entry) i.next();
                    String key = (String) entry.getKey();
                    String checksum = (String) entry.getValue();
                    writer.println(key + "\t" + checksum);
                }
            } finally {
                writer.close();
            }
        } finally {
            // The resolved source was previously never handed back.
            sourceResolver.release(checksumSource);
        }
    }

    /**
     * Compares the MD5 digest of the buffered content with the checksum
     * stored for the target's source URI. When they differ, the stored
     * checksum is replaced by the new digest.
     *
     * @return true when the digests are equal; false when they differ or
     *         when the digest could not be computed
     */
    private boolean isSameContent(DelayedOutputStream stream, Target target) {
        try {
            MessageDigest md5 = MessageDigest.getInstance("MD5");
            md5.update(stream.getContent());
            // Digest bytes are arbitrary binary data. Decoding them with the
            // platform default charset is lossy (e.g. on UTF-8) and made the
            // checksums platform dependent. ISO-8859-1 maps every byte to
            // exactly one char. Checksums written by older versions will
            // mismatch once, which only causes those pages to be regenerated.
            String streamDigest = SourceUtil.encodeBASE64(new String(md5.digest(), "ISO-8859-1"));
            String targetDigest = (String) checksums.get(target.getSourceURI());

            if (streamDigest.equals(targetDigest)) {
                return true;
            } else {
                checksums.put(target.getSourceURI(), streamDigest);
                return false;
            }
        } catch (NoSuchAlgorithmException e) {
            // Without MD5 we cannot compare; treat the page as changed.
            return false;
        } catch (UnsupportedEncodingException e) {
            // ISO-8859-1 is a charset every Java platform must support;
            // treat the page as changed should this ever happen.
            return false;
        }
    }

    /**
     * Builds the banner text from the name, version and year constants.
     *
     * @return the banner, terminated by an empty line
     */
    public static String getProlog() {
        String lSep = SystemUtils.LINE_SEPARATOR;
        StringBuffer msg = new StringBuffer();
        msg.append("------------------------------------------------------------------------ ").append(lSep);
        msg.append(Constants.NAME).append(" ").append(Constants.VERSION).append(lSep);
        msg.append("Copyright (c) ").append(Constants.YEAR).append(" Apache Software Foundation. All rights reserved.").append(lSep);
        msg.append("------------------------------------------------------------------------ ").append(lSep).append(lSep);
        return msg.toString();
    }
}