1 package org.enhydra.snapper.business; 2 3 5 6 import org.enhydra.snapper.Log; 7 import org.enhydra.snapper.SnapperAdmin; 8 import org.enhydra.snapper.SnapperManager; 9 import org.enhydra.snapper.api.*; 10 import org.enhydra.snapper.parsers.Parser; 11 import org.enhydra.snapper.spec.*; 12 13 import com.lutris.appserver.server.sql.DatabaseManager; 14 15 import java.io.*; 16 17 import java.util.ArrayList ; 18 import java.util.Date ; 19 import java.util.Iterator ; 20 import java.util.List ; 21 import java.util.TreeSet ; 22 import java.util.Vector ; 23 24 public class IndexerBase implements IndexerBaseSpec { 25 Parser parser; 26 27 File indexdir; 28 29 long age, size; 30 31 String [] types; 32 33 Indexer indexer; 34 35 FileChecker fc; 36 37 String storageType, sitename; 38 39 Vector notIndexed = new Vector (); 40 41 Vector filtered = new Vector (); 42 43 TreeSet included = new TreeSet (); 44 45 boolean meta = false, filter = false, include = false; 46 47 String metaDB = "", metaTable = "", metaFileColumn = "", 48 metaKeyColumn = "", metaValueColumn = ""; 49 50 String includeDB = "", includeTable = "", includeFileColumn = "", 51 includeModifiedColumn = ""; 52 53 String excludeDB = "", excludeTable = "", excludeFileColumn = ""; 54 55 List allfiles = new ArrayList (); 56 57 private boolean doStop; 58 59 private String title = ""; 60 61 private DatabaseManager dbm; 62 63 private String originalHost = ""; 64 65 public IndexerBase() { 66 } 67 68 public void setUpIndexer(boolean parserAfterMetadata, String siteName, 69 String language, boolean create, String classname) { 70 71 try { 72 if (filter) { 73 filtered = getFiltered(); 74 } 75 76 if (include) { 77 included = getIncluded(); 78 } 79 80 indexer = SnapperManager.getInstance().getIndexerFactory() 81 .newIndexer(); 82 indexdir = new File(siteName); 83 indexer.setUpIndexer(siteName, language, create, SnapperAdmin 84 .getmaxFieldLength()); 85 sitename = siteName; 86 } catch (Exception e) { 87 try { 88 SnapperManager.getInstance().getLoggingManager().error( 89 "Could not setup Indexer"); 90 } catch (Exception ex) { 91 System.out.println(" caught a " + e.getClass() 92 + "\n with message: " + e.getMessage()); 93 } 94 } 95 } 96 97 public void initChecker(int age, int size, Site site) { 98 try { 99 fc = new FileChecker(age, size, site.getDOC(), site.getHTML(), site 100 .getMSG(), site.getPDF(), site.getRTF(), site.getTXT(), 101 site.getXLS(), site.getPPT(), site.getPPS(), site.getEML(), 102 site.getZIP(), site.getOO(), site.getOO(), site.getOTHER()); 103 } catch (Exception e) { 104 try { 105 SnapperManager.getInstance().getLoggingManager().error( 106 "Could not setup FileChecker"); 107 } catch (Exception ex) { 108 System.out.println("Could not setup FileChecker"); 109 } 110 } 111 } 112 113 public void optimize() { 114 try { 115 indexer.optimize(); 116 } catch (Exception e) { 117 try { 118 SnapperManager.getInstance().getLoggingManager().error( 119 "Could not optimize Indexer"); 120 } catch (Exception ex) { 121 System.out.println(" caught a " + e.getClass() 122 + "\n with message: " + e.getMessage()); 123 } 124 } 125 } 126 127 public void close() { 128 try { 129 indexer.close(); 130 indexer = null; 131 SnapperManager.getInstance().getIndexerFactory().removeIndexer(); 132 if (this.notIndexed != null) { 133 this.notIndexed.removeAllElements(); 134 this.notIndexed = null; 135 } 136 137 } catch (Exception e) { 138 if (this.notIndexed != null) { 139 this.notIndexed.removeAllElements(); 140 this.notIndexed = null; 141 } 142 SnapperManager.getInstance().getLoggingManager().error( 143 "Could not close Indexer"); 144 } 145 } 146 147 public void indexDocs(File originalFile) throws IOException { 148 String metadata = ""; 150 title = ""; 151 152 if (isFiltered(new String (originalFile.getPath()))) 153 return; 154 155 if (this.meta) { 156 try { 157 BusinessUtil bu = new BusinessUtil(dbm); 158 metadata = new String (bu.getMetadata(originalFile.getPath(), 159 metaDB, metaTable, metaFileColumn, metaKeyColumn, 160 metaValueColumn)); 161 String ln = SnapperManager.getInstance() 162 .getLogicalNameFromDatabase(); 163 if (ln.equals("1")) 164 title = new String (bu.title); 165 bu.gc(); 166 bu = null; 167 } catch (Exception ex) { 168 SnapperManager.getInstance().getLoggingManager().debug( 169 "Not metadata for file: " + originalFile.getPath()); 170 171 } 172 173 } 174 checksize(); 175 176 parser = new Parser(); 177 Vector data = parser.parse(originalFile); 178 179 if (originalFile.getPath().endsWith("zip") 180 || originalFile.getPath().endsWith("ZIP")) { 181 indexUnZippedFiles(data); 182 return; 183 } 184 if (!checkData(data)) 185 return; 186 187 try { 188 SnapperManager.getInstance().getLoggingManager().debug( 189 "adding : " + originalFile.getName()); 190 191 if (title.length() == 0) 192 title = new String (data.elementAt(4).toString()); 193 String consPath = data.elementAt(1).toString(); 194 195 196 if(SnapperManager.getInstance().getFileSeparatorConvention()!=null && 197 SnapperManager.getInstance().getFileSeparatorConvention().equalsIgnoreCase("unix")) { 198 consPath = consPath.replaceAll("\\\\","/"); 199 originalHost = originalHost.replaceAll("\\\\","/"); 200 } 201 202 if(SnapperManager.getInstance().getRelativeIndexPaths() !=null && 203 SnapperManager.getInstance().getRelativeIndexPaths().equalsIgnoreCase("true")) { 204 205 if(consPath.startsWith(originalHost)){ 206 consPath=consPath.substring(originalHost.length()); 207 if(consPath.startsWith("/")) 208 consPath=consPath.substring(1); 209 } 210 } 211 indexer.indexDoc(((Long ) data.elementAt(0)).longValue(), consPath, 212 data.elementAt(2).toString(), data.elementAt(3).toString(), title, 213 data.elementAt(5).toString(), metadata, originalFile.getName()); 214 SnapperManager.getInstance().getLoggingManager().debug("done adding : " + originalFile.getName()); 215 } 216 217 catch (Exception ex) { 218 originalFile = null; 219 parser = null; 220 if(data!=null) 221 data.removeAllElements(); 222 data = null; 223 metadata = null; 224 try { 225 SnapperManager.getInstance().getLoggingManager().error( 226 "Could not remove site-lock!"); 227 } catch (Exception e) { 228 System.out.println("Could not remove site-lock!"); 229 } 230 } 231 232 originalFile = null; 233 if(data!=null) 234 data.removeAllElements(); 235 data = null; 236 parser = null; 237 metadata = null; 238 239 } 240 241 public void indexFTPDocs(File tempFile, String originalFile, long timestamp) 242 throws IOException { 243 String metadata = ""; 245 title = ""; 246 checksize(); 247 248 if (isFiltered(new String (originalFile))) 249 return; 250 251 if (this.meta) { 252 try { 253 BusinessUtil bu = new BusinessUtil(dbm); 254 metadata = new String (bu.getMetadata(originalFile, 255 metaDB, metaTable, metaFileColumn, metaKeyColumn, 256 metaValueColumn)); 257 String ln = SnapperManager.getInstance() 258 .getLogicalNameFromDatabase(); 259 if (ln.equals("1")) 260 title = new String (bu.title); 261 bu.gc(); 262 bu = null; 263 264 265 270 } 271 272 catch (Exception ex) { 273 SnapperManager.getInstance().getLoggingManager().debug( 275 "Not metadata for file: " + originalFile); 276 } 277 } 278 279 parser = new Parser(); 280 Vector data = parser.parse(tempFile); 281 282 if (originalFile.endsWith("zip") 283 || originalFile.endsWith("ZIP")) { 284 return; 286 } 287 288 boolean parsedWell = true; 289 290 for (int i = 0; i < data.size(); i++) { 291 if (data.elementAt(i) == null) { 292 parsedWell = false; 293 try { 294 data.setElementAt(originalFile,1); 295 } catch (Exception ex) { 296 } 297 } 298 299 } 300 301 if (!checkData(data)) 302 return; 303 304 305 try { 306 try { 307 SnapperManager.getInstance().getLoggingManager().debug( 308 "adding : " + originalFile); 309 } catch (Exception ex) { 310 System.out.println("adding " + originalFile); 311 } 312 317 if (title.length() == 0) 318 title = data.elementAt(4).toString(); 319 320 if(SnapperManager.getInstance().getFileSeparatorConvention()!=null && 321 SnapperManager.getInstance().getFileSeparatorConvention().equalsIgnoreCase("unix")) { 322 originalFile= originalFile.replaceAll("\\\\","/"); 323 originalHost = originalHost.replaceAll("\\\\","/"); 324 } 325 326 if(SnapperManager.getInstance().getRelativeIndexPaths() !=null && 327 SnapperManager.getInstance().getRelativeIndexPaths().equalsIgnoreCase("true")) { 328 329 330 if(originalFile.startsWith(originalHost)){ 331 originalFile=originalFile.substring(originalHost.length()); 332 if(originalFile.startsWith("/")) 333 originalFile=originalFile.substring(1); 334 } 335 } 336 337 338 indexer.indexDoc(timestamp, originalFile, data.elementAt(2) 339 .toString(), data.elementAt(3).toString(), title, data 340 .elementAt(5).toString(), metadata, tempFile.getName()); 341 342 try { 343 SnapperManager.getInstance().getLoggingManager().debug( 344 "done adding : " + originalFile); 345 } catch (Exception ex) { 346 System.out.println("done adding " + originalFile); 347 } 348 } 349 350 catch (Exception ex) { 351 parser = null; 352 Log.logException(ex); 353 } 354 parser = null; 355 } 356 357 public void indexDocs(String id, String siteName, String host, 358 String language, String location, String protocol, String username, 359 String password, boolean create) { 360 Date start = new Date (); 361 parser = new Parser(); 362 File loc; 363 originalHost=location; 364 365 if (included.size() > 0) { 366 367 } else if (protocol.equals("FileSystem")) { 368 indexFS(location, start); 369 } else if (protocol.equals("FTP")) { 370 indexFTP(host, location, username, password, start); 371 } else if (protocol.equals("UNC")) { 372 indexUNC(host, location, username, password, start); 373 } 374 parser = null; 375 } 376 377 384 private void indexUNC(String host, String location, String username, 385 String password, Date start) { 386 try { 387 DocumentStore ds = new DocumentStore(username, location, "UNC", 388 host, "", username, password); 389 File[] uncFiles = ds.retrieveUNCFiles(location); 390 391 if (uncFiles != null) { 392 for (int p = 0; p < uncFiles.length; p++) { 393 if (doStop) 394 return; 395 File fl = new File(host + File.separator + location, 396 uncFiles[p].getName()); 397 398 if (fl.isDirectory()) { 399 String [] fls = fl.list(); 400 indexSubfolders(fl, fls); 401 } 402 403 if (fileOK(uncFiles[p])) { 404 indexDocs(new File(location, uncFiles[p].getName())); 405 } 406 } 407 } 408 Date end = new Date (); 409 SnapperManager.getInstance().getIndexerFactory().removeIndexer(); 410 411 SnapperManager.getInstance().getLoggingManager().info( 412 end.getTime() - start.getTime() + " total milliseconds"); 413 416 } catch (Exception e) { 417 try { 418 SnapperManager.getInstance().getLoggingManager().error( 419 "Could not remove site-lock!"); 420 } catch (Exception ex) { 421 System.out.println("Could not remove site-lock!"); 422 } 423 } 424 } 425 426 433 private void indexFTP(String host, String location, String username, 434 String password, Date start) { 435 try { 436 DocumentStore ds = new DocumentStore(username, location, "FTP", 437 host, "", username, password); 438 ds.connect(); 439 ds.retrieveFile(location); 440 441 Vector tempfiles = ds.getTempFiles(); 442 Vector origfiles = ds.getOriginalFiles(); 443 Vector timestamps = ds.getTimeStamps(); 444 if (tempfiles != null) { 445 for (int p = 0; p < tempfiles.size(); p++) { 446 if (doStop) 447 return; 448 if (fileOK((File) tempfiles.elementAt(p))) { 449 indexFTPDocs((File) tempfiles.elementAt(p), 450 (String ) origfiles.elementAt(p), 451 ((Long ) timestamps.elementAt(p)).longValue()); 452 } 453 } 454 } 455 456 ds.disconnect(); 457 ds.delTempFiles(tempfiles); 458 SnapperManager.getInstance().getIndexerFactory().removeIndexer(); 459 Date end = new Date (); 460 461 SnapperManager.getInstance().getLoggingManager().info( 462 end.getTime() - start.getTime() + " total milliseconds"); 463 466 } catch (Exception e) { 467 try { 468 SnapperManager.getInstance().getLoggingManager().error( 469 "Could not remove site-lock!"); 470 } catch (Exception ex) { 471 System.out.println("Could not remove site-lock!"); 472 } 473 } 474 } 475 476 480 private void indexFS(String location, Date start) { 481 File loc; 482 try { 483 484 loc = new File(location); 485 if (loc.canRead()) { 486 if (loc.isDirectory()) { 487 String [] files = loc.list(); 488 if (files != null) { 489 for (int p = 0; p < files.length; p++) { 490 if (doStop) 491 return; 492 File fl = new File(location, files[p]); 493 494 if (fl.isDirectory()) { 495 String [] fls = fl.list(); 496 indexSubfolders(fl, fls); 497 } 498 if (fileOK(new File(location, files[p]))) 500 indexDocs(new File(location, files[p])); 502 } 503 } 504 } 505 } 506 507 Date end = new Date (); 508 SnapperManager.getInstance().getIndexerFactory().removeIndexer(); 509 510 SnapperManager.getInstance().getLoggingManager().info( 511 end.getTime() - start.getTime() + " total milliseconds"); 512 515 } catch (IOException e) { 516 parser = null; 517 try { 518 SnapperManager.getInstance().getLoggingManager().error( 519 "Could not remove site-lock!"); 520 } catch (Exception ex) { 521 System.out.println("Could not remove site-lock!"); 522 } 523 } 524 } 525 526 public boolean fileOK(File file) { 527 return fc.check(file); 528 } 529 530 private void indexSubfolders(File dir, String [] files) { 531 if (files != null) { 532 for (int q = 0; q < files.length; q++) { 533 if (doStop) 534 return; 535 File fl = new File(dir, files[q]); 536 if (fl.isDirectory()) { 537 String [] fls = fl.list(); 538 indexSubfolders(fl, fls); 539 } 540 if (fileOK(new File(dir, files[q]))) { 541 try { 543 indexDocs(new File(dir, files[q])); 545 } catch (Exception ex) { 546 Log.logException(ex); 547 } 548 } 549 } 550 } 551 } 552 553 public Vector getNotIndexed() { 554 return notIndexed; 555 } 556 557 private void checksize() { 558 String [] files = indexdir.list(); 559 long length = 0; 560 for (int q = 0; q < files.length; q++) { 561 length += (new File(indexdir, files[q])).length(); 562 } 563 if (length >= SnapperAdmin.getmaxIndexLength()) { 564 try { 565 SnapperManager.getInstance().getLoggingManager().debug( 566 "***INDEX GREATER THAN THAN " 567 + SnapperAdmin.getmaxIndexLength() 568 + " bytes ***"); 569 } catch (Exception ex) { 570 System.out.println("***INDEX GREATER THAN THAN " 571 + SnapperAdmin.getmaxIndexLength() + " bytes ***"); 572 } 573 } 574 } 575 576 private boolean isFiltered(String filePath) { 577 String path = filePath; 578 path = path.replaceAll("\\\\", "/"); 579 if (filtered.contains(path)) { 581 try { 582 SnapperManager.getInstance().getLoggingManager().debug( 583 "File filtered! ---> " + filePath); 584 } catch (Exception ex) { 585 System.out.println("File filtered! ---> " + filePath); 586 } 587 return true; 588 } 589 return false; 590 } 591 592 public void setMeta(String db, String table, String file, String key, 593 String value) { 594 this.metaDB = db; 595 this.metaTable = table; 596 this.metaFileColumn = file; 597 this.metaKeyColumn = key; 598 this.metaValueColumn = value; 599 this.meta = true; 600 } 601 602 public void setInclude(String db, String table, String column, 603 String modified) { 604 this.includeDB = db; 605 this.includeTable = table; 606 this.includeFileColumn = column; 607 this.includeModifiedColumn = modified; 608 this.include = true; 609 } 610 611 public void setExclude(String db, String table, String column) { 612 this.excludeDB = db; 613 this.excludeTable = table; 614 this.excludeFileColumn = column; 615 this.filter = true; 616 } 617 618 public Vector getFiltered() throws Exception { 619 BusinessUtil bu = new BusinessUtil(dbm); 620 return bu.getFilteredFiles(excludeDB, excludeTable, excludeFileColumn); 621 } 622 623 public TreeSet getIncluded() { 624 try { 625 BusinessUtil bu = new BusinessUtil(dbm); 626 return bu.getIncludedFiles(includeDB, includeTable, 627 includeFileColumn); 628 } catch (Exception e) { 629 try { 630 SnapperManager.getInstance().getLoggingManager().debug( 631 "Could not get included files"); 632 } catch (Exception ex) { 633 System.out.println("Could not get included files"); 634 } 635 return new TreeSet (); 636 } 637 } 638 639 public void setDoStop(boolean n) { 640 doStop = n; 641 } 642 643 public void indexInclude() { 644 Date start = new Date (); 645 try { 646 BusinessUtil bu = new BusinessUtil(dbm); 647 TreeSet included = bu.getIncludedFiles(this.includeDB, 648 this.includeTable, this.includeFileColumn); 649 Iterator itr = included.iterator(); 650 while (itr.hasNext()) { 651 if (doStop) 652 return; 653 File fl = new File(String.valueOf(itr.next().toString())); 654 655 if (fl.isDirectory()) { 656 String [] fls = fl.list(); 657 indexSubfolders(fl, fls); 658 } 659 if (fileOK(fl)) 661 indexDocs(fl); 663 fl = null; 664 } 665 666 included = null; 668 bu.gc(); 669 bu = null; 670 Date end = new Date (); 671 this.fc = null; 672 673 this.indexdir = null; 674 675 SnapperManager.getInstance().getIndexerFactory().removeIndexer(); 677 678 SnapperManager.getInstance().getLoggingManager().info( 679 end.getTime() - start.getTime() + " total milliseconds"); 680 683 } catch (Exception e) { 684 parser = null; 685 try { 686 SnapperManager.getInstance().getLoggingManager().error( 687 "Could not remove site-lock!"); 688 } catch (Exception ex) { 689 System.out.println("Could not remove site-lock!"); 690 } 691 } 692 693 } 694 695 public void setDBM(DatabaseManager dbm) { 696 this.dbm = dbm; 697 } 698 699 public void indexUnZippedFiles(Vector files) { 700 for (int i = 0; i < files.size(); i++) { 701 702 Vector data = (Vector ) files.elementAt(i); 703 if (!checkData(data)) 704 { 705 continue; 706 } 707 try { 708 SnapperManager.getInstance().getLoggingManager().debug( 709 "adding : " + data.elementAt(4).toString()); 710 711 712 String title = new String (data.elementAt(4).toString()); 713 String confPath = data.elementAt(1).toString(); 715 716 if(SnapperManager.getInstance().getFileSeparatorConvention()!=null && 717 SnapperManager.getInstance().getFileSeparatorConvention().equalsIgnoreCase("unix")) { 718 confPath = confPath.replaceAll("\\\\","/"); 719 originalHost = originalHost.replaceAll("\\\\","/"); 720 } 721 722 if(SnapperManager.getInstance().getRelativeIndexPaths() !=null && 723 SnapperManager.getInstance().getRelativeIndexPaths().equalsIgnoreCase("true")) { 724 725 726 if(confPath.startsWith(originalHost)){ 727 confPath=confPath.substring(originalHost.length()); 728 if(confPath.startsWith("/")) 729 confPath=confPath.substring(1); 730 } 731 } 732 indexer.indexDoc(((Long ) data.elementAt(0)).longValue(), confPath, 733 data.elementAt(2).toString(), 734 data.elementAt(3).toString(), data.elementAt(4) 735 .toString(), data.elementAt(5).toString(), "", 736 data.elementAt(4).toString()); 737 SnapperManager.getInstance().getLoggingManager().debug( 738 "done adding : " + data.elementAt(4).toString()); 739 } 740 741 catch (Exception ex) { 742 parser = null; 743 data.removeAllElements(); 744 data = null; 745 try { 746 SnapperManager.getInstance().getLoggingManager().error( 747 "Could not remove site-lock!"); 748 } catch (Exception e) { 749 System.out.println("Could not remove site-lock!"); 750 } 751 } 752 finally { 753 if (data != null) 754 data.removeAllElements(); 755 data = null; 756 parser = null; 757 } 758 759 760 761 } 762 763 } 764 765 private boolean checkData(Vector data) { 766 if ((data == null) || (data.size() == 0)) { 767 768 780 785 return false; 786 } 787 788 for (int i = 0; i < data.size(); i++) { 789 if (data.elementAt(i) == null) { 790 try { 791 SnapperManager.getInstance().getLoggingManager().debug( 792 "Could not index file: " 793 + data.elementAt(1).toString()); 795 798 String filePath = data.elementAt(1).toString(); 799 800 if(SnapperManager.getInstance().getFileSeparatorConvention()!=null && 801 SnapperManager.getInstance().getFileSeparatorConvention().equalsIgnoreCase("unix")) { 802 filePath= filePath.replaceAll("\\\\","/"); 803 } 804 805 806 notIndexed.add(new String (filePath)); 807 } catch (Exception ex) { 808 System.out.println("Could not index file: " 809 + data.elementAt(1).toString()); 810 } 811 812 return false; 813 } 814 815 } 816 return true; 817 } 818 819 } | Popular Tags |