|                                                                                                              1
 17
 18
 19
 20  package org.apache.lenya.search.crawler;
 21
 22  import java.io.File
  ; 23  import java.io.IOException
  ; 24
 25  import org.apache.avalon.excalibur.io.FileUtil;
 26  import org.apache.lenya.xml.DOMUtil;
 27  import org.apache.lenya.xml.DocumentHelper;
 28  import org.apache.lenya.xml.XPath;
 29  import org.apache.log4j.Category;
 30  import org.w3c.dom.Document
  ; 31  import org.w3c.dom.Element
  ; 32
 33
 34
 37  public class CrawlerConfiguration {
 38      static Category log = Category.getInstance(CrawlerConfiguration.class);
 39      private String
  configurationFilePath; 40      private String
  base_url; 41      private String
  user_agent; 42      private String
  scope_url; 43      private String
  uri_list; 44      private String
  htdocs_dump_dir; 45      private String
  robots_file; 46      private String
  robots_domain; 47
 48
 53      public CrawlerConfiguration(String
  configurationFilePath) { 54          this.configurationFilePath = configurationFilePath;
 55
 56          File
  configurationFile = new File  (configurationFilePath); 57
 58          try {
 59              Document
  document = DocumentHelper.readDocument(configurationFile); 60              configure(document.getDocumentElement());
 61          } catch (Exception
  e) { 62              log.error("Cannot load publishing configuration! ", e);
 63          }
 64      }
 65
 66
 71      public static void main(String
  [] args) { 72          if (args.length == 0) {
 73              System.err.println(
 74                  "Usage: org.apache.lenya.search.crawler.CrawlerConfiguration crawler.xconf [-name <name>]");
 75
 76              return;
 77          }
 78
 79          CrawlerConfiguration ce = new CrawlerConfiguration(args[0]);
 80          String
  parameter; 81
 82          String
  name = null; 83
 84          for (int i = 0; i < args.length; i++) {
 85              if (args[i].equals("-name")) {
 86                  if ((i + 1) < args.length) {
 87                      name = args[i + 1];
 88                  }
 89              }
 90          }
 91
 92          if (name != null) {
 93              if (name.equals("htdocs-dump-dir")) {
 94                  parameter = ce.getHTDocsDumpDir();
 95                  System.out.println(ce.resolvePath(parameter));
 96              } else {
 97                  System.out.println("No such element: " + name);
 98              }
 99          } else {
 100             parameter = ce.getBaseURL();
 101             System.out.println("Crawler Config: Base URL: " + parameter);
 102
 103             parameter = ce.getScopeURL();
 104             System.out.println("Crawler Config: Scope URL: " + parameter);
 105
 106             parameter = ce.getUserAgent();
 107             System.out.println("Crawler Config: User Agent: " + parameter);
 108
 109             parameter = ce.getURIList();
 110             System.out.println("Crawler Config: URI List: " + ce.resolvePath(parameter) + " (" + parameter + ")");
 111
 112             parameter = ce.getHTDocsDumpDir();
 113             System.out.println("Crawler Config: HTDocs Dump Dir: " + ce.resolvePath(parameter) + " (" + parameter + ")");
 114
 115             parameter = ce.getRobotsFile();
 116             if (parameter != null) {
 117                 System.out.println("Crawler Config: Robots File: " + ce.resolvePath(parameter + " (" + parameter + ")"));
 118             }
 119
 120             parameter = ce.getRobotsDomain();
 121             if (parameter != null) {
 122                 System.out.println("Crawler Config: Robots Domain: " + parameter);
 123             }
 124         }
 125     }
 126
 127
 134     public void configure(Element
  root) throws Exception  { 135         DOMUtil du = new DOMUtil();
 136
 137         base_url = du.getAttributeValue(root, new XPath("base-url/@href"));
 138         scope_url = du.getAttributeValue(root, new XPath("scope-url/@href"));
 139         user_agent = du.getElementValue(root, new XPath("user-agent"));
 140         uri_list = du.getAttributeValue(root, new XPath("uri-list/@src"));
 141         htdocs_dump_dir = du.getAttributeValue(root, new XPath("htdocs-dump-dir/@src"));
 142         if (du.elementExists(root, new XPath("robots"))) {
 143             robots_file = du.getAttributeValue(root, new XPath("robots/@src"));
 144             robots_domain = du.getAttributeValue(root, new XPath("robots/@domain"));
 145         }
 146     }
 147
 148
 153     public String
  getBaseURL() { 154         log.debug(".getBaseURL(): " + base_url);
 155
 156         return base_url;
 157     }
 158
 159
 164     public String
  getScopeURL() { 165         log.debug(".getScopeURL(): " + scope_url);
 166
 167         return scope_url;
 168     }
 169
 170
 175     public String
  getUserAgent() { 176         log.debug(".getUserAgent(): " + user_agent);
 177
 178         return user_agent;
 179     }
 180
 181
 186     public String
  getURIList() { 187         log.debug(".getURIList(): " + uri_list);
 188
 189         return uri_list;
 190     }
 191
 192
 197     public String
  getURIListResolved() { 198         log.debug(".getURIList(): " + uri_list);
 199
 200         return resolvePath(uri_list);
 201     }
 202
 203
 208     public String
  getHTDocsDumpDir() { 209         log.debug(".getHTDocsDumpDir(): " + htdocs_dump_dir);
 210
 211         return htdocs_dump_dir;
 212     }
 213
 214
 219     public String
  getHTDocsDumpDirResolved() { 220
 221         return resolvePath(htdocs_dump_dir);
 222     }
 223
 224
 229     public String
  getRobotsFile() { 230         log.debug(robots_file);
 231
 232         return robots_file;
 233     }
 234
 235
 240     public String
  getRobotsFileResolved() { 241         log.debug(robots_file);
 242
 243         return resolvePath(robots_file);
 244     }
 245
 246
 251     public String
  getRobotsDomain() { 252         log.debug(robots_domain);
 253
 254         return robots_domain;
 255     }
 256
 257
 264     public String
  resolvePath(String  path) { 265
 266                 if ( new File
  (path) .isAbsolute() ) { 268             return path;
 269         }
 270
 271                         try {
 274             String
  configDir = new File  (configurationFilePath) .getAbsoluteFile() .getParent(); 275             return new File
  (configDir, path) .getCanonicalPath(); 276
 277         } catch (java.io.IOException
  e) { 278                         e.printStackTrace();
 280             return null;
 281         }
 282
 283     }
 284 }
 285
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |