1 6 package net.sourceforge.cvsgrab; 7 8 import net.sourceforge.cvsgrab.web.Chora2_0Interface; 9 import net.sourceforge.cvsgrab.web.CvsWeb1_0Interface; 10 import net.sourceforge.cvsgrab.web.CvsWeb2_0Interface; 11 import net.sourceforge.cvsgrab.web.CvsWeb3_0Interface; 12 import net.sourceforge.cvsgrab.web.FishEye1_0Interface; 13 import net.sourceforge.cvsgrab.web.Sourcecast1_0Interface; 14 import net.sourceforge.cvsgrab.web.Sourcecast2_0Interface; 15 import net.sourceforge.cvsgrab.web.Sourcecast3_0Interface; 16 import net.sourceforge.cvsgrab.web.ViewCvs0_7Interface; 17 import net.sourceforge.cvsgrab.web.ViewCvs0_8Interface; 18 import net.sourceforge.cvsgrab.web.ViewCvs0_9Interface; 19 import net.sourceforge.cvsgrab.web.ViewCvs1_0Interface; 20 21 import org.w3c.dom.Document ; 22 23 import java.util.ArrayList ; 24 import java.util.Arrays ; 25 import java.util.Comparator ; 26 import java.util.HashMap ; 27 import java.util.HashSet ; 28 import java.util.Iterator ; 29 import java.util.List ; 30 import java.util.Map ; 31 import java.util.Properties ; 32 import java.util.Set ; 33 import java.util.StringTokenizer ; 34 35 43 public abstract class CvsWebInterface { 44 45 public static final String DETECTED_WEB_INTERFACE = "detectedWebInterface"; 46 47 private static CvsWebInterface[] getWebInterfaces(CVSGrab grabber) { 48 return new CvsWebInterface[] { 49 new ViewCvs0_7Interface(grabber), 50 new ViewCvs0_8Interface(grabber), 51 new ViewCvs0_9Interface(grabber), 52 new ViewCvs1_0Interface(grabber), 53 new Sourcecast1_0Interface(grabber), 54 new Sourcecast2_0Interface(grabber), 55 new Sourcecast3_0Interface(grabber), 56 new CvsWeb1_0Interface(grabber), 57 new CvsWeb2_0Interface(grabber), 58 new CvsWeb3_0Interface(grabber), 59 new Chora2_0Interface(grabber), 60 new FishEye1_0Interface(grabber) 61 }; 62 } 63 64 private static Map documents = new HashMap (); 65 66 73 public static final CvsWebInterface getInterface(CVSGrab grabber, String interfaceId) throws Exception { 74 CvsWebInterface[] webInterfaces = getWebInterfaces(grabber); 75 for (int i = 0; i < webInterfaces.length; i++) { 76 if (webInterfaces[i].getId().equals(interfaceId)) { 77 webInterfaces[i].init(); 78 return webInterfaces[i]; 79 } 80 } 81 return null; 82 } 83 84 87 public static final String [] getInterfaceIds(CVSGrab grabber) { 88 CvsWebInterface[] webInterfaces = getWebInterfaces(grabber); 89 String ids[] = new String [webInterfaces.length]; 90 for (int i = 0; i < ids.length; i++) { 91 ids[i] = webInterfaces[i].getId(); 92 } 93 return ids; 94 } 95 96 public static final String [] getBaseUrls(CVSGrab grabber) { 97 CvsWebInterface[] webInterfaces = getWebInterfaces(grabber); 98 Set urls = new HashSet (); 99 for (int i = 0; i < webInterfaces.length; i++) { 100 CvsWebInterface webInterface = webInterfaces[i]; 101 urls.add(webInterface.getBaseUrl()); 102 urls.add(webInterface.getAltBaseUrl()); 103 } 104 urls.remove(null); 105 String [] listOfUrls = (String []) urls.toArray(new String [urls.size()]); 106 Arrays.sort(listOfUrls, new Comparator () { 108 public int compare(Object o1, Object o2) { 109 String s1 = (String ) o1; 110 String s2 = (String ) o2; 111 if (s1.length() > s2.length()) { 112 return -1; 113 } 114 if (s1.length() < s2.length()) { 115 return 1; 116 } 117 return 0; 118 } 119 120 }); 121 return listOfUrls; 122 } 123 124 129 public static CvsWebInterface findInterface(CVSGrab grabber) throws Exception { 130 checkRootUrl(grabber.getRootUrl()); 131 CvsWebInterface[] webInterfaces = getWebInterfaces(grabber); 132 List errors = new ArrayList (); 133 for (int i = 0; i < webInterfaces.length; i++) { 135 CvsWebInterface webInterface = webInterfaces[i]; 136 if (webInterface.presetMatch(grabber.getRootUrl(), grabber.getPackagePath())) { 137 return webInterface; 138 } 139 } 140 for (int i = 0; i < webInterfaces.length; i++) { 142 CvsWebInterface webInterface = webInterfaces[i]; 143 if (webInterface.validate(errors)) { 144 return webInterface; 145 } 146 } 147 CVSGrab.getLog().info("Tried to connect to the following urls: "); 148 for (Iterator i = documents.keySet().iterator(); i.hasNext(); ) { 149 CVSGrab.getLog().info(i.next()); 150 } 151 CVSGrab.getLog().info("Problems found during automatic detection: "); 152 for (Iterator i = errors.iterator(); i.hasNext();) { 153 String msg = (String ) i.next(); 154 CVSGrab.getLog().info(msg); 155 } 156 return null; 157 } 158 159 165 public static Properties getWebProperties(CVSGrab grabber, String rootUrl) { 166 CvsWebInterface[] webInterfaces = getWebInterfaces(grabber); 167 for (int i = 0; i < webInterfaces.length; i++) { 168 CvsWebInterface webInterface = webInterfaces[i]; 169 Properties webProperties = webInterface.guessWebProperties(rootUrl); 170 if (!webProperties.isEmpty()) { 171 Document doc = loadDocument(rootUrl); 172 if (doc == null) { 173 continue; 174 } 175 try { 176 grabber.getWebOptions().readProperties(webProperties); 177 webInterface.detect(doc); 180 webProperties.put(DETECTED_WEB_INTERFACE, webInterface); 182 return webProperties; 183 } catch (InvalidVersionException e) { 184 grabber.getWebOptions().clearLocation(); 186 } catch (MarkerNotFoundException e) { 187 grabber.getWebOptions().clearLocation(); 189 } 190 } 191 } 192 return new Properties (); 193 } 194 195 private static Document loadDocument(String url) { 196 if (url == null) { 197 throw new IllegalArgumentException ("Null url"); 198 } 199 Document doc = (Document ) documents.get(url); 200 if (doc == null) { 201 documents.put(url, null); 202 try { 203 doc = WebBrowser.getInstance().getDocument(url); 204 documents.put(url, doc); 205 } catch (Exception ex) { 206 CVSGrab.getLog().debug("Error when loading page " + url, ex); 208 } 209 } 210 return doc; 211 } 212 213 private static void checkRootUrl(String url) { 214 int slash = url.indexOf('/', 8); 217 if (slash > 0) { 218 String path = url.substring(slash); 219 String beforeLastPart = ""; 220 String lastPart = null; 221 StringTokenizer st = new StringTokenizer (path, "/", false); 222 while (st.hasMoreTokens()) { 223 if (lastPart != null) { 224 beforeLastPart += "/" + lastPart; 225 } 226 lastPart = st.nextToken(); 227 } 228 if (lastPart != null) { 229 lastPart = lastPart.toLowerCase(); 230 if (beforeLastPart.length() > 0 && lastPart.indexOf("cvs") < 0 && lastPart.indexOf(".") < 0 231 && lastPart.indexOf("source") < 0 && lastPart.indexOf("src") < 0 232 && lastPart.indexOf("browse") < 0) { 233 CVSGrab.getLog().warn("The root url " + url + " doesn't seem valid"); 234 String newRootUrl = url.substring(0, slash) + beforeLastPart; 235 CVSGrab.getLog().warn("Try " + newRootUrl + " as the root url instead"); 236 } 237 } 238 } 239 } 240 241 244 public static void registerDocument(String url, Document doc) { 245 documents.put(url, doc); 246 } 247 248 private String _versionTag; 249 private String _queryParams; 250 private CVSGrab _grabber; 251 252 256 public CvsWebInterface(CVSGrab grabber) { 257 super(); 258 _grabber = grabber; 259 } 260 261 public CVSGrab getGrabber() { 262 return _grabber; 263 } 264 265 270 public boolean presetMatch(String rootUrl, String packagePath) { 271 return false; 272 } 273 274 279 public boolean validate(List errors) { 280 if (presetMatch(_grabber.getRootUrl(), _grabber.getPackagePath())) { 282 return true; 283 } 284 Document doc = null; 285 String [] urls = new String [] {getBaseUrl(), getAltBaseUrl()}; 286 for (int j = 0; j < urls.length; j++) { 287 String url = urls[j]; 288 if (url == null) { 289 continue; 290 } 291 try { 292 CVSGrab.getLog().debug(getId() + ": Loading for validation " + url); 293 doc = loadDocument(url); 294 if (doc == null) { 295 errors.add(getId() + " tried to match page " + url + " but page doesn't exist"); 296 continue; 297 } 298 299 detect(doc); 300 return true; 301 302 } catch (DetectException ex) { 303 CVSGrab.getLog().debug(getId() + " doesn't match, cause is " + ex.toString()); 305 errors.add(getId() + " tried to match page " + url + " but found error " + ex.getMessage()); 306 } catch (RuntimeException ex) { 307 CVSGrab.getLog().debug(getId() + " doesn't match, cause is " + ex.toString()); 309 errors.add(getId() + " tried to match page " + url + " but found error " + ex.getMessage()); 310 } 311 } 312 return false; 313 } 314 315 318 public String getVersionTag() { 319 return _versionTag; 320 } 321 322 326 public void setVersionTag(String versionTag) { 327 this._versionTag = versionTag; 328 } 329 330 333 public String getQueryParams() { 334 return _queryParams; 335 } 336 337 341 public void setQueryParams(String params) { 342 _queryParams = params; 343 } 344 345 350 public abstract void init() throws Exception ; 351 352 360 public abstract void detect(Document htmlPage) throws MarkerNotFoundException, InvalidVersionException; 361 362 365 public abstract String getId(); 366 367 370 public abstract String getType(); 371 372 375 public abstract String getBaseUrl(); 376 377 380 public String getAltBaseUrl() { 381 return null; 382 } 383 384 389 public abstract String getDirectoryUrl(String rootUrl, String directoryName); 390 391 395 public abstract RemoteFile[] getFiles(Document doc); 396 397 401 public abstract String [] getDirectories(Document doc); 402 403 407 public abstract String getDownloadUrl(RemoteFile file); 408 409 414 public abstract Properties guessWebProperties(String fullUrl); 415 416 } 417 | Popular Tags |