package net.sourceforge.cvsgrab.web;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Properties;

import net.sourceforge.cvsgrab.CVSGrab;
import net.sourceforge.cvsgrab.CvsWebInterface;
import net.sourceforge.cvsgrab.InvalidVersionException;
import net.sourceforge.cvsgrab.MarkerNotFoundException;
import net.sourceforge.cvsgrab.RemoteFile;
import net.sourceforge.cvsgrab.WebBrowser;

import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.util.URIUtil;
import org.apache.commons.jxpath.JXPathContext;
import org.apache.commons.jxpath.Pointer;
import org.w3c.dom.Document;

/**
 * Base support for ViewCVS-style web interfaces to a CVS repository.
 * <p>
 * The class locates files and directories in a parsed HTML page through
 * configurable XPath expressions and builds the urls used to browse
 * directories and to download file revisions. Subclasses supply the version
 * marker that identifies the flavour of the web interface they handle and may
 * tune the XPath expressions and url parameters through the setters.
 */
public abstract class ViewCvsInterface extends CvsWebInterface {

    /** Marker text found by {@link #detect(Document)}, or the id set by {@link #init()}. */
    private String _type;
    /** Selects the table rows describing files. */
    private String _filesXpath = "//TR[TD/A/IMG/@alt = '(file)']";
    /** Relative to a file row: the file name. */
    private String _fileNameXpath = "TD[1]/A/@name";
    /** Relative to a file row: the file revision. */
    private String _fileVersionXpath = "TD[A/IMG/@alt != '(graph)'][2]/A/B";
    /** Selects the table rows describing sub-directories, excluding 'Attic'. */
    private String _directoriesXpath = "//TR[TD/A/IMG/@alt = '(dir)'][TD/A/@name != 'Attic']";
    /** Relative to a directory row: the directory name. */
    private String _directoryXpath = "TD[1]/A/@name";
    /** Path segment inserted in download urls. */
    private String _checkoutPath = "*checkout*/";
    /** Keyword searched for (in lower case) by {@link #guessWebProperties(String)}. */
    private String _webInterfaceType = "viewcvs";
    /** Name of the query parameter carrying the version tag. */
    private String _tagParam = "only_with_tag";
    /** Name of the query parameter carrying the project root. */
    private String _cvsrootParam = "cvsroot";

    /**
     * Constructor for ViewCvsInterface.
     *
     * @param grabber the grabber this web interface works for
     */
    public ViewCvsInterface(CVSGrab grabber) {
        super(grabber);
    }

    /**
     * Initialises the interface without probing a page: the type is set to
     * the value of {@link #getId()}.
     *
     * @throws Exception declared for subclasses; this implementation only
     *         assigns the type
     */
    public void init() throws Exception {
        _type = getId();
    }

    /**
     * Checks that the given page was produced by the web interface handled by
     * this class.
     * <p>
     * The page is searched for a generator META tag or a link text starting
     * with 'ViewCVS'; the first such text that also starts with
     * {@link #getVersionMarker()} becomes the type of this interface.
     *
     * @param htmlPage the parsed web page to inspect
     * @throws MarkerNotFoundException if no candidate text starts with the
     *         version marker
     * @throws InvalidVersionException never thrown by this implementation;
     *         kept in the signature for callers and subclasses
     */
    public void detect(Document htmlPage) throws MarkerNotFoundException, InvalidVersionException {
        JXPathContext context = JXPathContext.newContext(htmlPage);
        Iterator viewCvsTexts = context.iterate("//META[@name = 'generator']/@content[starts-with(.,'ViewCVS')] | //A[@href]/text()[starts-with(.,'ViewCVS')]");
        _type = null;
        // Remembers the last candidate examined so that it can be reported on failure
        String viewCvsVersion = null;
        while (viewCvsTexts.hasNext()) {
            viewCvsVersion = (String) viewCvsTexts.next();
            if (viewCvsVersion.startsWith(getVersionMarker())) {
                _type = viewCvsVersion;
                break;
            }
        }
        if (_type == null) {
            throw new MarkerNotFoundException("Expected marker " + getVersionMarker() + ", found " + viewCvsVersion);
        }
    }

    /**
     * Derives the id of this interface from the name of its class: the
     * package is removed, then everything from the first occurrence of
     * "Interface" onwards.
     *
     * @return the shortened class name; the simple class name unchanged when
     *         it does not contain "Interface"
     */
    public String getId() {
        String className = getClass().getName();
        className = className.substring(className.lastIndexOf('.') + 1);
        // Guard against subclasses that do not follow the XxxInterface naming
        // scheme: substring(0, -1) would otherwise throw.
        int suffixPos = className.indexOf("Interface");
        if (suffixPos >= 0) {
            className = className.substring(0, suffixPos);
        }
        return className;
    }

    /**
     * @return the type of this web interface: the marker text found by
     *         {@link #detect(Document)}, the id assigned by {@link #init()} or
     *         the value given to {@link #setType(String)}; null if none of
     *         them has succeeded yet
     */
    public String getType() {
        return _type;
    }

    /**
     * Builds the url of the package configured on the grabber: root url
     * (with a final slash) followed by the package path, plus the project
     * root parameter when one is defined and the grabber's extra query
     * parameters.
     *
     * @return the base url
     */
    public String getBaseUrl() {
        String url = WebBrowser.forceFinalSlash(getGrabber().getRootUrl());
        url += getGrabber().getPackagePath();
        if (getProjectRoot() != null) {
            url = WebBrowser.addQueryParam(url, _cvsrootParam, getProjectRoot());
        }
        url = WebBrowser.addQueryParam(url, getGrabber().getQueryParams());
        return url;
    }

    /**
     * Builds the url used to browse a directory of the repository.
     *
     * @param rootUrl the root url of the web interface
     * @param directoryName the path of the directory; it is url-encoded with
     *        {@link #quote(String)}
     * @return the url of the directory, including the project root, version
     *         tag and extra query parameters
     * @throws RuntimeException if the directory name cannot be encoded; the
     *         original {@link URIException} is available as the cause
     */
    public String getDirectoryUrl(String rootUrl, String directoryName) {
        try {
            String tag = getVersionTag();
            String url = WebBrowser.forceFinalSlash(rootUrl);
            url += WebBrowser.forceFinalSlash(quote(directoryName));
            if (getProjectRoot() != null) {
                url = WebBrowser.addQueryParam(url, _cvsrootParam, getProjectRoot());
            }
            url = WebBrowser.addQueryParam(url, _tagParam, tag);
            url = WebBrowser.addQueryParam(url, getQueryParams());
            return url;
        } catch (URIException ex) {
            // Chain the cause so that the reason of the failure is not lost
            throw new RuntimeException("Cannot create URI", ex);
        }
    }

    /**
     * Extracts the files listed in a directory page.
     * <p>
     * Each node matched by {@link #getFilesXpath()} yields one file whose
     * name and version are read with {@link #getFileNameXpath()} and
     * {@link #getFileVersionXpath()}, then passed through
     * {@link #adjustFile(RemoteFile, JXPathContext)}.
     *
     * @param htmlPage the parsed directory page
     * @return the files found, in document order; empty if there is none
     */
    public RemoteFile[] getFiles(Document htmlPage) {
        JXPathContext context = JXPathContext.newContext(htmlPage);
        List files = new ArrayList();
        Iterator i = context.iteratePointers(getFilesXpath());
        while (i.hasNext()) {
            Pointer pointer = (Pointer) i.next();
            JXPathContext nodeContext = context.getRelativeContext(pointer);
            String fileName = (String) nodeContext.getValue(getFileNameXpath());
            String version = (String) nodeContext.getValue(getFileVersionXpath());
            RemoteFile file = new RemoteFile(fileName, version);
            adjustFile(file, nodeContext);
            files.add(file);
        }
        return (RemoteFile[]) files.toArray(new RemoteFile[files.size()]);
    }

    /**
     * Extracts the names of the sub-directories listed in a directory page.
     * <p>
     * Each node matched by {@link #getDirectoriesXpath()} yields one name,
     * read with {@link #getDirectoryXpath()}. A node whose name cannot be
     * read is logged as an error and skipped.
     *
     * @param htmlPage the parsed directory page
     * @return the directory names found, in document order; empty if there
     *         is none
     */
    public String[] getDirectories(Document htmlPage) {
        JXPathContext context = JXPathContext.newContext(htmlPage);
        context.registerNamespace("HTML", "http://www.w3.org/1999/xhtml");
        context.registerNamespace("", "http://www.w3.org/1999/xhtml");
        List directories = new ArrayList();
        Iterator i = context.iteratePointers(getDirectoriesXpath());
        while (i.hasNext()) {
            Pointer pointer = (Pointer) i.next();
            JXPathContext nodeContext = context.getRelativeContext(pointer);
            try {
                String dir = (String) nodeContext.getValue(getDirectoryXpath());
                directories.add(dir);
            } catch (RuntimeException e) {
                // Best effort: one unreadable row must not hide the other directories
                CVSGrab.getLog().error("Cannot localise directory name in document location " + nodeContext.getPointer("."), e);
            }
        }
        return (String[]) directories.toArray(new String[directories.size()]);
    }

    /**
     * Builds the url used to download a given revision of a file.
     * <p>
     * The url is made of the root url of the file's repository, the checkout
     * path, the encoded directory path, "Attic/" when the file is in the
     * attic, and the encoded file name. The project root, the revision
     * ("rev") and the extra query parameters are then appended.
     *
     * @param file the file to download
     * @return the download url
     * @throws RuntimeException if the directory or file name cannot be
     *         encoded; the original {@link URIException} is available as the
     *         cause
     */
    public String getDownloadUrl(RemoteFile file) {
        try {
            // Use the root url of the repository the file belongs to
            String url = WebBrowser.forceFinalSlash(file.getDirectory().getRemoteRepository().getRootUrl());
            String dir = file.getDirectory().getDirectoryPath();
            url += getCheckoutPath();
            url += WebBrowser.forceFinalSlash(quote(dir));
            if (file.isInAttic()) {
                url += "Attic/";
            }
            url += quote(file.getName());
            if (getProjectRoot() != null) {
                url = WebBrowser.addQueryParam(url, _cvsrootParam, getProjectRoot());
            }
            url = WebBrowser.addQueryParam(url, "rev", file.getVersion());
            url = WebBrowser.addQueryParam(url, getQueryParams());
            return url;
        } catch (URIException ex) {
            // Chain the cause so that the reason of the failure is not lost
            throw new RuntimeException("Cannot create URI", ex);
        }
    }

    /**
     * Guesses the grabber options (root url, package path, version tag,
     * project root, extra query parameters) from a full url.
     * <p>
     * The guess is only attempted when the lower-cased url contains the web
     * interface type keyword at a position greater than zero.
     *
     * @param url the url to analyse
     * @return the guessed options; empty when the keyword is not found
     */
    public Properties guessWebProperties(String url) {
        Properties properties = new Properties();
        // Simple heuristic for detecting the type of the web interface.
        // A fixed locale keeps the lower-casing independent of the platform
        // default (a Turkish locale would turn 'I' into a dotless 'i').
        int keywordPosition = url.toLowerCase(Locale.ENGLISH).indexOf(_webInterfaceType);
        if (keywordPosition <= 0) {
            return properties;
        }

        // By default the root url ends at the first slash following the keyword
        int rootUrlPosition = url.indexOf('/', keywordPosition) + 1;
        // ...unless a 'cgi/' folder follows, which is then part of the root url
        int cgiFolderPos = url.indexOf("cgi/", rootUrlPosition);
        if (cgiFolderPos > 0) {
            rootUrlPosition = cgiFolderPos + 4;
        }
        // A .cgi or .py script name in the next path segment also belongs to the root url
        int nextSlashPos = url.indexOf('/', rootUrlPosition) + 1;
        int magicScriptPos = url.indexOf(".cgi", rootUrlPosition);
        if (magicScriptPos < 0) {
            magicScriptPos = url.indexOf(".py", rootUrlPosition);
        }
        if (magicScriptPos > 0 && magicScriptPos < nextSlashPos) {
            rootUrlPosition = nextSlashPos;
        }

        String guessedRootUrl = url.substring(0, rootUrlPosition);
        String guessedPackagePath = url.substring(rootUrlPosition);
        String versionTag = null;
        String cvsroot = null;
        String query = null;
        int queryPos = guessedPackagePath.indexOf('?');
        if (queryPos >= 0) {
            query = guessedPackagePath.substring(queryPos + 1);
            guessedPackagePath = guessedPackagePath.substring(0, queryPos);
            // The tag and project root get their own options; the remaining
            // parameters are kept as the extra query string
            Properties queryItems = WebBrowser.getQueryParams(query);
            versionTag = (String) queryItems.remove(_tagParam);
            cvsroot = (String) queryItems.remove(_cvsrootParam);
            query = WebBrowser.toQueryParams(queryItems);
        }

        properties.put(CVSGrab.ROOT_URL_OPTION, guessedRootUrl);
        properties.put(CVSGrab.PACKAGE_PATH_OPTION, guessedPackagePath);
        if (versionTag != null && versionTag.trim().length() > 0) {
            properties.put(CVSGrab.TAG_OPTION, versionTag);
        }
        if (cvsroot != null && cvsroot.trim().length() > 0) {
            properties.put(CVSGrab.PROJECT_ROOT_OPTION, cvsroot);
        }
        if (query != null && query.trim().length() > 0) {
            properties.put(CVSGrab.QUERY_PARAMS_OPTION, query);
        }
        return properties;
    }

    /**
     * @return the XPath expression selecting the file rows of a page
     */
    public String getFilesXpath() {
        return _filesXpath;
    }

    /**
     * @return the XPath expression giving the file name, relative to a file row
     */
    public String getFileNameXpath() {
        return _fileNameXpath;
    }

    /**
     * @return the XPath expression giving the file version, relative to a file row
     */
    public String getFileVersionXpath() {
        return _fileVersionXpath;
    }

    /**
     * @return the XPath expression selecting the directory rows of a page
     */
    public String getDirectoriesXpath() {
        return _directoriesXpath;
    }

    /**
     * @return the XPath expression giving the directory name, relative to a
     *         directory row
     */
    public String getDirectoryXpath() {
        return _directoryXpath;
    }

    /**
     * @return the path segment inserted in download urls
     */
    protected String getCheckoutPath() {
        return _checkoutPath;
    }

    /**
     * @param checkoutPath the path segment to insert in download urls
     */
    protected void setCheckoutPath(String checkoutPath) {
        _checkoutPath = checkoutPath;
    }

    /**
     * @param directoryXpath the XPath expression giving the directory name,
     *        relative to a directory row
     */
    public void setDirectoryXpath(String directoryXpath) {
        _directoryXpath = directoryXpath;
    }

    /**
     * @param directoriesXpath the XPath expression selecting the directory
     *        rows of a page
     */
    public void setDirectoriesXpath(String directoriesXpath) {
        _directoriesXpath = directoriesXpath;
    }

    /**
     * @param fileVersionXpath the XPath expression giving the file version,
     *        relative to a file row
     */
    public void setFileVersionXpath(String fileVersionXpath) {
        _fileVersionXpath = fileVersionXpath;
    }

    /**
     * @param fileNameXpath the XPath expression giving the file name,
     *        relative to a file row
     */
    public void setFileNameXpath(String fileNameXpath) {
        _fileNameXpath = fileNameXpath;
    }

    /**
     * @param filesXpath the XPath expression selecting the file rows of a page
     */
    public void setFilesXpath(String filesXpath) {
        _filesXpath = filesXpath;
    }

    /**
     * @return the name of the query parameter carrying the version tag
     */
    public String getTagParam() {
        return _tagParam;
    }

    /**
     * @param param the name of the query parameter carrying the version tag
     */
    public void setTagParam(String param) {
        _tagParam = param;
    }

    /**
     * @return the keyword searched for in urls by
     *         {@link #guessWebProperties(String)}
     */
    public String getWebInterfaceType() {
        return _webInterfaceType;
    }

    /**
     * @param webInterfaceType the keyword to search for in urls; it is
     *        compared against the lower-cased url
     */
    protected void setWebInterfaceType(String webInterfaceType) {
        this._webInterfaceType = webInterfaceType;
    }

    /**
     * @return the name of the query parameter carrying the project root
     */
    public String getCvsrootParam() {
        return _cvsrootParam;
    }

    /**
     * @param cvsrootParam the name of the query parameter carrying the
     *        project root
     */
    public void setCvsrootParam(String cvsrootParam) {
        _cvsrootParam = cvsrootParam;
    }

    /**
     * Sets the type of this web interface.
     *
     * @param type the type
     */
    protected void setType(String type) {
        _type = type;
    }

    /**
     * @return the text that a candidate found by {@link #detect(Document)}
     *         must start with to be accepted
     */
    protected abstract String getVersionMarker();

    /**
     * Hook called for every file found by {@link #getFiles(Document)}. This
     * implementation strips a leading "Attic/" from the file name and flags
     * the file as being in the attic.
     *
     * @param file the file to adjust
     * @param nodeContext the context of the node the file was read from; not
     *        used by this implementation
     */
    protected void adjustFile(RemoteFile file, JXPathContext nodeContext) {
        String fileName = file.getName();
        if (fileName.startsWith("Attic/")) {
            file.setName(fileName.substring(6));
            file.setInAttic(true);
        }
    }

    /**
     * Encodes a path so that it can be used in an url, using the ISO-8859-1
     * charset.
     *
     * @param original the unescaped path
     * @return the encoded path
     * @throws URIException if the encoding fails
     */
    protected String quote(String original) throws URIException {
        return URIUtil.encodePath(original, "ISO-8859-1");
    }

    /**
     * @return the project root configured on the grabber; may be null
     */
    protected String getProjectRoot() {
        return getGrabber().getProjectRoot();
    }

}