KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sourceforge > cvsgrab > CvsWebInterface


1 /*
2  * CVSGrab
3  * Author: Ludovic Claude (ludovicc@users.sourceforge.net)
4  * Distributable under BSD license.
5  */

6 package net.sourceforge.cvsgrab;
7
8 import net.sourceforge.cvsgrab.web.Chora2_0Interface;
9 import net.sourceforge.cvsgrab.web.CvsWeb1_0Interface;
10 import net.sourceforge.cvsgrab.web.CvsWeb2_0Interface;
11 import net.sourceforge.cvsgrab.web.CvsWeb3_0Interface;
12 import net.sourceforge.cvsgrab.web.FishEye1_0Interface;
13 import net.sourceforge.cvsgrab.web.Sourcecast1_0Interface;
14 import net.sourceforge.cvsgrab.web.Sourcecast2_0Interface;
15 import net.sourceforge.cvsgrab.web.Sourcecast3_0Interface;
16 import net.sourceforge.cvsgrab.web.ViewCvs0_7Interface;
17 import net.sourceforge.cvsgrab.web.ViewCvs0_8Interface;
18 import net.sourceforge.cvsgrab.web.ViewCvs0_9Interface;
19 import net.sourceforge.cvsgrab.web.ViewCvs1_0Interface;
20
21 import org.w3c.dom.Document JavaDoc;
22
23 import java.util.ArrayList JavaDoc;
24 import java.util.Arrays JavaDoc;
25 import java.util.Comparator JavaDoc;
26 import java.util.HashMap JavaDoc;
27 import java.util.HashSet JavaDoc;
28 import java.util.Iterator JavaDoc;
29 import java.util.List JavaDoc;
30 import java.util.Map JavaDoc;
31 import java.util.Properties JavaDoc;
32 import java.util.Set JavaDoc;
33 import java.util.StringTokenizer JavaDoc;
34
35 /**
36  * Abstracts the web interface available for a CVS repository. This allows us to
37  * support different web interfaces such as viewcvs, cvsweb, sourcecast and others.
38  *
39  * @author <a HREF="mailto:ludovicc@users.sourceforge.net">Ludovic Claude</a>
40  * @version $Revision: 1.18 $ $Date: 2005/06/25 19:51:33 $
41  * @created on 6 oct. 2003
42  */

43 public abstract class CvsWebInterface {
44
45     public static final String JavaDoc DETECTED_WEB_INTERFACE = "detectedWebInterface";
46
47     private static CvsWebInterface[] getWebInterfaces(CVSGrab grabber) {
48         return new CvsWebInterface[] {
49             new ViewCvs0_7Interface(grabber),
50             new ViewCvs0_8Interface(grabber),
51             new ViewCvs0_9Interface(grabber),
52             new ViewCvs1_0Interface(grabber),
53             new Sourcecast1_0Interface(grabber),
54             new Sourcecast2_0Interface(grabber),
55             new Sourcecast3_0Interface(grabber),
56             new CvsWeb1_0Interface(grabber),
57             new CvsWeb2_0Interface(grabber),
58             new CvsWeb3_0Interface(grabber),
59             new Chora2_0Interface(grabber),
60             new FishEye1_0Interface(grabber)
61         };
62     }
63
64     private static Map JavaDoc documents = new HashMap JavaDoc();
65
66     /**
67      * Explicitely select a web interface capable of handle the web pages.
68      * @param grabber The cvs grabber
69      * @param interfaceId The id of the interface
70      * @return the selected web interface, or null if the id is not recognized
71      * @throws Exception if initialisation of the web interface fails
72      */

73     public static final CvsWebInterface getInterface(CVSGrab grabber, String JavaDoc interfaceId) throws Exception JavaDoc {
74         CvsWebInterface[] webInterfaces = getWebInterfaces(grabber);
75         for (int i = 0; i < webInterfaces.length; i++) {
76             if (webInterfaces[i].getId().equals(interfaceId)) {
77                 webInterfaces[i].init();
78                 return webInterfaces[i];
79             }
80         }
81         return null;
82     }
83
84     /**
85      * @return an array containing the ids of the registered web interfaces
86      */

87     public static final String JavaDoc[] getInterfaceIds(CVSGrab grabber) {
88         CvsWebInterface[] webInterfaces = getWebInterfaces(grabber);
89         String JavaDoc ids[] = new String JavaDoc[webInterfaces.length];
90         for (int i = 0; i < ids.length; i++) {
91             ids[i] = webInterfaces[i].getId();
92         }
93         return ids;
94     }
95
96     public static final String JavaDoc[] getBaseUrls(CVSGrab grabber) {
97         CvsWebInterface[] webInterfaces = getWebInterfaces(grabber);
98         Set JavaDoc urls = new HashSet JavaDoc();
99         for (int i = 0; i < webInterfaces.length; i++) {
100             CvsWebInterface webInterface = webInterfaces[i];
101             urls.add(webInterface.getBaseUrl());
102             urls.add(webInterface.getAltBaseUrl());
103         }
104         urls.remove(null);
105         String JavaDoc[] listOfUrls = (String JavaDoc[]) urls.toArray(new String JavaDoc[urls.size()]);
106         // Sort with the longuest urls first, this can prevent bugs like #988162
107
Arrays.sort(listOfUrls, new Comparator JavaDoc() {
108             public int compare(Object JavaDoc o1, Object JavaDoc o2) {
109                 String JavaDoc s1 = (String JavaDoc) o1;
110                 String JavaDoc s2 = (String JavaDoc) o2;
111                 if (s1.length() > s2.length()) {
112                     return -1;
113                 }
114                 if (s1.length() < s2.length()) {
115                     return 1;
116                 }
117                 return 0;
118             }
119
120         });
121         return listOfUrls;
122     }
123
124     /**
125      * Find the cvs web interface that could have generated this html page
126      *
127      * @return the cvs web interface that matches best this page
128      */

129     public static CvsWebInterface findInterface(CVSGrab grabber) throws Exception JavaDoc {
130         checkRootUrl(grabber.getRootUrl());
131         CvsWebInterface[] webInterfaces = getWebInterfaces(grabber);
132         List JavaDoc errors = new ArrayList JavaDoc();
133         // Fast search on know repositories, here we need only to know the url of the web site
134
for (int i = 0; i < webInterfaces.length; i++) {
135             CvsWebInterface webInterface = webInterfaces[i];
136             if (webInterface.presetMatch(grabber.getRootUrl(), grabber.getPackagePath())) {
137                 return webInterface;
138             }
139         }
140         // Deep search, download the web pages and check for markers in the pages
141
for (int i = 0; i < webInterfaces.length; i++) {
142             CvsWebInterface webInterface = webInterfaces[i];
143             if (webInterface.validate(errors)) {
144                 return webInterface;
145             }
146         }
147         CVSGrab.getLog().info("Tried to connect to the following urls: ");
148         for (Iterator JavaDoc i = documents.keySet().iterator(); i.hasNext(); ) {
149             CVSGrab.getLog().info(i.next());
150         }
151         CVSGrab.getLog().info("Problems found during automatic detection: ");
152         for (Iterator JavaDoc i = errors.iterator(); i.hasNext();) {
153             String JavaDoc msg = (String JavaDoc) i.next();
154             CVSGrab.getLog().info(msg);
155         }
156         return null;
157     }
158
159     /**
160      * Guess the connection properties by parsing the full url used when connecting to the web repository
161      * @param rootUrl the rool url
162      * @return the properties compatible with the naming scheme of WebOptions, or an empty list of properties
163      * if nothing is found
164      */

165     public static Properties JavaDoc getWebProperties(CVSGrab grabber, String JavaDoc rootUrl) {
166         CvsWebInterface[] webInterfaces = getWebInterfaces(grabber);
167         for (int i = 0; i < webInterfaces.length; i++) {
168             CvsWebInterface webInterface = webInterfaces[i];
169             Properties JavaDoc webProperties = webInterface.guessWebProperties(rootUrl);
170             if (!webProperties.isEmpty()) {
171                 Document JavaDoc doc = loadDocument(rootUrl);
172                 if (doc == null) {
173                     continue;
174                 }
175                 try {
176                     grabber.getWebOptions().readProperties(webProperties);
177                     // check that if the url format is recognised by the web interface,
178
// then the actual page also matches, to eliminate false positives.
179
webInterface.detect(doc);
180                     // Keep a reference to the web interface we have just found
181
webProperties.put(DETECTED_WEB_INTERFACE, webInterface);
182                     return webProperties;
183                 } catch (InvalidVersionException e) {
184                     // ignore
185
grabber.getWebOptions().clearLocation();
186                 } catch (MarkerNotFoundException e) {
187                     // ignore
188
grabber.getWebOptions().clearLocation();
189                 }
190             }
191         }
192         return new Properties JavaDoc();
193     }
194
195     private static Document JavaDoc loadDocument(String JavaDoc url) {
196         if (url == null) {
197             throw new IllegalArgumentException JavaDoc("Null url");
198         }
199         Document JavaDoc doc = (Document JavaDoc) documents.get(url);
200         if (doc == null) {
201             documents.put(url, null);
202             try {
203                 doc = WebBrowser.getInstance().getDocument(url);
204                 documents.put(url, doc);
205             } catch (Exception JavaDoc ex) {
206                 // ignore
207
CVSGrab.getLog().debug("Error when loading page " + url, ex);
208             }
209         }
210         return doc;
211     }
212
213     private static void checkRootUrl(String JavaDoc url) {
214         // Sanity check
215
// Get the last part of the root url
216
int slash = url.indexOf('/', 8);
217         if (slash > 0) {
218             String JavaDoc path = url.substring(slash);
219             String JavaDoc beforeLastPart = "";
220             String JavaDoc lastPart = null;
221             StringTokenizer JavaDoc st = new StringTokenizer JavaDoc(path, "/", false);
222             while (st.hasMoreTokens()) {
223                 if (lastPart != null) {
224                     beforeLastPart += "/" + lastPart;
225                 }
226                 lastPart = st.nextToken();
227             }
228             if (lastPart != null) {
229                 lastPart = lastPart.toLowerCase();
230                 if (beforeLastPart.length() > 0 && lastPart.indexOf("cvs") < 0 && lastPart.indexOf(".") < 0
231                         && lastPart.indexOf("source") < 0 && lastPart.indexOf("src") < 0
232                         && lastPart.indexOf("browse") < 0) {
233                     CVSGrab.getLog().warn("The root url " + url + " doesn't seem valid");
234                     String JavaDoc newRootUrl = url.substring(0, slash) + beforeLastPart;
235                     CVSGrab.getLog().warn("Try " + newRootUrl + " as the root url instead");
236                 }
237             }
238         }
239     }
240
241     /**
242      * For test purposes
243      */

244     public static void registerDocument(String JavaDoc url, Document JavaDoc doc) {
245         documents.put(url, doc);
246     }
247
248     private String JavaDoc _versionTag;
249     private String JavaDoc _queryParams;
250     private CVSGrab _grabber;
251
252     /**
253      * Constructor for CvsWebInterface
254      *
255      */

256     public CvsWebInterface(CVSGrab grabber) {
257         super();
258         _grabber = grabber;
259     }
260
261     public CVSGrab getGrabber() {
262         return _grabber;
263     }
264     
265     /**
266      * Returns true if there is a rule that matches this web interface to the given url
267      * @param rootUrl The root url
268      * @param packagePath The package path
269      */

270     public boolean presetMatch(String JavaDoc rootUrl, String JavaDoc packagePath) {
271         return false;
272     }
273
274     /**
275      * Validate that this web interface can be used on the remote repository
276      * @param errors A list of errors to fill if any error is found
277      * @return true if this interface can work on the remote repository
278      */

279     public boolean validate(List JavaDoc errors) {
280         // Perform no checks on known repositories. Just hope that those web sites don't change too often ;-)
281
if (presetMatch(_grabber.getRootUrl(), _grabber.getPackagePath())) {
282             return true;
283         }
284         Document JavaDoc doc = null;
285         String JavaDoc[] urls = new String JavaDoc[] {getBaseUrl(), getAltBaseUrl()};
286         for (int j = 0; j < urls.length; j++) {
287             String JavaDoc url = urls[j];
288             if (url == null) {
289                 continue;
290             }
291             try {
292                 CVSGrab.getLog().debug(getId() + ": Loading for validation " + url);
293                 doc = loadDocument(url);
294                 if (doc == null) {
295                     errors.add(getId() + " tried to match page " + url + " but page doesn't exist");
296                     continue;
297                 }
298
299                 detect(doc);
300                 return true;
301
302             } catch (DetectException ex) {
303                 // ignore
304
CVSGrab.getLog().debug(getId() + " doesn't match, cause is " + ex.toString());
305                 errors.add(getId() + " tried to match page " + url + " but found error " + ex.getMessage());
306             } catch (RuntimeException JavaDoc ex) {
307                 // ignore
308
CVSGrab.getLog().debug(getId() + " doesn't match, cause is " + ex.toString());
309                 errors.add(getId() + " tried to match page " + url + " but found error " + ex.getMessage());
310             }
311         }
312         return false;
313     }
314
315     /**
316      * @return the version tag
317      */

318     public String JavaDoc getVersionTag() {
319         return _versionTag;
320     }
321
322     /**
323      * Sets the version tag
324      * @param versionTag
325      */

326     public void setVersionTag(String JavaDoc versionTag) {
327         this._versionTag = versionTag;
328     }
329
330     /**
331      * @return the queryParams.
332      */

333     public String JavaDoc getQueryParams() {
334         return _queryParams;
335     }
336
337     /**
338      * Sets the additional query parameters
339      * @param params
340      */

341     public void setQueryParams(String JavaDoc params) {
342         _queryParams = params;
343     }
344
345     /**
346      * Initialize the web interface
347      *
348      * @throws Exception if initialisation fails
349      */

350     public abstract void init() throws Exception JavaDoc;
351
352     /**
353      * Detects if the web page is compatible with this web interface, and if yes initialize it.
354      * @param htmlPage The web page
355      *
356      * @throws MarkerNotFoundException if the version marker for the web interface was not found.
357      * @throws InvalidVersionException if the version detected is incompatible with the version supported by this web interface.
358      * @throws IncompatibleInterfaceException if the web page is not compatible with this type of web interface
359      */

360     public abstract void detect(Document JavaDoc htmlPage) throws MarkerNotFoundException, InvalidVersionException;
361
362     /**
363      * @return the id identifying the web interface, and used for initialisation
364      */

365     public abstract String JavaDoc getId();
366
367     /**
368      * @return the type of the web interface as detected from the actual website
369      */

370     public abstract String JavaDoc getType();
371
372     /**
373      * @return the base url to use when trying to auto-detect this type of web interface
374      */

375     public abstract String JavaDoc getBaseUrl();
376
377     /**
378      * @return an alternate base url to use when trying to auto-detect this type of web interface
379      */

380     public String JavaDoc getAltBaseUrl() {
381         return null;
382     }
383
384     /**
385      * @param rootUrl
386      * @param directoryName
387      * @return the url to use to access the contents of the repository
388      */

389     public abstract String JavaDoc getDirectoryUrl(String JavaDoc rootUrl, String JavaDoc directoryName);
390
391     /**
392      * @param doc
393      * @return
394      */

395     public abstract RemoteFile[] getFiles(Document JavaDoc doc);
396
397     /**
398      * @param doc
399      * @return
400      */

401     public abstract String JavaDoc[] getDirectories(Document JavaDoc doc);
402
403     /**
404      * @param file
405      * @return
406      */

407     public abstract String JavaDoc getDownloadUrl(RemoteFile file);
408
409     /**
410      * Guess the web properties frmo the full url
411      * @param fullUrl the full url
412      * @return a list of Properties guessed from the full url, or an empty property list if no match was possible
413      */

414     public abstract Properties JavaDoc guessWebProperties(String JavaDoc fullUrl);
415
416 }
417
Popular Tags