KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > nbbuild > CheckLinks


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.nbbuild;
21
22 import java.io.*;
23 import java.net.*;
24 import java.util.*;
25 import java.util.regex.*;
26
27 import org.apache.tools.ant.BuildException;
28 import org.apache.tools.ant.FileScanner;
29 import org.apache.tools.ant.Project;
30 import org.apache.tools.ant.Task;
31 import org.apache.tools.ant.taskdefs.MatchingTask;
32
33 import org.apache.tools.ant.types.Mapper;
34
// XXX in Ant 1.6, permit <xmlcatalog> entries to make checking of "external" links
// work better in the case of cross-links between APIs

38 /** Task to check for broken links in HTML.
39  * Note that this is a matching task and you must give it a list of things to match.
40  * The Java VM's configured HTTP proxy will be used (${http.proxyHost} and ${http.proxyPort}).
41  * @author Jesse Glick
42  */

43 public class CheckLinks extends MatchingTask {
44
45     private File basedir;
46     private boolean checkexternal = true;
47     private boolean checkspaces = true;
48     private boolean checkforbidden = true;
49     private List<Mapper> mappers = new LinkedList<Mapper>();
50     private boolean failOnError;
51     private List<Filter> filters = new ArrayList<Filter>();
52
53     /** Set whether to check external links (absolute URLs).
54      * Local relative links are always checked.
55      * By default, external links are checked.
56      */

57     public void setCheckexternal (boolean ce) {
58         checkexternal = ce;
59     }
60     
61     /** False if spaces in URLs shall not be reported. Default to true.
62      */

63     public void setCheckspaces (boolean s) {
64         checkspaces = s;
65     }
66
67     /** Allows to disable check for forbidden links.
68      */

69     public void setCheckforbidden(boolean s) {
70         checkforbidden = s;
71     }
72     
73     /** Set to true, if you want the build to fail if a url is wrong.
74      */

75     public void setFailOnError (boolean f) {
76         failOnError = f;
77     }
78
79     /** Set the base directory from which to scan files.
80      */

81     public void setBasedir (File basedir) {
82         this.basedir = basedir;
83     }
84     
85     public Filter createFilter () {
86         Filter f = new Filter ();
87         filters.add (f);
88         return f;
89     }
90
91     /**
92      * Add a mapper to translate file names to the "originals".
93      */

94     public Mapper createMapper() {
95         Mapper m = new Mapper(getProject());
96         mappers.add(m);
97         return m;
98     }
99
100     public void execute () throws BuildException {
101         if (basedir == null) throw new BuildException ("Must specify the basedir attribute");
102         FileScanner scanner = getDirectoryScanner (basedir);
103         scanner.scan ();
104         String JavaDoc message = "Scanning for broken links in " + basedir + " ...";
105         if (! checkexternal) message += " (external URLs will be skipped)";
106         log (message);
107         String JavaDoc[] files = scanner.getIncludedFiles ();
108         Set<URI> okurls = new HashSet<URI>(1000);
109         Set<URI> badurls = new HashSet<URI>(100);
110         Set<URI> cleanurls = new HashSet<URI>(100);
111         for (int i = 0; i < files.length; i++) {
112             File file = new File (basedir, files[i]);
113             URI fileurl = file.toURI();
114             log ("Scanning " + file, Project.MSG_VERBOSE);
115             try {
116                 scan(this, getLocation().toString(), "", fileurl, okurls, badurls, cleanurls, checkexternal, checkspaces, checkforbidden, 1, mappers, filters);
117             } catch (IOException ioe) {
118                 throw new BuildException("Could not scan " + file + ": " + ioe, ioe, getLocation());
119             }
120         }
121         
122         if (failOnError && !badurls.isEmpty ()) {
123             throw new BuildException ("There were broken links");
124         }
125     }
126     
127     private static Pattern hrefOrAnchor = Pattern.compile("<(a|img)(\\s+shape=\"rect\")?\\s+(href|name|src)=\"([^\"#]*)(#[^\"]+)?\"(\\s+shape=\"rect\")?>", Pattern.CASE_INSENSITIVE);
128     private static Pattern lineBreak = Pattern.compile("^", Pattern.MULTILINE);
129     
130     /**
131      * Scan for broken links.
132      * @param task an Ant task to associate with this
133      * @param referrer the referrer file path (or full URL if not file:)
134      * @param referrerLocation the location in the referrer, e.g. ":38:12", or "" if unavailable
135      * @param u the URI to check
136      * @param okurls a set of URIs known to be fully checked (including all anchored variants etc.)
137      * @param badurls a set of URIs known to be bogus
138      * @param cleanurls a set of (base) URIs known to have had their contents checked
139      * @param checkexternal if true, check external links (all protocols besides file:)
140      * @param recurse one of:
141      * 0 - just check that it can be opened;
142      * 1 - check also that any links from it can be opened;
143      * 2 - recurse
144      * @param mappers a list of Mappers to apply to get source files from HTML files
145      */

146     public static void scan(Task task, String JavaDoc referrer, String JavaDoc referrerLocation, URI u, Set<URI> okurls, Set<URI> badurls, Set<URI> cleanurls, boolean checkexternal, boolean checkspaces, boolean checkforbidden, int recurse, List<Mapper> mappers) throws IOException {
147         scan (task, referrer, referrerLocation, u, okurls, badurls, cleanurls, checkexternal, checkspaces, checkforbidden, recurse, mappers, Collections.<Filter>emptyList());
148     }
149     
150     private static void scan(Task task, String JavaDoc referrer, String JavaDoc referrerLocation, URI u, Set<URI> okurls, Set<URI> badurls, Set<URI> cleanurls, boolean checkexternal, boolean checkspaces, boolean checkforbidden, int recurse, List<Mapper> mappers, List<Filter> filters) throws IOException {
151         //task.log("scan: u=" + u + " referrer=" + referrer + " okurls=" + okurls + " badurls=" + badurls + " cleanurls=" + cleanurls + " recurse=" + recurse, Project.MSG_DEBUG);
152
if (okurls.contains(u) && recurse == 0) {
153             // Yes it is OK.
154
return;
155         }
156         String JavaDoc b = u.toString();
157         int i = b.lastIndexOf('#');
158         if (i != -1) {
159             b = b.substring(0, i);
160         }
161         URI base;
162         try {
163             base = new URI(u.getScheme(), u.getUserInfo(), u.getHost(), u.getPort(), u.getPath(), u.getQuery(), /*fragment*/null);
164         } catch (URISyntaxException e) {
165             throw new Error JavaDoc(e);
166         }
167         String JavaDoc frag = u.getFragment();
168         String JavaDoc basepath = base.toString();
169         if ("file".equals(base.getScheme())) {
170             try {
171                 basepath = new File(base).getAbsolutePath();
172             } catch (IllegalArgumentException JavaDoc e) {
173                 task.log(normalize(referrer, mappers) + referrerLocation + ": malformed URL: " + base + " (" + e.getLocalizedMessage() + ")", Project.MSG_WARN);
174             }
175         }
176         //task.log("scan: base=" + base + " frag=" + frag, Project.MSG_DEBUG);
177
if (badurls.contains(u) || badurls.contains(base)) {
178             task.log(normalize(referrer, mappers) + referrerLocation + ": broken link (already reported): " + u, Project.MSG_WARN);
179             return;
180         }
181
182         if (checkforbidden) {
183             for (Filter f : filters) {
184                 Boolean JavaDoc decision = f.isOk (u);
185                 if (Boolean.TRUE.equals (decision)) {
186                     break;
187                 }
188                 if (Boolean.FALSE.equals (decision)) {
189                     task.log(normalize(referrer, mappers) + referrerLocation + ": forbidden link: " + base, Project.MSG_WARN);
190                     badurls.add(base);
191                     badurls.add(u);
192                     return;
193                 }
194             }
195         }
196         
197         if (! checkexternal && ! "file".equals(u.getScheme())) {
198             task.log("Skipping external link: " + base, Project.MSG_VERBOSE);
199             cleanurls.add(base);
200             okurls.add(base);
201             okurls.add(u);
202             return;
203         }
204         
205         task.log("Checking " + u + " (recursion level " + recurse + ")", Project.MSG_VERBOSE);
206         String JavaDoc content;
207         String JavaDoc mimeType;
208         try {
209             // XXX for protocol 'file', could more efficiently use a memmapped char buffer
210
URLConnection conn = base.toURL().openConnection ();
211             conn.connect ();
212             mimeType = conn.getContentType ();
213             InputStream is = conn.getInputStream ();
214             String JavaDoc enc = conn.getContentEncoding();
215             if (enc == null) {
216                 enc = "UTF-8";
217             }
218             try {
219                 ByteArrayOutputStream baos = new ByteArrayOutputStream();
220                 int read;
221                 byte[] buf = new byte[4096];
222                 while ((read = is.read(buf)) != -1) {
223                     baos.write(buf, 0, read);
224                 }
225                 content = baos.toString(enc);
226             } finally {
227                 is.close();
228             }
229         } catch (IOException ioe) {
230             task.log(normalize(referrer, mappers) + referrerLocation + ": broken link: " + base, Project.MSG_WARN);
231             task.log("Error: " + ioe, Project.MSG_VERBOSE);
232             badurls.add(base);
233             badurls.add(u);
234             return;
235         }
236         okurls.add(base);
237         // map from other URIs (hrefs) to line/col info where they occur in this file (format: ":1:2")
238
Map<URI,String JavaDoc> others = null;
239         if (recurse > 0 && cleanurls.add(base)) {
240             others = new HashMap<URI,String JavaDoc>(100);
241         }
242             if (recurse == 0 && frag == null) {
243                 // That is all we wanted to check.
244
return;
245             }
246             if ("text/html".equals(mimeType)) {
247                 task.log("Parsing " + base, Project.MSG_VERBOSE);
248                 Matcher m = hrefOrAnchor.matcher(content);
249                 Set<String JavaDoc> names = new HashSet<String JavaDoc>(100);
250                 while (m.find()) {
251                     // Get the stuff involved:
252
String JavaDoc type = m.group(3);
253                     if (type.equalsIgnoreCase("name")) {
254                         // We have an anchor, therefore refs to it are valid.
255
String JavaDoc name = unescape(m.group(4));
256                         if (names.add(name)) {
257                             try {
258                                 okurls.add(new URI(base.getScheme(), base.getUserInfo(), base.getHost(), base.getPort(), base.getPath(), base.getQuery(), /*fragment*/name));
259                             } catch (URISyntaxException e) {
260                                 task.log(normalize(basepath, mappers) + findLocation(content, m.start(4)) + ": bad anchor name: " + e.getMessage(), Project.MSG_WARN);
261                             }
262                         } else if (recurse == 1) {
263                             task.log(normalize(basepath, mappers) + findLocation(content, m.start(4)) + ": duplicate anchor name: " + name, Project.MSG_WARN);
264                         }
265                     } else {
266                         // A link to some other document: HREF=, SRC=.
267

268                         // check whether this URL is not commented out
269
int previousCommentStart = content.lastIndexOf ("<!--", m.start (0));
270                         int previousCommentEnd = content.lastIndexOf ("-->", m.start (0));
271                         boolean commentedOut = false;
272                         if (previousCommentEnd < previousCommentStart) {
273                             // comment start is there and end is before it
274
commentedOut = true;
275                         }
276                         
277                         if (others != null && !commentedOut) {
278                             String JavaDoc otherbase = unescape(m.group(4));
279                             String JavaDoc otheranchor = unescape(m.group(5));
280                             String JavaDoc uri = (otheranchor == null) ? otherbase : otherbase + otheranchor;
281                             String JavaDoc location = findLocation(content, m.start(4));
282                             String JavaDoc fixedUri;
283                             if (uri.indexOf(' ') != -1) {
284                                 fixedUri = uri.replaceAll(" ", "%20");
285                                 if (checkspaces) {
286                                     task.log(normalize(basepath, mappers) + location + ": spaces in URIs should be encoded as \"%20\": " + uri, Project.MSG_WARN);
287                                 }
288                             } else {
289                                 fixedUri = uri;
290                             }
291                             try {
292                                 URI relUri = new URI(fixedUri);
293                                 if (!relUri.isOpaque()) {
294                                     URI o = base.resolve(relUri).normalize();
295                                     //task.log("href: " + o);
296
if (!others.containsKey(o)) {
297                                         // Only keep location info for first reference.
298
others.put(o, location);
299                                     }
300                                 } // else mailto: or similar
301
} catch (URISyntaxException e) {
302                                 // Message should contain the URI.
303
task.log(normalize(basepath, mappers) + location + ": bad relative URI: " + e.getMessage(), Project.MSG_WARN);
304                             }
305                         } // else we are only checking that this one has right anchors
306
}
307                 }
308             } else {
309                 task.log("Not checking contents of " + base, Project.MSG_VERBOSE);
310             }
311         if (! okurls.contains(u)) {
312             task.log(normalize(referrer, mappers) + referrerLocation + ": broken link: " + u, Project.MSG_WARN);
313         }
314         if (others != null) {
315             Iterator it = others.entrySet().iterator();
316             while (it.hasNext()) {
317                 Map.Entry entry = (Map.Entry)it.next();
318                 URI other = (URI)entry.getKey();
319                 String JavaDoc location = (String JavaDoc)entry.getValue();
320                 scan(task, basepath, location, other, okurls, badurls, cleanurls, checkexternal, checkspaces, checkforbidden, recurse == 1 ? 0 : 2, mappers, filters);
321             }
322         }
323     }
324     
325     private static String JavaDoc normalize(String JavaDoc path, List<Mapper> mappers) throws IOException {
326         try {
327             for (Mapper m : mappers) {
328                 String JavaDoc[] nue = m.getImplementation().mapFileName(path);
329                 if (nue != null) {
330                     for (int i = 0; i < nue.length; i++) {
331                         File f = new File(nue[i]);
332                         if (f.isFile()) {
333                             return new File(f.toURI().normalize()).getAbsolutePath();
334                         }
335                     }
336                 }
337             }
338             return path;
339         } catch (BuildException e) {
340             throw new IOException(e.toString());
341         }
342     }
343     
344     private static String JavaDoc unescape(String JavaDoc text) {
345         if (text == null) {
346             return null;
347         }
348         int pos = 0;
349         int search;
350         while ((search = text.indexOf('&', pos)) != -1) {
351             int semi = text.indexOf(';', search + 1);
352             if (semi == -1) {
353                 // Unterminated &... leave rest as is??
354
return text;
355             }
356             String JavaDoc entity = text.substring(search + 1, semi);
357             String JavaDoc repl;
358             if (entity.equals("amp")) {
359                 repl = "&";
360             } else if (entity.equals("quot")) {
361                 repl = "\"";
362             } else if (entity.equals("lt")) {
363                 repl = "<";
364             } else if (entity.equals("gt")) {
365                 repl = ">";
366             } else if (entity.equals("apos")) {
367                 repl = "'";
368             } else {
369                 // ???
370
pos = semi + 1;
371                 continue;
372             }
373             text = text.substring(0, search) + repl + text.substring(semi + 1);
374             pos = search + repl.length();
375         }
376         return text;
377     }
378     
379     private static String JavaDoc findLocation(CharSequence JavaDoc content, int pos) {
380         Matcher lbm = lineBreak.matcher(content);
381         int line = 0;
382         int col = 1;
383         while (lbm.find()) {
384             if (lbm.start() <= pos) {
385                 line++;
386                 col = pos - lbm.start() + 1;
387             } else {
388                 break;
389             }
390         }
391         return ":" + line + ":" + col;
392     }
393
394     public final class Filter extends Object JavaDoc {
395         private Boolean JavaDoc accept;
396         private Pattern pattern;
397         
398         public void setAccept (boolean a) {
399             accept = Boolean.valueOf (a);
400         }
401         
402         public void setPattern (String JavaDoc s) {
403             pattern = Pattern.compile (s, Pattern.CASE_INSENSITIVE);
404         }
405         
406         /** Checks whether a URI is ok.
407          * @return null if not applicable, Boolean.TRUE if the URL is accepted, Boolean.FALSE if not
408          */

409         final Boolean JavaDoc isOk (URI u) throws BuildException {
410             if (accept == null) {
411                 throw new BuildException ("Each filter must have accept attribute");
412             }
413             if (pattern == null) {
414                 throw new BuildException ("Each filter must have pattern attribute");
415             }
416             
417             if (pattern.matcher (u.toString ()).matches ()) {
418                 log ("Matched " + u + " accepted: " + accept, org.apache.tools.ant.Project.MSG_VERBOSE);
419                 return accept;
420             }
421             return null;
422         }
423     }
424 }
425
Popular Tags