1 19 20 package org.netbeans.nbbuild; 21 22 import java.io.*; 23 import java.net.*; 24 import java.util.*; 25 import java.util.regex.*; 26 27 import org.apache.tools.ant.BuildException; 28 import org.apache.tools.ant.FileScanner; 29 import org.apache.tools.ant.Project; 30 import org.apache.tools.ant.Task; 31 import org.apache.tools.ant.taskdefs.MatchingTask; 32 33 import org.apache.tools.ant.types.Mapper; 34 35 38 43 public class CheckLinks extends MatchingTask { 44 45 private File basedir; 46 private boolean checkexternal = true; 47 private boolean checkspaces = true; 48 private boolean checkforbidden = true; 49 private List<Mapper> mappers = new LinkedList<Mapper>(); 50 private boolean failOnError; 51 private List<Filter> filters = new ArrayList<Filter>(); 52 53 57 public void setCheckexternal (boolean ce) { 58 checkexternal = ce; 59 } 60 61 63 public void setCheckspaces (boolean s) { 64 checkspaces = s; 65 } 66 67 69 public void setCheckforbidden(boolean s) { 70 checkforbidden = s; 71 } 72 73 75 public void setFailOnError (boolean f) { 76 failOnError = f; 77 } 78 79 81 public void setBasedir (File basedir) { 82 this.basedir = basedir; 83 } 84 85 public Filter createFilter () { 86 Filter f = new Filter (); 87 filters.add (f); 88 return f; 89 } 90 91 94 public Mapper createMapper() { 95 Mapper m = new Mapper(getProject()); 96 mappers.add(m); 97 return m; 98 } 99 100 public void execute () throws BuildException { 101 if (basedir == null) throw new BuildException ("Must specify the basedir attribute"); 102 FileScanner scanner = getDirectoryScanner (basedir); 103 scanner.scan (); 104 String message = "Scanning for broken links in " + basedir + " ..."; 105 if (! checkexternal) message += " (external URLs will be skipped)"; 106 log (message); 107 String [] files = scanner.getIncludedFiles (); 108 Set<URI> okurls = new HashSet<URI>(1000); 109 Set<URI> badurls = new HashSet<URI>(100); 110 Set<URI> cleanurls = new HashSet<URI>(100); 111 for (int i = 0; i < files.length; i++) { 112 File file = new File (basedir, files[i]); 113 URI fileurl = file.toURI(); 114 log ("Scanning " + file, Project.MSG_VERBOSE); 115 try { 116 scan(this, getLocation().toString(), "", fileurl, okurls, badurls, cleanurls, checkexternal, checkspaces, checkforbidden, 1, mappers, filters); 117 } catch (IOException ioe) { 118 throw new BuildException("Could not scan " + file + ": " + ioe, ioe, getLocation()); 119 } 120 } 121 122 if (failOnError && !badurls.isEmpty ()) { 123 throw new BuildException ("There were broken links"); 124 } 125 } 126 127 private static Pattern hrefOrAnchor = Pattern.compile("<(a|img)(\\s+shape=\"rect\")?\\s+(href|name|src)=\"([^\"#]*)(#[^\"]+)?\"(\\s+shape=\"rect\")?>", Pattern.CASE_INSENSITIVE); 128 private static Pattern lineBreak = Pattern.compile("^", Pattern.MULTILINE); 129 130 146 public static void scan(Task task, String referrer, String referrerLocation, URI u, Set<URI> okurls, Set<URI> badurls, Set<URI> cleanurls, boolean checkexternal, boolean checkspaces, boolean checkforbidden, int recurse, List<Mapper> mappers) throws IOException { 147 scan (task, referrer, referrerLocation, u, okurls, badurls, cleanurls, checkexternal, checkspaces, checkforbidden, recurse, mappers, Collections.<Filter>emptyList()); 148 } 149 150 private static void scan(Task task, String referrer, String referrerLocation, URI u, Set<URI> okurls, Set<URI> badurls, Set<URI> cleanurls, boolean checkexternal, boolean checkspaces, boolean checkforbidden, int recurse, List<Mapper> mappers, List<Filter> filters) throws IOException { 151 if (okurls.contains(u) && recurse == 0) { 153 return; 155 } 156 String b = u.toString(); 157 int i = b.lastIndexOf('#'); 158 if (i != -1) { 159 b = b.substring(0, i); 160 } 161 URI base; 162 try { 163 base = new URI(u.getScheme(), u.getUserInfo(), u.getHost(), u.getPort(), u.getPath(), u.getQuery(), null); 164 } catch (URISyntaxException e) { 165 throw new Error (e); 166 } 167 String frag = u.getFragment(); 168 String basepath = base.toString(); 169 if ("file".equals(base.getScheme())) { 170 try { 171 basepath = new File(base).getAbsolutePath(); 172 } catch (IllegalArgumentException e) { 173 task.log(normalize(referrer, mappers) + referrerLocation + ": malformed URL: " + base + " (" + e.getLocalizedMessage() + ")", Project.MSG_WARN); 174 } 175 } 176 if (badurls.contains(u) || badurls.contains(base)) { 178 task.log(normalize(referrer, mappers) + referrerLocation + ": broken link (already reported): " + u, Project.MSG_WARN); 179 return; 180 } 181 182 if (checkforbidden) { 183 for (Filter f : filters) { 184 Boolean decision = f.isOk (u); 185 if (Boolean.TRUE.equals (decision)) { 186 break; 187 } 188 if (Boolean.FALSE.equals (decision)) { 189 task.log(normalize(referrer, mappers) + referrerLocation + ": forbidden link: " + base, Project.MSG_WARN); 190 badurls.add(base); 191 badurls.add(u); 192 return; 193 } 194 } 195 } 196 197 if (! checkexternal && ! "file".equals(u.getScheme())) { 198 task.log("Skipping external link: " + base, Project.MSG_VERBOSE); 199 cleanurls.add(base); 200 okurls.add(base); 201 okurls.add(u); 202 return; 203 } 204 205 task.log("Checking " + u + " (recursion level " + recurse + ")", Project.MSG_VERBOSE); 206 String content; 207 String mimeType; 208 try { 209 URLConnection conn = base.toURL().openConnection (); 211 conn.connect (); 212 mimeType = conn.getContentType (); 213 InputStream is = conn.getInputStream (); 214 String enc = conn.getContentEncoding(); 215 if (enc == null) { 216 enc = "UTF-8"; 217 } 218 try { 219 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 220 int read; 221 byte[] buf = new byte[4096]; 222 while ((read = is.read(buf)) != -1) { 223 baos.write(buf, 0, read); 224 } 225 content = baos.toString(enc); 226 } finally { 227 is.close(); 228 } 229 } catch (IOException ioe) { 230 task.log(normalize(referrer, mappers) + referrerLocation + ": broken link: " + base, Project.MSG_WARN); 231 task.log("Error: " + ioe, Project.MSG_VERBOSE); 232 badurls.add(base); 233 badurls.add(u); 234 return; 235 } 236 okurls.add(base); 237 Map<URI,String > others = null; 239 if (recurse > 0 && cleanurls.add(base)) { 240 others = new HashMap<URI,String >(100); 241 } 242 if (recurse == 0 && frag == null) { 243 return; 245 } 246 if ("text/html".equals(mimeType)) { 247 task.log("Parsing " + base, Project.MSG_VERBOSE); 248 Matcher m = hrefOrAnchor.matcher(content); 249 Set<String > names = new HashSet<String >(100); 250 while (m.find()) { 251 String type = m.group(3); 253 if (type.equalsIgnoreCase("name")) { 254 String name = unescape(m.group(4)); 256 if (names.add(name)) { 257 try { 258 okurls.add(new URI(base.getScheme(), base.getUserInfo(), base.getHost(), base.getPort(), base.getPath(), base.getQuery(), name)); 259 } catch (URISyntaxException e) { 260 task.log(normalize(basepath, mappers) + findLocation(content, m.start(4)) + ": bad anchor name: " + e.getMessage(), Project.MSG_WARN); 261 } 262 } else if (recurse == 1) { 263 task.log(normalize(basepath, mappers) + findLocation(content, m.start(4)) + ": duplicate anchor name: " + name, Project.MSG_WARN); 264 } 265 } else { 266 268 int previousCommentStart = content.lastIndexOf ("<!--", m.start (0)); 270 int previousCommentEnd = content.lastIndexOf ("-->", m.start (0)); 271 boolean commentedOut = false; 272 if (previousCommentEnd < previousCommentStart) { 273 commentedOut = true; 275 } 276 277 if (others != null && !commentedOut) { 278 String otherbase = unescape(m.group(4)); 279 String otheranchor = unescape(m.group(5)); 280 String uri = (otheranchor == null) ? otherbase : otherbase + otheranchor; 281 String location = findLocation(content, m.start(4)); 282 String fixedUri; 283 if (uri.indexOf(' ') != -1) { 284 fixedUri = uri.replaceAll(" ", "%20"); 285 if (checkspaces) { 286 task.log(normalize(basepath, mappers) + location + ": spaces in URIs should be encoded as \"%20\": " + uri, Project.MSG_WARN); 287 } 288 } else { 289 fixedUri = uri; 290 } 291 try { 292 URI relUri = new URI(fixedUri); 293 if (!relUri.isOpaque()) { 294 URI o = base.resolve(relUri).normalize(); 295 if (!others.containsKey(o)) { 297 others.put(o, location); 299 } 300 } } catch (URISyntaxException e) { 302 task.log(normalize(basepath, mappers) + location + ": bad relative URI: " + e.getMessage(), Project.MSG_WARN); 304 } 305 } } 307 } 308 } else { 309 task.log("Not checking contents of " + base, Project.MSG_VERBOSE); 310 } 311 if (! okurls.contains(u)) { 312 task.log(normalize(referrer, mappers) + referrerLocation + ": broken link: " + u, Project.MSG_WARN); 313 } 314 if (others != null) { 315 Iterator it = others.entrySet().iterator(); 316 while (it.hasNext()) { 317 Map.Entry entry = (Map.Entry)it.next(); 318 URI other = (URI)entry.getKey(); 319 String location = (String )entry.getValue(); 320 scan(task, basepath, location, other, okurls, badurls, cleanurls, checkexternal, checkspaces, checkforbidden, recurse == 1 ? 0 : 2, mappers, filters); 321 } 322 } 323 } 324 325 private static String normalize(String path, List<Mapper> mappers) throws IOException { 326 try { 327 for (Mapper m : mappers) { 328 String [] nue = m.getImplementation().mapFileName(path); 329 if (nue != null) { 330 for (int i = 0; i < nue.length; i++) { 331 File f = new File(nue[i]); 332 if (f.isFile()) { 333 return new File(f.toURI().normalize()).getAbsolutePath(); 334 } 335 } 336 } 337 } 338 return path; 339 } catch (BuildException e) { 340 throw new IOException(e.toString()); 341 } 342 } 343 344 private static String unescape(String text) { 345 if (text == null) { 346 return null; 347 } 348 int pos = 0; 349 int search; 350 while ((search = text.indexOf('&', pos)) != -1) { 351 int semi = text.indexOf(';', search + 1); 352 if (semi == -1) { 353 return text; 355 } 356 String entity = text.substring(search + 1, semi); 357 String repl; 358 if (entity.equals("amp")) { 359 repl = "&"; 360 } else if (entity.equals("quot")) { 361 repl = "\""; 362 } else if (entity.equals("lt")) { 363 repl = "<"; 364 } else if (entity.equals("gt")) { 365 repl = ">"; 366 } else if (entity.equals("apos")) { 367 repl = "'"; 368 } else { 369 pos = semi + 1; 371 continue; 372 } 373 text = text.substring(0, search) + repl + text.substring(semi + 1); 374 pos = search + repl.length(); 375 } 376 return text; 377 } 378 379 private static String findLocation(CharSequence content, int pos) { 380 Matcher lbm = lineBreak.matcher(content); 381 int line = 0; 382 int col = 1; 383 while (lbm.find()) { 384 if (lbm.start() <= pos) { 385 line++; 386 col = pos - lbm.start() + 1; 387 } else { 388 break; 389 } 390 } 391 return ":" + line + ":" + col; 392 } 393 394 public final class Filter extends Object { 395 private Boolean accept; 396 private Pattern pattern; 397 398 public void setAccept (boolean a) { 399 accept = Boolean.valueOf (a); 400 } 401 402 public void setPattern (String s) { 403 pattern = Pattern.compile (s, Pattern.CASE_INSENSITIVE); 404 } 405 406 409 final Boolean isOk (URI u) throws BuildException { 410 if (accept == null) { 411 throw new BuildException ("Each filter must have accept attribute"); 412 } 413 if (pattern == null) { 414 throw new BuildException ("Each filter must have pattern attribute"); 415 } 416 417 if (pattern.matcher (u.toString ()).matches ()) { 418 log ("Matched " + u + " accepted: " + accept, org.apache.tools.ant.Project.MSG_VERBOSE); 419 return accept; 420 } 421 return null; 422 } 423 } 424 } 425 | Popular Tags |