1 23 package org.openharmonise.rm.resources.content.utils; 24 25 26 import java.io.IOException ; 27 import java.net.*; 28 import java.text.*; 29 import java.util.*; 30 import java.util.logging.*; 31 import java.util.regex.*; 32 33 import javax.xml.parsers.*; 34 35 import org.openharmonise.rm.DataAccessException; 36 import org.openharmonise.rm.resources.content.Asset; 37 import org.w3c.dom.*; 38 39 72 public class LinkChecker { 73 private List assetsToCheck; 74 75 private List errorsList; 77 private boolean bURLsChecked = false; 81 private Date dateRun; 82 83 private DateFormat dateFormat; 84 85 private DateFormat xmlDateTimeFormat; 86 87 private Hashtable errorCodes; 89 92 private static final Logger m_logger = Logger.getLogger(LinkChecker.class 93 .getName()); 94 95 { 96 errorCodes = new Hashtable(); 97 errorCodes 98 .put( 99 new Integer (201), 100 "Following a POST command, this indicates success, but the textual part of the response line indicates the URI by which the newly created document should be known."); 101 errorCodes 102 .put( 103 new Integer (202), 104 "The request has been accepted for processing, but the processing has not been completed. The request may or may not eventually be acted upon, as it may be disallowed when processing actually takes place. there is no facility for status returns from asynchronous operations such as this."); 105 errorCodes 106 .put( 107 new Integer (203), 108 "When received in the response to a GET command, this indicates that the returned metainformation is not a definitive set of the object from a server with a copy of the object, but is from a private overlaid web. This may include annotation information about the object, for example."); 109 errorCodes 110 .put( 111 new Integer (204), 112 "Server has received the request but there is no information to send back, and the client should stay in the same document view. This is mainly to allow input for scripts without changing the document at the same time."); 113 errorCodes.put(new Integer (300), "Multiple Choices"); 114 errorCodes 115 .put(new Integer (301), 116 "The requested resource has been assigned the following new URL: "); 117 errorCodes.put(new Integer (302), 118 "The requested resource resides temporarily under the "); 119 errorCodes.put(new Integer (304), "304 Not Modified"); 120 errorCodes 121 .put(new Integer (305), 122 "The requested resource MUST be accessed through the proxy given by "); 123 errorCodes.put(new Integer (306), "306 (Unused)"); 124 errorCodes 125 .put(new Integer (307), 126 "The requested resource resides temporarily under the following URI: "); 127 errorCodes 128 .put(new Integer (400), 129 "The request had bad syntax or was inherently impossible to be satisfied."); 130 errorCodes 131 .put( 132 new Integer (401), 133 "The parameter to this message gives a specification of authorization schemes which are acceptable. The client should retry the request with a suitable Authorization header."); 134 errorCodes 135 .put( 136 new Integer (402), 137 "The parameter to this message gives a specification of charging schemes acceptable. The client may retry the request with a suitable ChargeTo header."); 138 errorCodes 139 .put(new Integer (403), 140 "The request is for something forbidden. Authorization will not help."); 141 errorCodes.put(new Integer (404), 142 "The server has not found anything matching the URI given"); 143 errorCodes 144 .put( 145 new Integer (500), 146 "The server encountered an unexpected condition which prevented it from fulfilling the request."); 147 errorCodes.put(new Integer (501), 148 "The server does not support the facility required."); 149 errorCodes 150 .put( 151 new Integer (502), 152 "The server cannot process the request due to a high load (whether HTTP servicing or other requests). The implication is that this is a temporary condition which maybe alleviated at other times."); 153 errorCodes 154 .put( 155 new Integer (503), 156 "This is equivalent to Internal Error 500, but in the case of a server which is in turn accessing some other service, this indicates that the respose from the other service did not return within a time that the gateway was prepared to wait. As from the point of view of the clientand the HTTP transaction the other service is hidden within the server, this maybe treated identically to Internal error 500, but has more diagnostic value."); 157 } 158 159 167 public LinkChecker(List assetsToCheck) { 168 if (assetsToCheck == null) { 169 throw new NullPointerException ("urlsToCheck was null"); 171 } 172 this.assetsToCheck = assetsToCheck; 173 errorsList = new ArrayList(); 174 dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm"); 175 xmlDateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); 176 } 177 178 205 public void checkLinks() { 206 Iterator it = assetsToCheck.iterator(); 208 Pattern pattern = Pattern.compile(".*://.*"); 209 HttpURLConnection.setFollowRedirects(false); 210 while (it.hasNext()) { 211 Asset asset = (Asset) it.next(); 212 String URLString = null; 213 try { 214 URLString = asset.getFullURL(); 215 } catch (DataAccessException dae) { 216 createError(asset, dae); 217 handleException(dae); 218 } 219 220 227 Matcher matcher = pattern.matcher(URLString); 228 if (!matcher.matches()) { 229 URLString = "http://" + URLString; 232 } 233 234 try { 235 236 URL url = new URL(URLString); 237 238 String sHost = url.getHost(); 239 240 if(sHost.indexOf(" ") >= 0 || sHost.indexOf(".") < 0) { 241 throw new MalformedURLException("'" + URLString + "' is a malformed URL"); 242 } 243 244 HttpURLConnection conn = (HttpURLConnection) url 245 .openConnection(); 246 int status = conn.getResponseCode(); 247 if (status != HttpURLConnection.HTTP_OK) { 248 createError(asset, conn); 249 } 250 251 } catch (MalformedURLException mue) { 252 createError(asset, mue); 253 } catch (IOException ioe) { 254 createError(asset, ioe); 255 handleException(ioe); 256 } catch (ClassCastException cce) { createError(asset, cce); 259 handleException(cce); 260 } 261 } 262 bURLsChecked = true; 263 dateRun = new Date(); 264 } 265 266 280 public String getReport() { if (bURLsChecked == false) { 283 throw new IllegalStateException ("URLs have not yet been checked"); 284 } 285 StringBuffer report = new StringBuffer (1500); 286 287 report.append("Link Checking Report\n"); 288 report.append("Run at: "); 289 report.append(dateFormat.format(dateRun)); 290 report.append("\n"); 291 292 if (errorsList.size() == 0) { report.append("No errors were detected"); 294 } else { Iterator errorsIt = errorsList.iterator(); 296 while (errorsIt.hasNext()) { 297 try { 298 report.append("\n"); 299 LinkStatus status = (LinkStatus) errorsIt.next(); 300 report.append(status.getAsset().getURI()); 301 report.append("\n"); 302 report.append(status.getErrorMessage()); 303 if (status.getNewURL() != null) { 304 report.append("\nNew Location: " + status.getNewURL()); 305 } 306 report.append("\n"); 307 } catch (DataAccessException e) { 308 m_logger.log(Level.WARNING, e.getLocalizedMessage(), e); 309 } 310 } 311 } 312 return report.toString(); 313 } 314 315 public Document getXMLReport() { 316 if (bURLsChecked == false) { 317 throw new IllegalStateException ("URLs have not yet been checked"); 318 } 319 Document doc = null; 320 321 try { 322 DocumentBuilderFactory factory = DocumentBuilderFactory 323 .newInstance(); 324 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 325 doc = docBuilder.newDocument(); 326 } catch (FactoryConfigurationError e) { 327 m_logger.log(Level.WARNING, e.getLocalizedMessage(), e); 328 } catch (ParserConfigurationException e) { 329 m_logger.log(Level.WARNING, e.getLocalizedMessage(), e); 330 } 331 332 Element reportElement = doc.createElement("ReportInstance"); 333 doc.appendChild(reportElement); 334 reportElement.setAttribute("date", xmlDateTimeFormat.format(dateRun)); 335 Element listElement = doc.createElement("List"); 336 reportElement.appendChild(listElement); 337 338 Iterator errorsIt = errorsList.iterator(); 339 while (errorsIt.hasNext()) { 340 try { 341 LinkStatus status = (LinkStatus) errorsIt.next(); 342 Element reportRowElement = doc.createElement("ReportRow"); 345 listElement.appendChild(reportRowElement); 346 347 Element objectElement = doc.createElement("Object"); 348 reportRowElement.appendChild(objectElement); 349 Asset asset = status.getAsset(); 351 Element nameElement = null; 352 Text nameTxt = null; 353 354 nameElement = doc.createElement("DisplayName"); 355 356 if (asset.getDisplayName() == null 357 || asset.getDisplayName().equals("")) { 358 nameTxt = doc.createTextNode(asset.getName()); 359 } else { 360 nameTxt = doc.createTextNode(asset.getDisplayName()); 361 } 362 363 nameElement.appendChild(nameTxt); 364 objectElement.appendChild(nameElement); 365 366 Element pathElement = doc.createElement("Path"); 367 objectElement.appendChild(pathElement); 368 Text pathTxt = doc.createTextNode(asset.getFullPath()); 369 pathElement.appendChild(pathTxt); 370 371 Element properiesElement = doc.createElement("Properties"); 372 reportRowElement.appendChild(properiesElement); 373 374 Element userElement = doc.createElement("User"); 375 reportRowElement.appendChild(userElement); 376 377 Element dateModifiedElement = doc.createElement("DateModified"); 378 reportRowElement.appendChild(dateModifiedElement); 379 380 Element actionElement = doc.createElement("Action"); 381 reportRowElement.appendChild(actionElement); 382 383 Element statusElement = doc.createElement("Status"); 384 reportRowElement.appendChild(statusElement); 385 Text statusTxt = doc.createTextNode(status.getErrorMessage()); 386 statusElement.appendChild(statusTxt); 387 } catch (DOMException e) { 388 m_logger.log(Level.WARNING, e.getLocalizedMessage(), e); 389 } catch (DataAccessException e) { 390 m_logger.log(Level.WARNING, e.getLocalizedMessage(), e); 391 } 392 } 393 394 return doc; 395 } 396 397 409 public List getErrorsList() { 410 if (bURLsChecked == false) { 411 throw new IllegalStateException ("URLs have not yet been checked"); 412 } else { 413 return Collections.unmodifiableList(errorsList); 414 } 415 } 416 417 private void handleException(Exception e) { 420 m_logger.log(Level.WARNING, e.getLocalizedMessage(), e); 421 } 422 423 438 private void createError(Asset asset, HttpURLConnection conn) { 439 LinkStatus status = null; 440 try { 441 status = new LinkStatus(asset, (String ) errorCodes.get(new Integer ( 443 conn.getResponseCode())) 444 + conn.getHeaderField("Location")); 445 if (conn.getResponseCode() >= 300 && conn.getResponseCode() < 308) { status.setNewURL(conn.getHeaderField("Location")); 449 } 450 } catch (IOException ioe) { 451 status = new LinkStatus(asset, ioe.getMessage()); 452 } 453 errorsList.add(status); 454 } 455 456 471 private void createError(Asset asset, Exception ex) { 472 String errorMessage = ex.getMessage(); 473 474 try { 475 if (ex instanceof UnknownHostException) { 476 errorMessage = "The host " + asset.getURI() + " is unknown."; 477 } 478 } catch (DataAccessException e) { 479 errorMessage = "There was a problem trying to get the resource URI."; 480 } 481 482 LinkStatus status = new LinkStatus(asset, errorMessage); 483 errorsList.add(status); 484 } 485 486 } 487 | Popular Tags |