package org.apache.commons.validator;

import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.validator.util.Flags;
import org.apache.oro.text.perl.Perl5Util;

/**
 * Validates URL strings.
 *
 * <p>A candidate is first required to consist of 7-bit ASCII characters only.
 * It is then split into scheme, authority, path, query and fragment with a
 * single regular expression, and each component is checked by its own
 * {@code isValidXxx} method. Those methods are {@code protected} so that
 * subclasses can replace individual checks.</p>
 *
 * <p>Behaviour is tuned with the bit flags {@link #ALLOW_ALL_SCHEMES},
 * {@link #ALLOW_2_SLASHES} and {@link #NO_FRAGMENTS}, which may be OR-ed
 * together and passed to a constructor.</p>
 *
 * <p>Every validation call creates its own {@code Perl5Util} instances; no
 * matcher is stored in a field.</p>
 */
public class UrlValidator implements Serializable {

    /**
     * Option flag: accept any syntactically valid scheme instead of only the
     * schemes registered with this validator.
     */
    public static final int ALLOW_ALL_SCHEMES = 1 << 0;

    /**
     * Option flag: accept paths that contain two consecutive slashes.
     */
    public static final int ALLOW_2_SLASHES = 1 << 1;

    /**
     * Option flag: reject any URL that carries a fragment.
     */
    public static final int NO_FRAGMENTS = 1 << 2;

    private static final String ALPHA_CHARS = "a-zA-Z";

    private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d";

    private static final String SPECIAL_CHARS = ";/@&=,.?:+$";

    private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";

    /**
     * Characters allowed after the first character of a scheme
     * (RFC 2396, section 3.1: alpha, digit, "+", "-", ".").
     */
    private static final String SCHEME_CHARS = ALPHA_NUMERIC_CHARS + "+\\-.";

    private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\.";

    private static final String ATOM = VALID_CHARS + '+';

    /**
     * Splits a URL into its components; the {@code PARSE_URL_*} constants are
     * the group numbers of the individual components.
     */
    private static final String URL_PATTERN =
            "/^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?/";

    /** Group of {@link #URL_PATTERN} holding the scheme, without the colon. */
    private static final int PARSE_URL_SCHEME = 2;

    /** Group of {@link #URL_PATTERN} holding the authority, without the leading slashes. */
    private static final int PARSE_URL_AUTHORITY = 4;

    /** Group of {@link #URL_PATTERN} holding the path. */
    private static final int PARSE_URL_PATH = 5;

    /** Group of {@link #URL_PATTERN} holding the query, without the question mark. */
    private static final int PARSE_URL_QUERY = 7;

    /** Group of {@link #URL_PATTERN} holding the fragment, without the hash sign. */
    private static final int PARSE_URL_FRAGMENT = 9;

    /**
     * A scheme must start with a letter and continue with
     * {@link #SCHEME_CHARS} only. The whole scheme is anchored so that
     * illegal characters after the first one are rejected as well.
     */
    private static final String SCHEME_PATTERN =
            "/^[" + ALPHA_CHARS + "][" + SCHEME_CHARS + "]*$/";

    /**
     * Splits an authority into host, optional port and trailing extra text;
     * see the {@code PARSE_AUTHORITY_*} group constants.
     */
    private static final String AUTHORITY_PATTERN =
            "/^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?/";

    /** Group of {@link #AUTHORITY_PATTERN} holding the host name or IP address. */
    private static final int PARSE_AUTHORITY_HOST_IP = 1;

    /** Group of {@link #AUTHORITY_PATTERN} holding the port, including the leading colon. */
    private static final int PARSE_AUTHORITY_PORT = 2;

    /** Group of {@link #AUTHORITY_PATTERN} holding whatever follows host and port; must be blank. */
    private static final int PARSE_AUTHORITY_EXTRA = 3;

    /**
     * Either an empty path or an absolute path made of the listed characters.
     * The group is optional because RFC 2396 allows a URL with an authority
     * and no path at all.
     */
    private static final String PATH_PATTERN = "/^(/[-\\w:@&?=+,.!/~*'%$]*)?$/";

    private static final String QUERY_PATTERN = "/^(.*)$/";

    private static final String LEGAL_ASCII_PATTERN = "/^[\\000-\\177]+$/";

    private static final String IP_V4_DOMAIN_PATTERN =
            "/^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$/";

    private static final String DOMAIN_PATTERN =
            "/^" + ATOM + "(\\." + ATOM + ")*$/";

    private static final String PORT_PATTERN = "/^:(\\d{1,5})$/";

    private static final String ALPHA_PATTERN = "/^[" + ALPHA_CHARS + "]/";

    /** Largest port number that fits the 16-bit TCP/UDP port field. */
    private static final int MAX_PORT = 65535;

    /**
     * Option flags this validator was created with.
     */
    private Flags options = null;

    /**
     * Schemes accepted by {@link #isValidScheme(String)} unless
     * {@link #ALLOW_ALL_SCHEMES} is on. Left empty in that case.
     */
    private Set allowedSchemes = new HashSet();

    /**
     * Schemes used when the caller passes {@code null} for the scheme list.
     */
    protected String[] defaultSchemes = {"http", "https", "ftp"};

    /**
     * Creates a validator with no option flags set that accepts the
     * {@link #defaultSchemes}.
     */
    public UrlValidator() {
        this(null);
    }

    /**
     * Creates a validator with no option flags set.
     *
     * @param schemes the schemes to accept; {@code null} selects
     *        {@link #defaultSchemes}
     */
    public UrlValidator(String[] schemes) {
        this(schemes, 0);
    }

    /**
     * Creates a validator that accepts the {@link #defaultSchemes}.
     *
     * @param options bitwise OR of the option flags declared on this class
     */
    public UrlValidator(int options) {
        this(null, options);
    }

    /**
     * Creates a validator.
     *
     * @param schemes the schemes to accept; {@code null} selects
     *        {@link #defaultSchemes}. Ignored when {@link #ALLOW_ALL_SCHEMES}
     *        is part of {@code options}.
     * @param options bitwise OR of the option flags declared on this class
     */
    public UrlValidator(String[] schemes, int options) {
        this.options = new Flags(options);

        if (this.options.isOn(ALLOW_ALL_SCHEMES)) {
            // The allowed-scheme set is never consulted in this mode.
            return;
        }

        if (schemes == null) {
            schemes = this.defaultSchemes;
        }

        this.allowedSchemes.addAll(Arrays.asList(schemes));
    }

    /**
     * Checks whether the given string is a valid URL under this validator's
     * options.
     *
     * @param value the candidate URL; {@code null} is invalid
     * @return {@code true} if the value is pure ASCII and every URL component
     *         passes its check
     */
    public boolean isValid(String value) {
        if (value == null) {
            return false;
        }

        Perl5Util matchUrlPat = new Perl5Util();
        Perl5Util matchAsciiPat = new Perl5Util();

        if (!matchAsciiPat.match(LEGAL_ASCII_PATTERN, value)) {
            return false;
        }

        if (!matchUrlPat.match(URL_PATTERN, value)) {
            return false;
        }

        if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) {
            return false;
        }

        if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) {
            return false;
        }

        if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) {
            return false;
        }

        if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) {
            return false;
        }

        if (!isValidFragment(matchUrlPat.group(PARSE_URL_FRAGMENT))) {
            return false;
        }

        return true;
    }

    /**
     * Checks the scheme component.
     *
     * @param scheme the scheme without the trailing colon; {@code null} is
     *        invalid
     * @return {@code true} if the scheme is well formed and, unless
     *         {@link #ALLOW_ALL_SCHEMES} is on, is one of the allowed schemes
     *         (compared case-sensitively)
     */
    protected boolean isValidScheme(String scheme) {
        if (scheme == null) {
            return false;
        }

        Perl5Util schemeMatcher = new Perl5Util();
        if (!schemeMatcher.match(SCHEME_PATTERN, scheme)) {
            return false;
        }

        if (this.options.isOff(ALLOW_ALL_SCHEMES)
                && !this.allowedSchemes.contains(scheme)) {
            return false;
        }

        return true;
    }

    /**
     * Checks the authority component: a dotted-quad IPv4 address or a host
     * name, optionally followed by {@code :port}, and nothing else.
     *
     * @param authority the authority without the leading {@code //};
     *        {@code null} is invalid
     * @return {@code true} if host, port and trailing text are all acceptable
     */
    protected boolean isValidAuthority(String authority) {
        if (authority == null) {
            return false;
        }

        Perl5Util authorityMatcher = new Perl5Util();
        if (!authorityMatcher.match(AUTHORITY_PATTERN, authority)) {
            return false;
        }

        String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP);
        if (hostIP == null) {
            return false;
        }

        // A host shaped like a dotted quad is judged as an IP address only;
        // anything else has to qualify as a host name.
        Perl5Util matchIPV4Pat = new Perl5Util();
        if (matchIPV4Pat.match(IP_V4_DOMAIN_PATTERN, hostIP)) {
            if (!hasValidIpV4Segments(matchIPV4Pat)) {
                return false;
            }
        } else if (!isValidHostname(hostIP)) {
            return false;
        }

        String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
        if (port != null && !isValidPort(port)) {
            return false;
        }

        String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA);
        if (!GenericValidator.isBlankOrNull(extra)) {
            return false;
        }

        return true;
    }

    /**
     * Checks that each of the four groups captured by
     * {@link #IP_V4_DOMAIN_PATTERN} is a number no greater than 255.
     *
     * @param ipMatcher a matcher that has just matched
     *        {@link #IP_V4_DOMAIN_PATTERN}
     * @return {@code true} if all four segments are in range
     */
    private boolean hasValidIpV4Segments(Perl5Util ipMatcher) {
        for (int i = 1; i <= 4; i++) {
            String ipSegment = ipMatcher.group(i);
            if (ipSegment == null || ipSegment.length() <= 0) {
                return false;
            }

            try {
                if (Integer.parseInt(ipSegment) > 255) {
                    return false;
                }
            } catch (NumberFormatException e) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks a host name: it must match {@link #DOMAIN_PATTERN}, consist of at
     * least two dot-separated labels, and end in a label of 2 to 4 characters
     * that starts with a letter.
     *
     * <p>The last label is located with {@code lastIndexOf} rather than by
     * collecting labels into a fixed-size array, so hosts with any number of
     * labels are handled without an index overflow.</p>
     *
     * @param host the host part of the authority
     * @return {@code true} if the host passes all checks
     */
    private boolean isValidHostname(String host) {
        Perl5Util domainMatcher = new Perl5Util();
        if (!domainMatcher.match(DOMAIN_PATTERN, host)) {
            return false;
        }

        // DOMAIN_PATTERN guarantees labels separated by single dots, so a
        // missing dot means there is only one label.
        int lastDot = host.lastIndexOf('.');
        if (lastDot < 0) {
            return false;
        }

        String topLevel = host.substring(lastDot + 1);
        // NOTE(review): the 2..4 length window is a hard-coded heuristic and
        // rejects longer top-level labels; kept unchanged here.
        if (topLevel.length() < 2 || topLevel.length() > 4) {
            return false;
        }

        Perl5Util alphaMatcher = new Perl5Util();
        return alphaMatcher.match(ALPHA_PATTERN, topLevel);
    }

    /**
     * Checks the port part of an authority.
     *
     * @param port the port including its leading colon
     * @return {@code true} if the port is 1 to 5 digits and not greater than
     *         {@link #MAX_PORT}
     */
    private boolean isValidPort(String port) {
        Perl5Util portMatcher = new Perl5Util();
        if (!portMatcher.match(PORT_PATTERN, port)) {
            return false;
        }

        try {
            // Five digits can spell values up to 99999, beyond the port range.
            return Integer.parseInt(portMatcher.group(1)) <= MAX_PORT;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Checks the path component. An empty path and a path ending in a slash
     * are both accepted.
     *
     * @param path the path; {@code null} is invalid
     * @return {@code true} if the path uses only permitted characters, does
     *         not contain {@code //} unless {@link #ALLOW_2_SLASHES} is on,
     *         and passes the {@code ..} check below
     */
    protected boolean isValidPath(String path) {
        if (path == null) {
            return false;
        }

        Perl5Util pathMatcher = new Perl5Util();
        if (!pathMatcher.match(PATH_PATTERN, path)) {
            return false;
        }

        int slash2Count = countToken("//", path);
        if (this.options.isOff(ALLOW_2_SLASHES) && (slash2Count > 0)) {
            return false;
        }

        // Reject the path when ".." occurs at least as often as there are
        // slashes left after discounting double slashes and the leading one.
        int slashCount = countToken("/", path);
        int dot2Count = countToken("..", path);
        if (dot2Count > 0 && (slashCount - slash2Count - 1) <= dot2Count) {
            return false;
        }

        return true;
    }

    /**
     * Checks the query component.
     *
     * @param query the query without the leading question mark; {@code null}
     *        (no query) is valid
     * @return {@code true} if there is no query or it matches the query
     *         pattern
     */
    protected boolean isValidQuery(String query) {
        if (query == null) {
            return true;
        }

        Perl5Util queryMatcher = new Perl5Util();
        return queryMatcher.match(QUERY_PATTERN, query);
    }

    /**
     * Checks the fragment component.
     *
     * @param fragment the fragment without the leading hash sign;
     *        {@code null} (no fragment) is valid
     * @return {@code true} if there is no fragment or {@link #NO_FRAGMENTS}
     *         is off
     */
    protected boolean isValidFragment(String fragment) {
        if (fragment == null) {
            return true;
        }

        return this.options.isOff(NO_FRAGMENTS);
    }

    /**
     * Counts the occurrences of {@code token} in {@code target}. Overlapping
     * occurrences are counted, so {@code "//"} occurs twice in {@code "///"}.
     *
     * @param token the text to look for
     * @param target the text to search
     * @return the number of occurrences; 0 if either argument is {@code null}
     *         or {@code token} is empty
     */
    protected int countToken(String token, String target) {
        // An empty token matches at every index and would never let the scan
        // finish, so it is treated as "no occurrences".
        if (token == null || target == null || token.length() == 0) {
            return 0;
        }

        int count = 0;
        int tokenIndex = target.indexOf(token);
        while (tokenIndex != -1) {
            count++;
            // Advance by one character only, to keep counting overlaps.
            tokenIndex = target.indexOf(token, tokenIndex + 1);
        }
        return count;
    }
}