1 package net.javacoding.jspider.core.util; 2 3 import java.net.MalformedURLException ; 4 import java.net.URL ; 5 import java.util.ArrayList ; 6 import java.util.StringTokenizer ; 7 8 15 public class URLUtil { 16 17 22 public static URL normalize(URL original) { 23 URL normalized = null; 24 25 if (original != null) { 26 27 String urlString = original.toString ( ); 28 29 urlString = normalizeDotFolders(urlString); 30 urlString = normalizeBackSlashes(urlString); 31 urlString = normalizeDoubleSlashes(urlString); 32 urlString = normalizeStripQuery(urlString) ; 33 35 try { 36 normalized = new URL (urlString); 37 } catch (MalformedURLException e) { 38 } 39 } 40 return normalized; 41 } 42 43 48 protected static String normalizeBackSlashes ( String original ) { 49 return StringUtil.replace(original, "\\", "/"); 50 } 51 52 57 protected static String normalizeDoubleSlashes ( String original ) { 58 return StringUtil.replace(original, "//", "/", 7); 59 } 60 61 67 protected static String normalizeDotFolders ( String original ) { 68 return StringUtil.replace(original, "/./", "/"); 69 } 70 71 77 protected static String normalizeStripQuery ( String original ) { 78 int index = original.indexOf('?'); 79 if (index >= 0) { 80 return original.substring(0, index); 81 } else { 82 return original; 83 } 84 } 85 86 91 protected static String normalizeStripTrailingSlash ( String original ) { 92 if (original.endsWith("/")) { 93 return original.substring(0, original.length() - 1); 94 } else { 95 return original; 96 } 97 } 98 99 104 public static URL getSiteURL(URL resourceURL) { 105 URL siteURL = null; 106 if (resourceURL != null) { 107 try { 108 siteURL = new URL (resourceURL.getProtocol(), resourceURL.getHost(), resourceURL.getPort(), ""); 109 } catch (MalformedURLException e) { 110 } 112 } 113 return siteURL; 114 } 115 116 121 public static URL getRobotsTXTURL(URL resourceURL) { 122 URL retVal = null; 123 if (resourceURL != null) { 124 try { 125 retVal = new URL (getSiteURL(resourceURL), "/robots.txt"); 126 } catch (MalformedURLException e) { 127 } 128 } 129 return retVal; 130 } 131 132 137 public static String stripResource(String path) { 138 String result = null; 139 if (path != null) { 140 int pos = path.lastIndexOf("/"); 141 result = path.substring(0, pos + 1); 142 } 143 return result; 144 } 145 146 151 public static int getDepth(URL url) { 152 int depth = 0; 153 154 if (url != null) { 155 String path = url.getPath(); 156 if (!isFileSpecified(url) && !path.endsWith("/")) { 157 path = path + "/"; 158 } 159 int pos = path.indexOf('/'); 160 while (pos != -1) { 161 if (pos > 0) { 162 depth++; 163 } 164 pos = path.indexOf('/', pos + 1); 165 } 166 } 167 return depth; 168 } 169 170 177 public static boolean isFileSpecified(URL url) { 178 boolean specified = false; 179 180 String path = url.getPath(); 181 int posLastSlash = path.lastIndexOf('/'); 182 int posLastDot = path.lastIndexOf('.'); 183 184 specified = posLastDot > posLastSlash; 185 186 return specified; 187 } 188 189 195 public static String [] getFolderNames(URL url) { 196 url = normalize(url); 197 ArrayList al = new ArrayList (); 198 199 String path = url.getPath(); 200 if (isFileSpecified(url)) { 201 path = stripResource(path); 202 } 203 StringTokenizer st = new StringTokenizer (path, "/"); 204 205 while (st.hasMoreTokens()) { 206 al.add(st.nextToken()); 207 } 208 return (String []) al.toArray(new String [al.size()]); 209 } 210 211 217 public static String getFileName(URL url) { 218 return url.getPath().substring(stripResource(url.getPath()).length()); 219 } 220 221 } 222 | Popular Tags |