1 6 package fr.jayasoft.ivy.url; 7 8 import fr.jayasoft.ivy.util.FileUtil; 9 import fr.jayasoft.ivy.util.Message; 10 11 import java.io.BufferedReader ; 12 import java.io.IOException ; 13 import java.io.InputStreamReader ; 14 import java.net.URL ; 15 import java.util.ArrayList ; 16 import java.util.List ; 17 import java.util.regex.Matcher ; 18 import java.util.regex.Pattern ; 19 20 21 30 public class ApacheURLLister { 31 33 private static final Pattern PATTERN = 34 Pattern.compile("<a[^>]*href=\"([^\"]*)\"[^>]*>(?:<[^>]+>)*?([^<>]+?)(?:<[^>]+>)*?</a>", 35 Pattern.CASE_INSENSITIVE); 36 37 39 49 public List listAll(URL url) throws IOException { 50 return retrieveListing(url, true, true); 51 } 52 53 54 64 public List listDirectories(URL url) throws IOException { 65 return retrieveListing(url, false, true); 66 } 67 68 69 79 public List listFiles(URL url) throws IOException { 80 return retrieveListing(url, true, false); 81 } 82 83 84 97 public List retrieveListing(URL url, boolean includeFiles, 98 boolean includeDirectories) throws IOException { 99 List urlList = new ArrayList (); 100 101 if (!url.getPath().endsWith("/") && !url.getPath().endsWith(".html")) { 103 url = new URL (url.getProtocol(), url.getHost(), url.getPort(), 104 url.getPath() + "/"); 105 } 106 107 BufferedReader r = 108 new BufferedReader (new InputStreamReader (URLHandlerRegistry.getDefault() 109 .openStream(url))); 110 111 String htmlText = FileUtil.readEntirely(r); 112 113 Matcher matcher = PATTERN.matcher(htmlText); 114 115 while (matcher.find()) { 116 String href = matcher.group(1); 118 String text = matcher.group(2); 119 120 if ((href == null) || (text == null)) { 121 continue; 123 } 124 125 text = text.trim(); 126 127 if (href.startsWith("/")) { 129 int slashIndex = href.substring(0, href.length() - 1).lastIndexOf('/'); 130 href= href.substring(slashIndex+1); 131 } 132 133 int dotIndex = text.indexOf('.'); 137 138 if ( ((dotIndex != -1) && !href.startsWith(text.substring(0, dotIndex))) 139 || ((dotIndex == -1) && !href.equalsIgnoreCase(text)) ) { 140 continue; 142 } 143 144 boolean directory = href.endsWith("/"); 145 146 if ((directory && includeDirectories) 147 || (!directory && includeFiles)) { 148 URL child = new URL (url, href); 149 urlList.add(child); 150 Message.debug("ApacheURLLister found URL=[" + child + "]."); 151 } 152 } 153 154 return urlList; 155 } 156 } 157 | Popular Tags |