package org.archive.util;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.regex.Matcher;

/**
 * Rewrites authority-based URIs so that the host name components appear in
 * reversed order inside a parenthesised authority section.
 *
 * <p>For a URI of the shape {@code scheme://userinfo@host:port/path} the
 * result has the shape {@code scheme://(reversed,host,:port@userinfo)/path},
 * for example:
 *
 * <pre>
 *   http://www.archive.org/movies/   -&gt;  http://(org,archive,www,)/movies/
 *   http://user@www.example.com:8080/a
 *                                    -&gt;  http://(com,example,www,:8080@user)/a
 *   http://127.0.0.1/                -&gt;  http://(127.0.0.1)/
 * </pre>
 *
 * <p>The complete result (including the path) is lower-cased. Strings that
 * do not match {@link #URI_SPLITTER} are returned unchanged.
 */
public class SURT {
    /** Separator between host name segments in the original URI. */
    static char DOT = '.';
    /** Marker written immediately before the transformed authority. */
    static String BEGIN_TRANSFORMED_AUTHORITY = "(";
    /** Delimiter written after every reversed host name segment. */
    static String TRANSFORMED_HOST_DELIM = ",";
    /** Marker written immediately after the transformed authority. */
    static String END_TRANSFORMED_AUTHORITY = ")";

    /**
     * Regular expression that splits a URI into the pieces rearranged by
     * {@link #fromURI(String)}. Capturing groups:
     * <ol>
     *   <li>scheme including {@code ://}</li>
     *   <li>userinfo (optional)</li>
     *   <li>the {@code @} following the userinfo (optional)</li>
     *   <li>host, when it is a dotted-quad of 1-3 digit numbers</li>
     *   <li>host, in every other case</li>
     *   <li>{@code :port} (optional)</li>
     *   <li>path starting with {@code /} (optional)</li>
     * </ol>
     */
    static String URI_SPLITTER =
            "^(\\w+://)(?:([-\\w\\.!~\\*'\\(\\)%;:&=+$,]+?)(@))?"+
            "(?:((?:\\d{1,3}\\.){3}\\d{1,3})|(\\S+?))(:\\d+)?(/\\S*)?$";

    /**
     * Converts the given URI string to its SURT form.
     *
     * @param s URI string to convert
     * @return the lower-cased SURT form, or {@code s} itself (unchanged) when
     *         it does not match {@link #URI_SPLITTER}
     */
    public static String fromURI(String s) {
        Matcher m = TextUtils.getMatcher(URI_SPLITTER, s);
        try {
            if (!m.matches()) {
                // Not an authority-based URI: hand it back untouched.
                return s;
            }
            // The output is longer than the input by at most three
            // characters: '(', ')' and one more ',' than there are '.'s.
            StringBuilder builder = new StringBuilder(s.length() + 3);
            append(builder, s, m.start(1), m.end(1)); // scheme://
            builder.append(BEGIN_TRANSFORMED_AUTHORITY);

            if (m.start(4) > -1) {
                // Dotted-quad host: copied as-is, never reversed.
                append(builder, s, m.start(4), m.end(4));
            } else {
                // Any other host: walk backwards and emit each dot-separated
                // segment, last segment first, each followed by a delimiter.
                int hostSegEnd = m.end(5);
                int hostStart = m.start(5);
                for (int i = m.end(5) - 1; i >= hostStart; i--) {
                    // Keep scanning until i is the first character of a
                    // segment, i.e. it follows a dot or starts the host.
                    if (i > hostStart && s.charAt(i - 1) != DOT) {
                        continue;
                    }
                    append(builder, s, i, hostSegEnd);
                    builder.append(TRANSFORMED_HOST_DELIM);
                    hostSegEnd = i - 1; // step over the separating dot
                }
            }

            append(builder, s, m.start(6), m.end(6)); // :port
            append(builder, s, m.start(3), m.end(3)); // @
            append(builder, s, m.start(2), m.end(2)); // userinfo
            builder.append(END_TRANSFORMED_AUTHORITY);
            append(builder, s, m.start(7), m.end(7)); // path

            // Lower-case every character of the result, path included.
            for (int i = 0; i < builder.length(); i++) {
                builder.setCharAt(i, Character.toLowerCase(builder.charAt(i)));
            }
            return builder.toString();
        } finally {
            // Always return the matcher, even if the conversion fails.
            TextUtils.recycleMatcher(m);
        }
    }

    /**
     * Appends {@code cs[start, end)} to {@code b}; does nothing when
     * {@code start} is negative, which is what {@link Matcher#start(int)}
     * reports for a group that did not participate in the match.
     *
     * @param b     destination buffer
     * @param cs    source character sequence
     * @param start inclusive start index, or a negative value to skip
     * @param end   exclusive end index
     */
    private static void append(StringBuilder b, CharSequence cs, int start,
            int end) {
        if (start < 0) {
            return;
        }
        b.append(cs, start, end);
    }

    /**
     * Command-line entry point: converts a list of URIs, one per line, to
     * SURT form.
     *
     * <p>Input is read from the file named by the first argument, or from
     * standard input when no argument is given. Output goes to the file named
     * by the second argument, or to standard output. Everything from the
     * first {@code #} on a line to its end is discarded as a comment, and
     * lines that are empty after trimming are skipped. Each remaining line is
     * passed through {@code ArchiveUtils.addImpliedHttpIfNecessary} before
     * conversion (presumably so that bare hosts are treated as HTTP URIs;
     * see that method for the exact rule).
     *
     * @param args optional input file name and optional output file name
     * @throws IOException if the input or output cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        InputStream in = args.length > 0
                ? new BufferedInputStream(new FileInputStream(args[0]))
                : System.in;
        try {
            PrintStream out = args.length > 1
                    ? new PrintStream(new BufferedOutputStream(
                            new FileOutputStream(args[1])))
                    : System.out;
            try {
                BufferedReader br =
                        new BufferedReader(new InputStreamReader(in));
                String line;
                while ((line = br.readLine()) != null) {
                    // Strip comments, including ones that start in column 0.
                    int commentStart = line.indexOf('#');
                    if (commentStart >= 0) {
                        line = line.substring(0, commentStart);
                    }
                    line = line.trim();
                    if (line.length() == 0) {
                        continue;
                    }
                    line = ArchiveUtils.addImpliedHttpIfNecessary(line);
                    out.println(SURT.fromURI(line));
                }
            } finally {
                // Flushes buffered output even when conversion fails midway.
                out.close();
            }
        } finally {
            in.close();
        }
    }
}