import java.nio.charset.Charset;
import java.util.HashMap;

/**
 * Static helpers for padding strings and for handling character-encoding
 * names found in HTTP-style <code>Content-Type</code> values.
 */
public class StringUtil {

  /** Parameter prefix searched for (case-insensitively) in a content type. */
  private static final String CHARSET_PARAM = "charset=";

  /**
   * Returns a copy of <code>s</code> padded with trailing spaces so that its
   * length is at least <code>length</code>. Strings that are already long
   * enough are returned unchanged.
   *
   * @param s the string to pad; must not be null
   * @param length the minimum length of the result
   * @return <code>s</code> followed by as many spaces as needed
   */
  public static String rightPad(String s, int length) {
    // Comparing instead of subtracting avoids int overflow for very negative
    // lengths, which would otherwise request a huge amount of padding.
    if (length <= s.length()) {
      return s;
    }
    StringBuffer sb = new StringBuffer(length);
    sb.append(s);
    for (int i = s.length(); i < length; i++) {
      sb.append(' ');
    }
    return sb.toString();
  }

  /**
   * Returns a copy of <code>s</code> padded with leading spaces so that its
   * length is at least <code>length</code>. Strings that are already long
   * enough are returned unchanged.
   *
   * @param s the string to pad; must not be null
   * @param length the minimum length of the result
   * @return as many spaces as needed followed by <code>s</code>
   */
  public static String leftPad(String s, int length) {
    // Same overflow-safe guard as in rightPad.
    if (length <= s.length()) {
      return s;
    }
    StringBuffer sb = new StringBuffer(length);
    for (int i = s.length(); i < length; i++) {
      sb.append(' ');
    }
    sb.append(s);
    return sb.toString();
  }

  /**
   * Extracts the character encoding from a content-type value such as
   * <code>text/html; charset=UTF-8</code>.
   *
   * <p>The <code>charset=</code> parameter name is matched without regard to
   * case. The value ends at the next <code>';'</code> (or the end of the
   * string), is trimmed, and one pair of enclosing double quotes is removed
   * when the quoted text is non-empty.
   *
   * @param contentType the content-type value, may be null
   * @return the encoding name, or null if <code>contentType</code> is null or
   *         contains no <code>charset=</code> parameter
   */
  public static String parseCharacterEncoding(String contentType) {
    if (contentType == null) {
      return null;
    }
    int start = indexOfIgnoreCase(contentType, CHARSET_PARAM);
    if (start < 0) {
      return null;
    }
    String encoding = contentType.substring(start + CHARSET_PARAM.length());
    int end = encoding.indexOf(';');
    if (end >= 0) {
      encoding = encoding.substring(0, end);
    }
    encoding = encoding.trim();
    // Strip the quotes only when something is between them, so a bare ""
    // is returned as-is.
    if (encoding.length() > 2
        && encoding.startsWith("\"")
        && encoding.endsWith("\"")) {
      encoding = encoding.substring(1, encoding.length() - 1);
    }
    return encoding.trim();
  }

  /**
   * Finds the first occurrence of <code>token</code> in <code>text</code>,
   * ignoring case. Works on the original string so the returned index is
   * always valid for <code>text</code>.
   */
  private static int indexOfIgnoreCase(String text, String token) {
    int last = text.length() - token.length();
    for (int i = 0; i <= last; i++) {
      if (text.regionMatches(true, i, token, 0, token.length())) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Maps a canonical charset name to the charset name that should be used
   * in its place (String to String).
   */
  private static final HashMap encodingAliases = new HashMap();

  static {
    // NOTE(review): each replacement is presumably a superset of the charset
    // it stands in for -- confirm before adding further entries.
    encodingAliases.put("ISO-8859-1", "windows-1252");
    encodingAliases.put("EUC-KR", "x-windows-949");
    encodingAliases.put("x-EUC-CN", "GB18030");
    encodingAliases.put("GBK", "GB18030");
  }

  /**
   * Resolves an encoding name to the charset name that should be used for it.
   *
   * <p>The name is first converted to the canonical name reported by
   * {@link Charset}; if the alias table has an entry for that canonical name,
   * the mapped name is returned instead.
   *
   * @param encoding the encoding name to resolve, may be null
   * @return the resolved charset name, or null if <code>encoding</code> is
   *         null, is not a legal charset name, or is not supported by this JVM
   */
  public static String resolveEncodingAlias(String encoding) {
    if (encoding == null) {
      return null;
    }
    try {
      if (!Charset.isSupported(encoding)) {
        return null;
      }
      String canonicalName = Charset.forName(encoding).name();
      String alias = (String) encodingAliases.get(canonicalName);
      return alias != null ? alias : canonicalName;
    } catch (IllegalArgumentException e) {
      // Charset rejects syntactically illegal names (for example "" or names
      // containing spaces) with IllegalCharsetNameException, a subclass of
      // IllegalArgumentException. Such a name cannot be resolved, so report
      // it the same way as an unsupported one.
      return null;
    }
  }

  /**
   * Command-line entry point: prints what the single encoding-name argument
   * resolves to, or a usage message when the argument count is wrong.
   */
  public static void main(String[] args) {
    if (args.length != 1) {
      System.out.println("Usage: StringUtil <encoding name>");
    } else {
      System.out.println(args[0] + " is resolved to "
          + resolveEncodingAlias(args[0]));
    }
  }
}