1 37 38 package org.htmlcleaner; 39 40 import java.io.*; 41 import java.net.URL ; 42 43 49 public class Utils { 50 51 55 public static String ltrim(String s) { 56 if (s == null) { 57 return null; 58 } 59 60 int index = 0; 61 int len = s.length(); 62 63 while ( index < len && Character.isWhitespace(s.charAt(index)) ) { 64 index++; 65 } 66 67 return (index >= len) ? "" : s.substring(index); 68 } 69 70 74 public static String rtrim(String s) { 75 if (s == null) { 76 return null; 77 } 78 79 int len = s.length(); 80 int index = len; 81 82 while ( index > 0 && Character.isWhitespace(s.charAt(index-1)) ) { 83 index--; 84 } 85 86 return (index <= 0) ? "" : s.substring(0, index); 87 } 88 89 95 public static StringBuffer readUrl(URL url, String charset) throws IOException { 96 StringBuffer buffer = new StringBuffer (1024); 97 98 Object content = url.getContent(); 99 if (content instanceof InputStream) { 100 InputStreamReader reader = new InputStreamReader((InputStream)content, charset); 101 char[] charArray = new char[1024]; 102 103 int charsRead = 0; 104 do { 105 charsRead = reader.read(charArray); 106 if (charsRead >= 0) { 107 buffer.append(charArray, 0, charsRead); 108 } 109 } while (charsRead > 0); 110 } 111 112 return buffer; 113 } 114 115 public static boolean isHexadecimalDigit(char ch) { 116 return Character.isDigit(ch) || 117 ch == 'A' || ch == 'a' || ch == 'B' || ch == 'b' || ch == 'C' || ch == 'c' || 118 ch == 'D' || ch == 'd' || ch == 'E' || ch == 'e' || ch == 'F' || ch == 'f'; 119 } 120 121 124 public static String escapeXml(String s, boolean advanced, boolean recognizeUnicodeChars, boolean translateSpecialEntities) { 125 if (s != null) { 126 int len = s.length(); 127 StringBuffer result = new StringBuffer (len); 128 129 for (int i = 0; i < len; i++) { 130 char ch = s.charAt(i); 131 132 if (ch == '&') { 133 if ( recognizeUnicodeChars && (i < len-1) && (s.charAt(i+1) == '#') ) { 134 int charIndex = i + 2; 135 String unicode = ""; 136 while ( charIndex < len && 137 (isHexadecimalDigit(s.charAt(charIndex)) || s.charAt(charIndex) == 'x' || s.charAt(charIndex) == 'X') 138 ) { 139 unicode += s.charAt(charIndex); 140 charIndex++; 141 } 142 if (charIndex == len || !"".equals(unicode)) { 143 try { 144 char unicodeChar = unicode.toLowerCase().startsWith("x") ? 145 (char)Integer.parseInt(unicode.substring(1), 16) : 146 (char)Integer.parseInt(unicode); 147 if ( "&<>\'\"".indexOf(unicodeChar) < 0 ) { 148 int replaceChunkSize = (charIndex < len && s.charAt(charIndex) == ';') ? unicode.length()+1 : unicode.length(); 149 result.append( String.valueOf(unicodeChar) ); 150 i += replaceChunkSize + 1; 151 } else { 152 i = charIndex; 153 result.append("&#" + unicode + ";"); 154 } 155 } catch (NumberFormatException e) { 156 i = charIndex; 157 result.append("&#" + unicode + ";"); 158 } 159 } else { 160 result.append("&"); 161 } 162 } else { 163 if (translateSpecialEntities) { 164 String seq = s.substring(i, i+Math.min(10, len-i)); 166 int semiIndex = seq.indexOf(';'); 167 if (semiIndex > 0) { 168 String entity = seq.substring(1, semiIndex); 169 Integer code = (Integer ) SpecialEntities.entities.get(entity); 170 if (code != null) { 171 int entityLen = entity.length(); 172 result.append( (char)code.intValue() ); 173 i += entityLen + 1; 174 continue; 175 } 176 } 177 } 178 179 if (advanced) { 180 String sub = s.substring(i); 181 if ( sub.startsWith("&") ) { 182 result.append("&"); 183 i += 4; 184 } else if ( sub.startsWith("'") ) { 185 result.append("'"); 186 i += 5; 187 } else if ( sub.startsWith(">") ) { 188 result.append(">"); 189 i += 3; 190 } else if ( sub.startsWith("<") ) { 191 result.append("<"); 192 i += 3; 193 } else if ( sub.startsWith(""") ) { 194 result.append("""); 195 i += 5; 196 } else { 197 result.append("&"); 198 } 199 200 continue; 201 } 202 203 result.append("&"); 204 } 205 } else if (ch == '\'') { 206 result.append("'"); 207 } else if (ch == '>') { 208 result.append(">"); 209 } else if (ch == '<') { 210 result.append("<"); 211 } else if (ch == '\"') { 212 result.append("""); 213 } else { 214 result.append(ch); 215 } 216 } 217 218 return result.toString(); 219 } 220 221 return null; 222 } 223 224 } | Popular Tags |