package org.enhydra.apache.xerces.utils.regex;

import java.text.CharacterIterator;

/**
 * Static helper routines for the regular expression package: surrogate pair
 * arithmetic, option-string parsing and formatting, extended-comment
 * stripping, a small cache of compiled expressions, and metacharacter quoting.
 *
 * <p>The class cannot be instantiated.
 */
public final class REUtil {
    private REUtil() {
    }

    /**
     * Combines a UTF-16 surrogate pair into a single code point.
     *
     * @param high the high (leading) surrogate, expected in 0xD800..0xDBFF
     * @param low the low (trailing) surrogate, expected in 0xDC00..0xDFFF
     * @return the supplementary code point; the inputs are not validated
     */
    static final int composeFromSurrogates(int high, int low) {
        return 0x10000 + ((high - 0xd800) << 10) + low - 0xdc00;
    }

    /**
     * Tells whether {@code ch} lies in the low-surrogate range 0xDC00..0xDFFF.
     */
    static final boolean isLowSurrogate(int ch) {
        return (ch & 0xfc00) == 0xdc00;
    }

    /**
     * Tells whether {@code ch} lies in the high-surrogate range 0xD800..0xDBFF.
     */
    static final boolean isHighSurrogate(int ch) {
        return (ch & 0xfc00) == 0xd800;
    }

    /**
     * Splits a supplementary code point into its two-character surrogate pair.
     *
     * @param ch a code point; it is not checked to be at least 0x10000
     * @return a string of length 2 holding the high then the low surrogate
     */
    static final String decomposeToSurrogates(int ch) {
        char[] chs = new char[2];
        ch -= 0x10000;
        chs[0] = (char) ((ch >> 10) + 0xd800);
        chs[1] = (char) ((ch & 0x3ff) + 0xdc00);
        return new String(chs);
    }

    /**
     * Copies the characters at indices {@code begin} (inclusive) to
     * {@code end} (exclusive) out of an iterator.
     *
     * <p>Note that this moves the iterator: each character is read through
     * {@code setIndex}, which repositions it.
     *
     * @param iterator the source of characters
     * @param begin index of the first character to copy
     * @param end index one past the last character to copy
     * @return the copied characters as a string
     */
    static final String substring(CharacterIterator iterator, int begin, int end) {
        char[] src = new char[end - begin];
        for (int i = 0; i < src.length; i++) {
            src[i] = iterator.setIndex(i + begin);
        }
        return new String(src);
    }

    /**
     * Maps a single option character to its {@code RegularExpression} flag.
     *
     * @param ch one of {@code i m s x u w F H X ,}
     * @return the matching flag constant, or 0 when {@code ch} is not a known
     *         option character
     */
    static final int getOptionValue(int ch) {
        int ret = 0;
        switch (ch) {
          case 'i':
            ret = RegularExpression.IGNORE_CASE;
            break;
          case 'm':
            ret = RegularExpression.MULTIPLE_LINES;
            break;
          case 's':
            ret = RegularExpression.SINGLE_LINE;
            break;
          case 'x':
            ret = RegularExpression.EXTENDED_COMMENT;
            break;
          case 'u':
            ret = RegularExpression.USE_UNICODE_CATEGORY;
            break;
          case 'w':
            ret = RegularExpression.UNICODE_WORD_BOUNDARY;
            break;
          case 'F':
            ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
            break;
          case 'H':
            ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
            break;
          case 'X':
            ret = RegularExpression.XMLSCHEMA_MODE;
            break;
          case ',':
            ret = RegularExpression.SPECIAL_COMMA;
            break;
          default:
        }
        return ret;
    }

    /**
     * Converts an option string into the OR of the corresponding flags.
     *
     * @param opts option characters as accepted by {@link #getOptionValue},
     *        or {@code null}
     * @return the combined flags; 0 when {@code opts} is {@code null} or empty
     * @throws ParseException if a character is not a known option; the message
     *         carries the remainder of the string starting at that character
     *         and the location is -1
     */
    static final int parseOptions(String opts) throws ParseException {
        if (opts == null) {
            return 0;
        }
        int options = 0;
        for (int i = 0; i < opts.length(); i++) {
            int v = getOptionValue(opts.charAt(i));
            if (v == 0) {
                throw new ParseException("Unknown Option: " + opts.substring(i), -1);
            }
            options |= v;
        }
        return options;
    }

    /**
     * Formats a flag set as an option string, the inverse of
     * {@link #parseOptions}. Characters are emitted in the fixed order
     * {@code F H X i m s u w x ,}.
     *
     * @param options OR of {@code RegularExpression} flag constants
     * @return the interned option string (empty when no known flag is set)
     */
    static final String createOptionString(int options) {
        StringBuffer sb = new StringBuffer(9);
        if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0) {
            sb.append('F');
        }
        if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0) {
            sb.append('H');
        }
        if ((options & RegularExpression.XMLSCHEMA_MODE) != 0) {
            sb.append('X');
        }
        if ((options & RegularExpression.IGNORE_CASE) != 0) {
            sb.append('i');
        }
        if ((options & RegularExpression.MULTIPLE_LINES) != 0) {
            sb.append('m');
        }
        if ((options & RegularExpression.SINGLE_LINE) != 0) {
            sb.append('s');
        }
        if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0) {
            sb.append('u');
        }
        if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0) {
            sb.append('w');
        }
        if ((options & RegularExpression.EXTENDED_COMMENT) != 0) {
            sb.append('x');
        }
        if ((options & RegularExpression.SPECIAL_COMMA) != 0) {
            sb.append(',');
        }
        return sb.toString().intern();
    }

    /**
     * Tells whether {@code ch} is one of the characters that
     * {@link #stripExtendedComment} discards: tab, LF, FF, CR or space.
     */
    private static boolean isExtendedWhitespace(int ch) {
        return ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ';
    }

    /**
     * Removes free-form whitespace and {@code #} comments from a pattern.
     *
     * <ul>
     * <li>Unescaped tab, LF, FF, CR and space characters are dropped.</li>
     * <li>An unescaped {@code #} starts a comment running to the next CR or
     *     LF (or the end of the string); the comment and its terminator are
     *     dropped.</li>
     * <li>A backslash followed by {@code #} or one of the whitespace
     *     characters yields just that character, without the backslash.</li>
     * <li>Any other backslash sequence is copied through as two characters.</li>
     * </ul>
     *
     * @param regex the pattern text
     * @return the pattern with whitespace and comments removed
     */
    static String stripExtendedComment(String regex) {
        int len = regex.length();
        StringBuffer buffer = new StringBuffer(len);
        int offset = 0;
        while (offset < len) {
            int ch = regex.charAt(offset++);
            if (isExtendedWhitespace(ch)) {
                continue;
            }

            if (ch == '#') {
                // Skip the comment body, consuming the line terminator too.
                while (offset < len) {
                    ch = regex.charAt(offset++);
                    if (ch == '\r' || ch == '\n') {
                        break;
                    }
                }
                continue;
            }

            if (ch == '\\' && offset < len) {
                int next = regex.charAt(offset++);
                if (next != '#' && !isExtendedWhitespace(next)) {
                    // Ordinary escape: keep the backslash.
                    buffer.append('\\');
                }
                buffer.append((char) next);
            } else {
                // Also reached for a lone backslash at the very end of the string.
                buffer.append((char) ch);
            }
        }
        return buffer.toString();
    }

    /**
     * Command-line driver: compiles a pattern, matches it against a target
     * string and prints the range and text of every group.
     *
     * <p>The first argument not starting with {@code -} is the pattern and
     * the second is the target; further ones are reported as unnecessary.
     * The flags {@code -i -m -s -u -w -X} add the corresponding option
     * character. A {@code ParseException} is reported with a caret under
     * the failing location when the location is known.
     *
     * @param argv command-line arguments
     */
    public static void main(String[] argv) {
        String pattern = null;
        try {
            String options = "";
            String target = null;
            if (argv.length == 0) {
                System.out.println("Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String");
                System.exit(0);
            }
            for (int i = 0; i < argv.length; i++) {
                String arg = argv[i];
                if (arg.length() == 0 || arg.charAt(0) != '-') {
                    if (pattern == null) {
                        pattern = arg;
                    } else if (target == null) {
                        target = arg;
                    } else {
                        System.err.println("Unnecessary: " + arg);
                    }
                } else if (arg.length() == 2 && "imsuwX".indexOf(arg.charAt(1)) >= 0) {
                    // Recognized flag: "-i" contributes "i", and so on.
                    options += arg.substring(1);
                } else {
                    System.err.println("Unknown option: " + arg);
                }
            }
            RegularExpression reg = new RegularExpression(pattern, options);
            System.out.println("RegularExpression: " + reg);
            Match match = new Match();
            reg.matches(target, match);
            for (int i = 0; i < match.getNumberOfGroups(); i++) {
                if (i == 0) {
                    System.out.print("Matched range for the whole pattern: ");
                } else {
                    System.out.print("[" + i + "]: ");
                }
                if (match.getBeginning(i) < 0) {
                    System.out.println("-1");
                } else {
                    System.out.print(match.getBeginning(i) + ", " + match.getEnd(i) + ", ");
                    System.out.println("\"" + match.getCapturedText(i) + "\"");
                }
            }
        } catch (ParseException pe) {
            if (pattern == null) {
                pe.printStackTrace();
            } else {
                System.err.println("org.enhydra.apache.xerces.utils.regex.ParseException: " + pe.getMessage());
                String indent = " ";
                System.err.println(indent + pattern);
                int loc = pe.getLocation();
                if (loc >= 0) {
                    // Draw "----^" under the pattern, pointing at the error location.
                    System.err.print(indent);
                    for (int i = 0; i < loc; i++) {
                        System.err.print("-");
                    }
                    System.err.println("^");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Number of compiled expressions kept by {@link #createRegex}. */
    static final int CACHESIZE = 20;

    /**
     * Most-recently-used cache of compiled expressions; slot 0 is the most
     * recent. Unused slots are {@code null} and always trail the used ones.
     * Accessed only while synchronized on the array itself.
     */
    static RegularExpression[] regexCache = new RegularExpression[CACHESIZE];

    /**
     * Returns a compiled expression for the pattern and options, reusing a
     * cached instance when one matches.
     *
     * <p>A hit is moved to the front of the cache. On a miss a new
     * {@code RegularExpression} is compiled and inserted at the front,
     * pushing the oldest entry out when the cache is full.
     *
     * @param pattern the regular expression text
     * @param options option characters, or {@code null} for none
     * @return a cached or freshly compiled expression
     * @throws ParseException if the options or the pattern cannot be parsed
     */
    public static RegularExpression createRegex(String pattern, String options)
        throws ParseException {
        RegularExpression re = null;
        int intOptions = REUtil.parseOptions(options);
        synchronized (REUtil.regexCache) {
            int hit = -1;
            for (int i = 0; i < REUtil.CACHESIZE; i++) {
                // Use a separate local so that `re` is set only on a real
                // match. Scanning a full cache without a match must be a
                // miss, not a "hit" on the last slot.
                RegularExpression cached = REUtil.regexCache[i];
                if (cached == null) {
                    break;      // used slots are contiguous; nothing further
                }
                // NOTE(review): two-argument equals is declared on
                // RegularExpression; presumably it compares pattern text and
                // option flags.
                if (cached.equals(pattern, intOptions)) {
                    re = cached;
                    hit = i;
                    break;
                }
            }
            if (re != null) {
                if (hit != 0) {
                    // Shift entries 0..hit-1 down one slot and move the hit to the front.
                    System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, hit);
                    REUtil.regexCache[0] = re;
                }
            } else {
                re = new RegularExpression(pattern, options);
                // Shift everything down one slot, discarding the last entry.
                System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE - 1);
                REUtil.regexCache[0] = re;
            }
        }
        return re;
    }

    /**
     * Matches {@code target} against {@code regex} with no options, using the
     * expression cache.
     *
     * @throws ParseException if {@code regex} cannot be parsed
     * @see #createRegex
     */
    public static boolean matches(String regex, String target) throws ParseException {
        return REUtil.createRegex(regex, null).matches(target);
    }

    /**
     * Matches {@code target} against {@code regex} compiled with
     * {@code options}, using the expression cache.
     *
     * @throws ParseException if {@code regex} or {@code options} cannot be parsed
     * @see #createRegex
     */
    public static boolean matches(String regex, String options, String target) throws ParseException {
        return REUtil.createRegex(regex, options).matches(target);
    }

    /**
     * Escapes regular expression metacharacters in a literal string.
     *
     * <p>Each of {@code . * + ? { [ ( ) | \ ^ $} is prefixed with a
     * backslash; all other characters are copied unchanged.
     *
     * @param literal the text to quote
     * @return the quoted text, or {@code literal} itself when it contains no
     *         metacharacter
     */
    public static String quoteMeta(String literal) {
        int len = literal.length();
        StringBuffer buffer = null;     // allocated lazily at the first metacharacter
        for (int i = 0; i < len; i++) {
            int ch = literal.charAt(i);
            if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
                if (buffer == null) {
                    // Worst case: every remaining character needs a backslash.
                    buffer = new StringBuffer(i + (len - i) * 2);
                    if (i > 0) {
                        buffer.append(literal.substring(0, i));
                    }
                }
                buffer.append('\\');
                // The metacharacter itself must follow its backslash.
                buffer.append((char) ch);
            } else if (buffer != null) {
                buffer.append((char) ch);
            }
        }
        return buffer != null ? buffer.toString() : literal;
    }

    /**
     * Debugging aid: prints each character of {@code v} to standard output as
     * a hexadecimal number followed by a space, then a line terminator.
     */
    static void dumpString(String v) {
        for (int i = 0; i < v.length(); i++) {
            System.out.print(Integer.toHexString(v.charAt(i)));
            System.out.print(" ");
        }
        System.out.println();
    }
}