1 57 58 package com.sun.org.apache.xerces.internal.impl.xpath.regex; 59 60 import java.text.CharacterIterator ; 61 62 65 66 public final class REUtil { 67 private REUtil() { 68 } 69 70 static final int composeFromSurrogates(int high, int low) { 71 return 0x10000 + ((high-0xd800)<<10) + low-0xdc00; 72 } 73 74 static final boolean isLowSurrogate(int ch) { 75 return (ch & 0xfc00) == 0xdc00; 76 } 77 78 static final boolean isHighSurrogate(int ch) { 79 return (ch & 0xfc00) == 0xd800; 80 } 81 82 static final String decomposeToSurrogates(int ch) { 83 char[] chs = new char[2]; 84 ch -= 0x10000; 85 chs[0] = (char)((ch>>10)+0xd800); 86 chs[1] = (char)((ch&0x3ff)+0xdc00); 87 return new String (chs); 88 } 89 90 static final String substring(CharacterIterator iterator, int begin, int end) { 91 char[] src = new char[end-begin]; 92 for (int i = 0; i < src.length; i ++) 93 src[i] = iterator.setIndex(i+begin); 94 return new String (src); 95 } 96 97 99 static final int getOptionValue(int ch) { 100 int ret = 0; 101 switch (ch) { 102 case 'i': 103 ret = RegularExpression.IGNORE_CASE; 104 break; 105 case 'm': 106 ret = RegularExpression.MULTIPLE_LINES; 107 break; 108 case 's': 109 ret = RegularExpression.SINGLE_LINE; 110 break; 111 case 'x': 112 ret = RegularExpression.EXTENDED_COMMENT; 113 break; 114 case 'u': 115 ret = RegularExpression.USE_UNICODE_CATEGORY; 116 break; 117 case 'w': 118 ret = RegularExpression.UNICODE_WORD_BOUNDARY; 119 break; 120 case 'F': 121 ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION; 122 break; 123 case 'H': 124 ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION; 125 break; 126 case 'X': 127 ret = RegularExpression.XMLSCHEMA_MODE; 128 break; 129 case ',': 130 ret = RegularExpression.SPECIAL_COMMA; 131 break; 132 default: 133 } 134 return ret; 135 } 136 137 static final int parseOptions(String opts) throws ParseException { 138 if (opts == null) return 0; 139 int options = 0; 140 for (int i = 0; i < opts.length(); i ++) { 141 int v = getOptionValue(opts.charAt(i)); 142 if (v == 0) 143 throw new ParseException("Unknown Option: "+opts.substring(i), -1); 144 options |= v; 145 } 146 return options; 147 } 148 149 static final String createOptionString(int options) { 150 StringBuffer sb = new StringBuffer (9); 151 if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0) 152 sb.append((char)'F'); 153 if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0) 154 sb.append((char)'H'); 155 if ((options & RegularExpression.XMLSCHEMA_MODE) != 0) 156 sb.append((char)'X'); 157 if ((options & RegularExpression.IGNORE_CASE) != 0) 158 sb.append((char)'i'); 159 if ((options & RegularExpression.MULTIPLE_LINES) != 0) 160 sb.append((char)'m'); 161 if ((options & RegularExpression.SINGLE_LINE) != 0) 162 sb.append((char)'s'); 163 if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0) 164 sb.append((char)'u'); 165 if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0) 166 sb.append((char)'w'); 167 if ((options & RegularExpression.EXTENDED_COMMENT) != 0) 168 sb.append((char)'x'); 169 if ((options & RegularExpression.SPECIAL_COMMA) != 0) 170 sb.append((char)','); 171 return sb.toString().intern(); 172 } 173 174 176 static String stripExtendedComment(String regex) { 177 int len = regex.length(); 178 StringBuffer buffer = new StringBuffer (len); 179 int offset = 0; 180 while (offset < len) { 181 int ch = regex.charAt(offset++); 182 if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ') 184 continue; 185 186 if (ch == '#') { while (offset < len) { 188 ch = regex.charAt(offset++); 189 if (ch == '\r' || ch == '\n') 190 break; 191 } 192 continue; 193 } 194 195 int next; if (ch == '\\' && offset < len) { 197 if ((next = regex.charAt(offset)) == '#' 198 || next == '\t' || next == '\n' || next == '\f' 199 || next == '\r' || next == ' ') { 200 buffer.append((char)next); 201 offset ++; 202 } else { buffer.append((char)'\\'); 204 buffer.append((char)next); 205 offset ++; 206 } 207 } else buffer.append((char)ch); 209 } 210 return buffer.toString(); 211 } 212 213 215 219 public static void main(String [] argv) { 220 String pattern = null; 221 try { 222 String options = ""; 223 String target = null; 224 if( argv.length == 0 ) { 225 System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" ); 226 System.exit( 0 ); 227 } 228 for (int i = 0; i < argv.length; i ++) { 229 if (argv[i].length() == 0 || argv[i].charAt(0) != '-') { 230 if (pattern == null) 231 pattern = argv[i]; 232 else if (target == null) 233 target = argv[i]; 234 else 235 System.err.println("Unnecessary: "+argv[i]); 236 } else if (argv[i].equals("-i")) { 237 options += "i"; 238 } else if (argv[i].equals("-m")) { 239 options += "m"; 240 } else if (argv[i].equals("-s")) { 241 options += "s"; 242 } else if (argv[i].equals("-u")) { 243 options += "u"; 244 } else if (argv[i].equals("-w")) { 245 options += "w"; 246 } else if (argv[i].equals("-X")) { 247 options += "X"; 248 } else { 249 System.err.println("Unknown option: "+argv[i]); 250 } 251 } 252 RegularExpression reg = new RegularExpression(pattern, options); 253 System.out.println("RegularExpression: "+reg); 254 Match match = new Match(); 255 reg.matches(target, match); 256 for (int i = 0; i < match.getNumberOfGroups(); i ++) { 257 if (i == 0 ) System.out.print("Matched range for the whole pattern: "); 258 else System.out.print("["+i+"]: "); 259 if (match.getBeginning(i) < 0) 260 System.out.println("-1"); 261 else { 262 System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", "); 263 System.out.println("\""+match.getCapturedText(i)+"\""); 264 } 265 } 266 } catch (ParseException pe) { 267 if (pattern == null) { 268 pe.printStackTrace(); 269 } else { 270 System.err.println("com.sun.org.apache.xerces.internal.utils.regex.ParseException: "+pe.getMessage()); 271 String indent = " "; 272 System.err.println(indent+pattern); 273 int loc = pe.getLocation(); 274 if (loc >= 0) { 275 System.err.print(indent); 276 for (int i = 0; i < loc; i ++) System.err.print("-"); 277 System.err.println("^"); 278 } 279 } 280 } catch (Exception e) { 281 e.printStackTrace(); 282 } 283 } 284 285 static final int CACHESIZE = 20; 286 static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE]; 287 293 public static RegularExpression createRegex(String pattern, String options) 294 throws ParseException { 295 RegularExpression re = null; 296 int intOptions = REUtil.parseOptions(options); 297 synchronized (REUtil.regexCache) { 298 int i; 299 for (i = 0; i < REUtil.CACHESIZE; i ++) { 300 RegularExpression cached = REUtil.regexCache[i]; 301 if (cached == null) { 302 i = -1; 303 break; 304 } 305 if (cached.equals(pattern, intOptions)) { 306 re = cached; 307 break; 308 } 309 } 310 if (re != null) { 311 if (i != 0) { 312 System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i); 313 REUtil.regexCache[0] = re; 314 } 315 } else { 316 re = new RegularExpression(pattern, options); 317 System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1); 318 REUtil.regexCache[0] = re; 319 } 320 } 321 return re; 322 } 323 324 328 public static boolean matches(String regex, String target) throws ParseException { 329 return REUtil.createRegex(regex, null).matches(target); 330 } 331 332 336 public static boolean matches(String regex, String options, String target) throws ParseException { 337 return REUtil.createRegex(regex, options).matches(target); 338 } 339 340 342 345 public static String quoteMeta(String literal) { 346 int len = literal.length(); 347 StringBuffer buffer = null; 348 for (int i = 0; i < len; i ++) { 349 int ch = literal.charAt(i); 350 if (".*+?{[()|\\^$".indexOf(ch) >= 0) { 351 if (buffer == null) { 352 buffer = new StringBuffer (i+(len-i)*2); 353 if (i > 0) buffer.append(literal.substring(0, i)); 354 } 355 buffer.append((char)'\\'); 356 buffer.append((char)ch); 357 } else if (buffer != null) 358 buffer.append((char)ch); 359 } 360 return buffer != null ? buffer.toString() : literal; 361 } 362 363 365 static void dumpString(String v) { 366 for (int i = 0; i < v.length(); i ++) { 367 System.out.print(Integer.toHexString(v.charAt(i))); 368 System.out.print(" "); 369 } 370 System.out.println(); 371 } 372 } 373 | Popular Tags |