1 16 17 package org.apache.xerces.impl.xpath.regex; 18 19 import java.text.CharacterIterator ; 20 21 26 public final class REUtil { 27 private REUtil() { 28 } 29 30 static final int composeFromSurrogates(int high, int low) { 31 return 0x10000 + ((high-0xd800)<<10) + low-0xdc00; 32 } 33 34 static final boolean isLowSurrogate(int ch) { 35 return (ch & 0xfc00) == 0xdc00; 36 } 37 38 static final boolean isHighSurrogate(int ch) { 39 return (ch & 0xfc00) == 0xd800; 40 } 41 42 static final String decomposeToSurrogates(int ch) { 43 char[] chs = new char[2]; 44 ch -= 0x10000; 45 chs[0] = (char)((ch>>10)+0xd800); 46 chs[1] = (char)((ch&0x3ff)+0xdc00); 47 return new String (chs); 48 } 49 50 static final String substring(CharacterIterator iterator, int begin, int end) { 51 char[] src = new char[end-begin]; 52 for (int i = 0; i < src.length; i ++) 53 src[i] = iterator.setIndex(i+begin); 54 return new String (src); 55 } 56 57 59 static final int getOptionValue(int ch) { 60 int ret = 0; 61 switch (ch) { 62 case 'i': 63 ret = RegularExpression.IGNORE_CASE; 64 break; 65 case 'm': 66 ret = RegularExpression.MULTIPLE_LINES; 67 break; 68 case 's': 69 ret = RegularExpression.SINGLE_LINE; 70 break; 71 case 'x': 72 ret = RegularExpression.EXTENDED_COMMENT; 73 break; 74 case 'u': 75 ret = RegularExpression.USE_UNICODE_CATEGORY; 76 break; 77 case 'w': 78 ret = RegularExpression.UNICODE_WORD_BOUNDARY; 79 break; 80 case 'F': 81 ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION; 82 break; 83 case 'H': 84 ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION; 85 break; 86 case 'X': 87 ret = RegularExpression.XMLSCHEMA_MODE; 88 break; 89 case ',': 90 ret = RegularExpression.SPECIAL_COMMA; 91 break; 92 default: 93 } 94 return ret; 95 } 96 97 static final int parseOptions(String opts) throws ParseException { 98 if (opts == null) return 0; 99 int options = 0; 100 for (int i = 0; i < opts.length(); i ++) { 101 int v = getOptionValue(opts.charAt(i)); 102 if (v == 0) 103 throw new ParseException("Unknown Option: "+opts.substring(i), -1); 104 options |= v; 105 } 106 return options; 107 } 108 109 static final String createOptionString(int options) { 110 StringBuffer sb = new StringBuffer (9); 111 if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0) 112 sb.append((char)'F'); 113 if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0) 114 sb.append((char)'H'); 115 if ((options & RegularExpression.XMLSCHEMA_MODE) != 0) 116 sb.append((char)'X'); 117 if ((options & RegularExpression.IGNORE_CASE) != 0) 118 sb.append((char)'i'); 119 if ((options & RegularExpression.MULTIPLE_LINES) != 0) 120 sb.append((char)'m'); 121 if ((options & RegularExpression.SINGLE_LINE) != 0) 122 sb.append((char)'s'); 123 if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0) 124 sb.append((char)'u'); 125 if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0) 126 sb.append((char)'w'); 127 if ((options & RegularExpression.EXTENDED_COMMENT) != 0) 128 sb.append((char)'x'); 129 if ((options & RegularExpression.SPECIAL_COMMA) != 0) 130 sb.append((char)','); 131 return sb.toString().intern(); 132 } 133 134 136 static String stripExtendedComment(String regex) { 137 int len = regex.length(); 138 StringBuffer buffer = new StringBuffer (len); 139 int offset = 0; 140 while (offset < len) { 141 int ch = regex.charAt(offset++); 142 if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ') 144 continue; 145 146 if (ch == '#') { while (offset < len) { 148 ch = regex.charAt(offset++); 149 if (ch == '\r' || ch == '\n') 150 break; 151 } 152 continue; 153 } 154 155 int next; if (ch == '\\' && offset < len) { 157 if ((next = regex.charAt(offset)) == '#' 158 || next == '\t' || next == '\n' || next == '\f' 159 || next == '\r' || next == ' ') { 160 buffer.append((char)next); 161 offset ++; 162 } else { buffer.append((char)'\\'); 164 buffer.append((char)next); 165 offset ++; 166 } 167 } else buffer.append((char)ch); 169 } 170 return buffer.toString(); 171 } 172 173 175 179 public static void main(String [] argv) { 180 String pattern = null; 181 try { 182 String options = ""; 183 String target = null; 184 if( argv.length == 0 ) { 185 System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" ); 186 System.exit( 0 ); 187 } 188 for (int i = 0; i < argv.length; i ++) { 189 if (argv[i].length() == 0 || argv[i].charAt(0) != '-') { 190 if (pattern == null) 191 pattern = argv[i]; 192 else if (target == null) 193 target = argv[i]; 194 else 195 System.err.println("Unnecessary: "+argv[i]); 196 } else if (argv[i].equals("-i")) { 197 options += "i"; 198 } else if (argv[i].equals("-m")) { 199 options += "m"; 200 } else if (argv[i].equals("-s")) { 201 options += "s"; 202 } else if (argv[i].equals("-u")) { 203 options += "u"; 204 } else if (argv[i].equals("-w")) { 205 options += "w"; 206 } else if (argv[i].equals("-X")) { 207 options += "X"; 208 } else { 209 System.err.println("Unknown option: "+argv[i]); 210 } 211 } 212 RegularExpression reg = new RegularExpression(pattern, options); 213 System.out.println("RegularExpression: "+reg); 214 Match match = new Match(); 215 reg.matches(target, match); 216 for (int i = 0; i < match.getNumberOfGroups(); i ++) { 217 if (i == 0 ) System.out.print("Matched range for the whole pattern: "); 218 else System.out.print("["+i+"]: "); 219 if (match.getBeginning(i) < 0) 220 System.out.println("-1"); 221 else { 222 System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", "); 223 System.out.println("\""+match.getCapturedText(i)+"\""); 224 } 225 } 226 } catch (ParseException pe) { 227 if (pattern == null) { 228 pe.printStackTrace(); 229 } else { 230 System.err.println("org.apache.xerces.utils.regex.ParseException: "+pe.getMessage()); 231 String indent = " "; 232 System.err.println(indent+pattern); 233 int loc = pe.getLocation(); 234 if (loc >= 0) { 235 System.err.print(indent); 236 for (int i = 0; i < loc; i ++) System.err.print("-"); 237 System.err.println("^"); 238 } 239 } 240 } catch (Exception e) { 241 e.printStackTrace(); 242 } 243 } 244 245 static final int CACHESIZE = 20; 246 static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE]; 247 253 public static RegularExpression createRegex(String pattern, String options) 254 throws ParseException { 255 RegularExpression re = null; 256 int intOptions = REUtil.parseOptions(options); 257 synchronized (REUtil.regexCache) { 258 int i; 259 for (i = 0; i < REUtil.CACHESIZE; i ++) { 260 RegularExpression cached = REUtil.regexCache[i]; 261 if (cached == null) { 262 i = -1; 263 break; 264 } 265 if (cached.equals(pattern, intOptions)) { 266 re = cached; 267 break; 268 } 269 } 270 if (re != null) { 271 if (i != 0) { 272 System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i); 273 REUtil.regexCache[0] = re; 274 } 275 } else { 276 re = new RegularExpression(pattern, options); 277 System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE-1); 278 REUtil.regexCache[0] = re; 279 } 280 } 281 return re; 282 } 283 284 288 public static boolean matches(String regex, String target) throws ParseException { 289 return REUtil.createRegex(regex, null).matches(target); 290 } 291 292 296 public static boolean matches(String regex, String options, String target) throws ParseException { 297 return REUtil.createRegex(regex, options).matches(target); 298 } 299 300 302 305 public static String quoteMeta(String literal) { 306 int len = literal.length(); 307 StringBuffer buffer = null; 308 for (int i = 0; i < len; i ++) { 309 int ch = literal.charAt(i); 310 if (".*+?{[()|\\^$".indexOf(ch) >= 0) { 311 if (buffer == null) { 312 buffer = new StringBuffer (i+(len-i)*2); 313 if (i > 0) buffer.append(literal.substring(0, i)); 314 } 315 buffer.append((char)'\\'); 316 buffer.append((char)ch); 317 } else if (buffer != null) 318 buffer.append((char)ch); 319 } 320 return buffer != null ? buffer.toString() : literal; 321 } 322 323 325 static void dumpString(String v) { 326 for (int i = 0; i < v.length(); i ++) { 327 System.out.print(Integer.toHexString(v.charAt(i))); 328 System.out.print(" "); 329 } 330 System.out.println(); 331 } 332 } 333 | Popular Tags |