package net.percederberg.grammatica.parser.re;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;

import net.percederberg.grammatica.parser.LookAheadReader;

/**
 * A regular expression element that matches exactly one input
 * character against a set of characters. A set is either one of the
 * predefined constants declared in this class (recognized by
 * reference identity) or a user-defined set built up from single
 * characters, character ranges and nested character sets.
 */
class CharacterSetElement extends Element {

    /**
     * The dot ('.') character set. It matches every character except
     * the line terminators listed in {@link #inDotSet(char)}.
     */
    public static final CharacterSetElement DOT =
        new CharacterSetElement(false);

    /**
     * The digit character set, printed as "\\d". Matches '0'..'9'.
     */
    public static final CharacterSetElement DIGIT =
        new CharacterSetElement(false);

    /**
     * The non-digit character set, printed as "\\D".
     */
    public static final CharacterSetElement NON_DIGIT =
        new CharacterSetElement(true);

    /**
     * The whitespace character set, printed as "\\s".
     */
    public static final CharacterSetElement WHITESPACE =
        new CharacterSetElement(false);

    /**
     * The non-whitespace character set, printed as "\\S".
     */
    public static final CharacterSetElement NON_WHITESPACE =
        new CharacterSetElement(true);

    /**
     * The word character set, printed as "\\w". Matches ASCII
     * letters, ASCII digits and the underscore.
     */
    public static final CharacterSetElement WORD =
        new CharacterSetElement(false);

    /**
     * The non-word character set, printed as "\\W".
     */
    public static final CharacterSetElement NON_WORD =
        new CharacterSetElement(true);

    /**
     * The inverted set flag. When set, membership tests (other than
     * for DOT) are negated.
     */
    private final boolean inverted;

    /**
     * The user-defined set contents. Holds Character, Range and
     * CharacterSetElement objects. The predefined constants never
     * consult this list, so adding contents to them has no effect on
     * matching.
     */
    private final ArrayList contents = new ArrayList();

    /**
     * Creates a new, empty character set element.
     *
     * @param inverted       the inverted character set flag
     */
    public CharacterSetElement(boolean inverted) {
        this.inverted = inverted;
    }

    /**
     * Adds a single character to this set.
     *
     * @param c              the character to add
     */
    public void addCharacter(char c) {
        contents.add(new Character(c));
    }

    /**
     * Adds every character of a string to this set.
     *
     * @param str            the string with the characters to add
     */
    public void addCharacters(String str) {
        for (int i = 0; i < str.length(); i++) {
            addCharacter(str.charAt(i));
        }
    }

    /**
     * Adds every character of a string element to this set.
     *
     * @param elem           the string element with the characters
     */
    public void addCharacters(StringElement elem) {
        addCharacters(elem.getString());
    }

    /**
     * Adds an inclusive character range to this set.
     *
     * @param min            the first character in the range
     * @param max            the last character in the range
     */
    public void addRange(char min, char max) {
        contents.add(new Range(min, max));
    }

    /**
     * Adds another character set as a member of this set. The nested
     * set is consulted through its own membership test, so its own
     * inverted flag is honored.
     *
     * @param elem           the character set to add
     */
    public void addCharacterSet(CharacterSetElement elem) {
        contents.add(elem);
    }

    /**
     * Returns this very instance instead of a copy. Note that this
     * is also what keeps the identity comparisons against the
     * predefined constants working for "cloned" elements.
     *
     * @return this element
     */
    public Object clone() {
        return this;
    }

    /**
     * Attempts to match the single input character found at the
     * given offset. Any non-zero skip value yields no match. If the
     * input has no character at the offset, the matcher is told that
     * the end of the string was read and no match is reported.
     *
     * NOTE(review): in case-insensitive mode only the input
     * character is lower-cased; the set contents are compared as
     * stored. Presumably the set is populated with lower-case
     * characters for case-insensitive patterns -- confirm against
     * the code that builds the set.
     *
     * @param m              the matcher being used
     * @param input          the input character stream to match
     * @param start          the input offset to peek at
     * @param skip           the number of matches to skip
     *
     * @return 1 if the character is in this set, or
     *         -1 if no match was found
     *
     * @throws IOException if the input could not be read
     */
    public int match(Matcher m, LookAheadReader input, int start, int skip)
        throws IOException {

        int  c;

        if (skip != 0) {
            return -1;
        }
        c = input.peek(start);
        if (c < 0) {
            m.setReadEndOfString();
            return -1;
        }
        if (m.isCaseInsensitive()) {
            c = Character.toLowerCase((char) c);
        }
        return inSet((char) c) ? 1 : -1;
    }

    /**
     * Checks if a character is matched by this set. The predefined
     * constants are dispatched on reference identity; everything
     * else is treated as a user-defined set. The inverted flag is
     * applied to all sets except DOT.
     *
     * @param value          the character to check
     *
     * @return true if the character is matched by this set, or
     *         false otherwise
     */
    private boolean inSet(char value) {
        if (this == DOT) {
            return inDotSet(value);
        } else if (this == DIGIT || this == NON_DIGIT) {
            return inDigitSet(value) != inverted;
        } else if (this == WHITESPACE || this == NON_WHITESPACE) {
            return inWhitespaceSet(value) != inverted;
        } else if (this == WORD || this == NON_WORD) {
            return inWordSet(value) != inverted;
        } else {
            return inUserSet(value) != inverted;
        }
    }

    /**
     * Checks if a character is in the dot set, i.e. is anything but
     * a line feed, carriage return, next line (U+0085), line
     * separator (U+2028) or paragraph separator (U+2029).
     *
     * @param value          the character to check
     *
     * @return true if the character is in the set, or
     *         false otherwise
     */
    private boolean inDotSet(char value) {
        switch (value) {
        case '\n':
        case '\r':
        case '\u0085':
        case '\u2028':
        case '\u2029':
            return false;
        default:
            return true;
        }
    }

    /**
     * Checks if a character is an ASCII digit ('0'..'9').
     *
     * @param value          the character to check
     *
     * @return true if the character is in the set, or
     *         false otherwise
     */
    private boolean inDigitSet(char value) {
        return '0' <= value && value <= '9';
    }

    /**
     * Checks if a character is a space, horizontal tab, line feed,
     * form feed, carriage return or vertical tab (character code 11).
     *
     * @param value          the character to check
     *
     * @return true if the character is in the set, or
     *         false otherwise
     */
    private boolean inWhitespaceSet(char value) {
        switch (value) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
        case 11:
            return true;
        default:
            return false;
        }
    }

    /**
     * Checks if a character is an ASCII letter, an ASCII digit or
     * an underscore.
     *
     * @param value          the character to check
     *
     * @return true if the character is in the set, or
     *         false otherwise
     */
    private boolean inWordSet(char value) {
        return ('a' <= value && value <= 'z')
            || ('A' <= value && value <= 'Z')
            || ('0' <= value && value <= '9')
            || value == '_';
    }

    /**
     * Checks if a character is matched by any entry in the
     * user-defined contents. The inverted flag of this set is NOT
     * applied here; that is done by the caller.
     *
     * @param value          the character to check
     *
     * @return true if some entry matches the character, or
     *         false otherwise
     */
    private boolean inUserSet(char value) {
        Object               obj;
        Character            c;
        Range                r;
        CharacterSetElement  e;

        for (int i = 0; i < contents.size(); i++) {
            obj = contents.get(i);
            if (obj instanceof Character) {
                c = (Character) obj;
                if (c.charValue() == value) {
                    return true;
                }
            } else if (obj instanceof Range) {
                r = (Range) obj;
                if (r.inside(value)) {
                    return true;
                }
            } else if (obj instanceof CharacterSetElement) {
                e = (CharacterSetElement) obj;
                if (e.inSet(value)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Prints this element on a line of its own.
     *
     * @param output         the output stream to use
     * @param indent         the text to print before the element
     */
    public void printTo(PrintWriter output, String indent) {
        output.println(indent + toString());
    }

    /**
     * Returns the regular expression notation for this set. The
     * predefined constants give their shorthand form; user-defined
     * sets give a bracket expression, with a caret directly after
     * the opening bracket when the set is inverted.
     *
     * @return a string representation of this character set
     */
    public String toString() {
        StringBuffer  buffer;

        // Handle predefined character sets
        if (this == DOT) {
            return ".";
        } else if (this == DIGIT) {
            return "\\d";
        } else if (this == NON_DIGIT) {
            return "\\D";
        } else if (this == WHITESPACE) {
            return "\\s";
        } else if (this == NON_WHITESPACE) {
            return "\\S";
        } else if (this == WORD) {
            return "\\w";
        } else if (this == NON_WORD) {
            return "\\W";
        }

        // Handle user-defined character sets. The negation caret
        // belongs inside the brackets ("[^...]"), not before them.
        buffer = new StringBuffer();
        if (inverted) {
            buffer.append("[^");
        } else {
            buffer.append("[");
        }
        for (int i = 0; i < contents.size(); i++) {
            buffer.append(contents.get(i));
        }
        buffer.append("]");

        return buffer.toString();
    }


    /**
     * An inclusive character range. Declared static since it never
     * uses the enclosing character set instance.
     */
    private static class Range {

        /**
         * The first character in the range.
         */
        private final char min;

        /**
         * The last character in the range.
         */
        private final char max;

        /**
         * Creates a new character range.
         *
         * @param min        the first character in the range
         * @param max        the last character in the range
         */
        public Range(char min, char max) {
            this.min = min;
            this.max = max;
        }

        /**
         * Checks if a character lies within this range, bounds
         * included.
         *
         * @param c          the character to check
         *
         * @return true if min <= c <= max, or
         *         false otherwise
         */
        public boolean inside(char c) {
            return c >= min && c <= max;
        }

        /**
         * Returns the range in "min-max" notation.
         *
         * @return a string representation of this range
         */
        public String toString() {
            return min + "-" + max;
        }
    }
}