1 4 package org.jruby; 5 6 import java.util.regex.Matcher ; 7 import java.util.regex.Pattern ; 8 import java.util.regex.PatternSyntaxException ; 9 10 import org.jruby.parser.ReOptions; 11 12 public class RegexpTranslator { 13 14 private static final Pattern SHARP_IN_CHARACTER_CLASS_PATTERN = Pattern.compile("(\\[[^]]*)#(.*?])"); 15 private static final Pattern SPACE_IN_CHARACTER_CLASS_PATTERN = Pattern.compile("(\\[[^]]*) (.*?])"); 16 private static final Pattern COMMENT_PATTERN = Pattern.compile("\\(\\?#[^)]*\\)"); 17 private static final Pattern HEX_SINGLE_DIGIT_PATTERN = Pattern.compile("\\\\x(\\p{XDigit})(?!\\p{XDigit})"); 18 private static final Pattern OCTAL_SINGLE_ZERO_PATTERN = Pattern.compile("\\\\(0)(?![0-7])"); 19 private static final Pattern OCTAL_MISSING_ZERO_PATTERN = Pattern.compile("\\\\([1-7][0-7]{1,2})"); 20 private static final Pattern POSIX_NAME = Pattern.compile("\\[:(\\w+):\\]"); 21 22 public Pattern translate(String regex, int options, int javaRegexFlags) throws PatternSyntaxException { 23 javaRegexFlags |= translateFlags(options); 24 regex = translatePattern(regex, (javaRegexFlags & Pattern.COMMENTS) != 0); 25 26 return Pattern.compile(regex, javaRegexFlags); 27 } 28 29 private String translatePosixPattern(String regex) { 31 for (Matcher matcher = POSIX_NAME.matcher(regex); matcher.find(); matcher = POSIX_NAME.matcher(regex)) { 32 String value = matcher.group(1); 33 34 if ("alnum".equals(value)) { 35 regex = matcher.replaceFirst("\\\\p{Alnum}"); 36 } else if ("alpha".equals(value)) { 37 regex = matcher.replaceFirst("\\\\p{Alpha}"); 38 } else if ("blank".equals(value)) { 39 regex = matcher.replaceFirst("\\\\p{Blank}"); 40 } else if ("cntrl".equals(value)) { 41 regex = matcher.replaceFirst("\\\\p{Cntrl}"); 42 } else if ("digit".equals(value)) { 43 regex = matcher.replaceFirst("\\\\p{Digit}"); 44 } else if ("graph".equals(value)) { 45 regex = matcher.replaceFirst("\\\\p{Graph}"); 46 } else if ("lower".equals(value)) { 47 regex = matcher.replaceFirst("\\\\p{Lower}"); 48 } else if ("print".equals(value)) { 49 regex = matcher.replaceFirst("\\\\p{Print}"); 50 } else if ("punct".equals(value)) { 51 regex = matcher.replaceFirst("\\\\p{Punct}"); 52 } else if ("space".equals(value)) { 53 regex = matcher.replaceFirst("\\\\p{Space}"); 54 } else if ("upper".equals(value)) { 55 regex = matcher.replaceFirst("\\\\p{Upper}"); 56 } else if ("xdigit".equals(value)) { 57 regex = matcher.replaceFirst("\\\\p{XDigit}"); 58 } else { 59 regex = matcher.replaceFirst("\\\\[:" + value + ":\\\\]"); 60 } 61 } 62 return regex; 63 } 64 65 String translatePattern(String regex, boolean commentsAllowed) { 66 regex = COMMENT_PATTERN.matcher(regex).replaceAll(""); 67 regex = translatePosixPattern(regex); 68 regex = HEX_SINGLE_DIGIT_PATTERN.matcher(regex).replaceAll("\\\\"+"x0$1"); 69 regex = OCTAL_SINGLE_ZERO_PATTERN.matcher(regex).replaceAll("\\\\"+"0$1"); 70 regex = OCTAL_MISSING_ZERO_PATTERN.matcher(regex).replaceAll("\\\\"+"0$1"); 71 if (commentsAllowed) { 72 regex = SPACE_IN_CHARACTER_CLASS_PATTERN.matcher(regex).replaceAll("$1\\\\x20$2"); 73 regex = SHARP_IN_CHARACTER_CLASS_PATTERN.matcher(regex).replaceAll("$1\\\\x23$2"); 74 } 75 76 return regex; 77 } 78 79 int translateFlags(int options) { 80 int flags = Pattern.MULTILINE; 81 if ((options & ReOptions.RE_OPTION_IGNORECASE) > 0) { 82 flags |= Pattern.CASE_INSENSITIVE; 83 } 84 if ((options & ReOptions.RE_OPTION_EXTENDED) > 0) { 85 flags |= Pattern.COMMENTS; 86 } 87 if ((options & ReOptions.RE_OPTION_MULTILINE) > 0) { 88 flags |= Pattern.DOTALL; 89 } 90 return flags; 91 } 92 93 } 94 | Popular Tags |