1 15 16 17 package org.python.modules.sre; 18 19 import java.util.*; 20 import org.python.core.*; 21 22 public class PatternObject extends PyObject { 23 char[] code; 24 public String pattern; 25 public int groups; 26 public org.python.core.PyObject groupindex; 27 public int flags; 28 org.python.core.PyObject indexgroup; 29 public int codesize; 30 31 32 public PatternObject(PyString pattern, int flags, char[] code, 33 int groups, PyObject groupindex, PyObject indexgroup) { 34 35 if (pattern != null) 36 this.pattern = pattern.toString(); 37 this.flags = flags; 38 this.code = code; 39 this.codesize = code.length; 40 this.groups = groups; 41 this.groupindex = groupindex; 42 this.indexgroup = indexgroup; 43 } 44 45 public MatchObject match(String string) { 46 return match(string, 0, Integer.MAX_VALUE); 47 } 48 49 public MatchObject match(String string, int start) { 50 return match(string, start, Integer.MAX_VALUE); 51 } 52 53 public MatchObject match(String string, int start, int end) { 54 SRE_STATE state = new SRE_STATE(string, start, end, flags); 55 56 state.ptr = state.start; 57 int status = state.SRE_MATCH(code, 0, 1); 58 59 return _pattern_new_match(state, string, status); 60 } 61 62 63 64 65 public MatchObject search(PyObject[] args, String [] kws) { 66 ArgParser ap = new ArgParser("search", args, kws, 67 "pattern", "pos", "endpos"); 68 String string = ap.getString(0); 69 int start = ap.getInt(1, 0); 70 int end = ap.getInt(2, string.length()); 71 72 SRE_STATE state = new SRE_STATE(string, start, end, flags); 73 74 int status = state.SRE_SEARCH(code, 0); 75 76 return _pattern_new_match(state, string, status); 77 } 78 79 80 public PyObject sub(PyObject[] args, String [] kws) { 81 ArgParser ap = new ArgParser("sub", args, kws, 82 "repl", "string", "count"); 83 PyObject template = ap.getPyObject(0); 84 String string = ap.getString(1); 85 int count = ap.getInt(2, 0); 86 87 return subx(template, string, count, false); 88 } 89 90 91 92 public PyObject subn(PyObject[] args, String [] kws) { 93 ArgParser ap = new ArgParser("subn", args, kws, 94 "repl", "string", "count"); 95 PyObject template = ap.getPyObject(0); 96 String string = ap.getString(1); 97 int count = ap.getInt(2, 0); 98 99 return subx(template, string, count, true); 100 } 101 102 103 private PyObject subx(PyObject template, String string, int count, 104 boolean subn) 105 { 106 PyObject filter = null; 107 boolean filter_is_callable = false; 108 if (template.isCallable()) { 109 filter = template; 110 filter_is_callable = true; 111 } else { 112 boolean literal = false; 113 if (template instanceof PyString) { 114 literal = template.toString().indexOf('\\') < 0; 115 } 116 if (literal) { 117 filter = template; 118 filter_is_callable = false; 119 } else { 120 filter = call("sre", "_subx", new PyObject[] { 121 this, template}); 122 filter_is_callable = filter.isCallable(); 123 } 124 } 125 126 SRE_STATE state = new SRE_STATE(string, 0, Integer.MAX_VALUE, flags); 127 128 StringBuffer buf = new StringBuffer (); 129 130 int n = 0; 131 int i = 0; 132 133 while (count == 0 || n < count) { 134 state.state_reset(); 135 state.ptr = state.start; 136 int status = state.SRE_SEARCH(code, 0); 137 if (status <= 0) { 138 if (status == 0) 139 break; 140 _error(status); 141 } 142 int b = state.start; 143 int e = state.ptr; 144 145 if (i < b) { 146 147 buf.append(string.substring(i, b)); 148 } 149 if (! (i == b && i == e && n > 0)) { 150 PyObject item; 151 if (filter_is_callable) { 152 153 MatchObject match = _pattern_new_match(state, string, 1); 154 item = filter.__call__(match); 155 } else { 156 item = filter; 157 } 158 159 if (item != Py.None) { 160 buf.append(item.toString()); 161 } 162 i = e; 163 n++; 164 } 165 166 167 if (state.ptr == state.start) 168 state.start = state.ptr + 1; 169 else 170 state.start = state.ptr; 171 } 172 if (i < state.endpos) { 173 buf.append(string.substring(i, state.endpos)); 174 } 175 176 if (subn) 177 return new PyTuple(new PyObject[] { 178 Py.newString(buf.toString()), Py.newInteger(n) 179 }); 180 else 181 return Py.newString(buf.toString()); 182 } 183 184 185 public PyObject split(PyObject[] args, String [] kws) { 186 ArgParser ap = new ArgParser("split", args, kws, 187 "source", "maxsplit"); 188 String string = ap.getString(0); 189 int maxsplit = ap.getInt(1, 0); 190 191 SRE_STATE state = new SRE_STATE(string, 0, Integer.MAX_VALUE, flags); 192 193 PyList list = new PyList(); 194 195 int n = 0; 196 int last = state.start; 197 while (maxsplit == 0 || n < maxsplit) { 198 state.state_reset(); 199 state.ptr = state.start; 200 int status = state.SRE_SEARCH(code, 0); 201 if (status <= 0) { 202 if (status == 0) 203 break; 204 _error(status); 205 } 206 if (state.start == state.ptr) { 207 if (last == state.end) 208 break; 209 210 state.start = state.ptr + 1; 211 continue; 212 } 213 214 215 PyObject item = Py.newString(string.substring(last, state.start)); 216 list.append(item); 217 218 for (int i = 0; i < groups; i++) { 219 String s = state.getslice(i+1, string, false); 220 if (s != null) 221 list.append(Py.newString(s)); 222 else 223 list.append(Py.None); 224 } 225 n += 1; 226 last = state.start = state.ptr; 227 } 228 229 PyObject item = Py.newString(string.substring(last, state.endpos)); 230 list.append(item); 231 232 return list; 233 } 234 235 private PyObject call(String module, String function, PyObject[] args) { 236 PyObject sre = imp.importName(module, true); 237 return sre.invoke(function, args); 238 } 239 240 241 242 public PyObject findall(PyObject[] args, String [] kws) { 243 ArgParser ap = new ArgParser("findall", args, kws, 244 "source", "pos", "endpos"); 245 String string = ap.getString(0); 246 int start = ap.getInt(1, 0); 247 int end = ap.getInt(2, Integer.MAX_VALUE); 248 249 SRE_STATE state = new SRE_STATE(string, start, end, flags); 250 251 Vector list = new Vector(); 252 253 while (state.start <= state.end) { 254 state.state_reset(); 255 state.ptr = state.start; 256 int status = state.SRE_SEARCH(code, 0); 257 if (status > 0) { 258 PyObject item; 259 260 261 switch (groups) { 262 case 0: 263 item = Py.newString( 264 string.substring(state.start, state.ptr)); 265 break; 266 case 1: 267 item = Py.newString(state.getslice(1, string, true)); 268 break; 269 default: 270 PyObject[] t = new PyObject[groups]; 271 for (int i = 0; i < groups; i++) 272 t[i] = Py.newString(state.getslice(i+1, string, true)); 273 item = new PyTuple(t); 274 break; 275 } 276 277 list.addElement(item); 278 279 if (state.ptr == state.start) 280 state.start = state.ptr + 1; 281 else 282 state.start = state.ptr; 283 } else { 284 285 if (status == 0) 286 break; 287 288 _error(status); 289 } 290 } 291 return new PyList(list); 292 } 293 294 295 public PyObject finditer(String string) { 296 return finditer(string, 0, Integer.MAX_VALUE); 297 } 298 299 public PyObject finditer(String string, int start) { 300 return finditer(string, start, Integer.MAX_VALUE); 301 } 302 303 public PyObject finditer(String string, int start, int end) { 304 ScannerObject scanner = scanner(string, start, end); 305 PyObject search = scanner.__findattr__("search"); 306 return new PyCallIter(search, Py.None); 307 } 308 309 public ScannerObject scanner(String string) { 310 return scanner(string, 0, Integer.MAX_VALUE); 311 } 312 313 public ScannerObject scanner(String string, int start) { 314 return scanner(string, start, Integer.MAX_VALUE); 315 } 316 317 public ScannerObject scanner(String string, int start, int end) { 318 ScannerObject self = new ScannerObject(); 319 self.state = new SRE_STATE(string, start, end, flags); 320 self.pattern = this; 321 self.string = string; 322 return self; 323 } 324 325 326 private void _error(int status) { 327 if (status == SRE_STATE.SRE_ERROR_RECURSION_LIMIT) 328 throw Py.RuntimeError("maximum recursion limit exceeded"); 329 330 throw Py.RuntimeError("internal error in regular expression engine"); 331 } 332 333 334 MatchObject _pattern_new_match(SRE_STATE state, String string, 335 int status) 336 { 337 338 339 341 if (status > 0) { 342 343 MatchObject match = new MatchObject(); 344 match.pattern = this; 345 match.string = string; 346 match.regs = null; 347 match.groups = groups+1; 348 349 int base = state.beginning; 350 351 match.mark = new int[match.groups*2]; 352 match.mark[0] = state.start - base; 353 match.mark[1] = state.ptr - base; 354 355 356 int i, j; 357 for (i = j = 0; i < groups; i++, j+=2) { 358 if (j+1 <= state.lastmark && state.mark[j] != -1 && 359 state.mark[j+1] != -1) { 360 match.mark[j+2] = state.mark[j] - base; 361 match.mark[j+3] = state.mark[j+1] - base; 362 } else 363 match.mark[j+2] = match.mark[j+3] = -1; 364 } 365 match.pos = state.pos; 366 match.endpos = state.endpos; 367 match.lastindex = state.lastindex; 368 369 return match; 370 } else if (status == 0) { 371 return null; 372 } 373 374 _error(status); 375 return null; 376 } 377 } 378 379 380 | Popular Tags |