// KickJava   Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > org > python > modules > sre > PatternObject


/*
 * Copyright 2000 Finn Bock
 *
 * This program contains material copyrighted by:
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */


package org.python.modules.sre;

import java.util.*;
import org.python.core.*;

22 public class PatternObject extends PyObject {
23     char[] code; /* link to the code string object */
24     public String JavaDoc pattern; /* link to the pattern source (or None) */
25     public int groups;
26     public org.python.core.PyObject groupindex;
27     public int flags;
28     org.python.core.PyObject indexgroup;
29     public int codesize;
30
31
32     public PatternObject(PyString pattern, int flags, char[] code,
33             int groups, PyObject groupindex, PyObject indexgroup) {
34
35         if (pattern != null)
36             this.pattern = pattern.toString();
37         this.flags = flags;
38         this.code = code;
39         this.codesize = code.length;
40         this.groups = groups;
41         this.groupindex = groupindex;
42         this.indexgroup = indexgroup;
43     }
44
45     public MatchObject match(String JavaDoc string) {
46         return match(string, 0, Integer.MAX_VALUE);
47     }
48
49     public MatchObject match(String JavaDoc string, int start) {
50         return match(string, start, Integer.MAX_VALUE);
51     }
52
53     public MatchObject match(String JavaDoc string, int start, int end) {
54         SRE_STATE state = new SRE_STATE(string, start, end, flags);
55
56         state.ptr = state.start;
57         int status = state.SRE_MATCH(code, 0, 1);
58
59         return _pattern_new_match(state, string, status);
60     }
61
62
63
64
65     public MatchObject search(PyObject[] args, String JavaDoc[] kws) {
66         ArgParser ap = new ArgParser("search", args, kws,
67                                      "pattern", "pos", "endpos");
68         String JavaDoc string = ap.getString(0);
69         int start = ap.getInt(1, 0);
70         int end = ap.getInt(2, string.length());
71
72         SRE_STATE state = new SRE_STATE(string, start, end, flags);
73
74         int status = state.SRE_SEARCH(code, 0);
75
76         return _pattern_new_match(state, string, status);
77     }
78
79
80     public PyObject sub(PyObject[] args, String JavaDoc[] kws) {
81         ArgParser ap = new ArgParser("sub", args, kws,
82                                      "repl", "string", "count");
83         PyObject template = ap.getPyObject(0);
84         String JavaDoc string = ap.getString(1);
85         int count = ap.getInt(2, 0);
86
87         return subx(template, string, count, false);
88     }
89
90
91
92     public PyObject subn(PyObject[] args, String JavaDoc[] kws) {
93         ArgParser ap = new ArgParser("subn", args, kws,
94                                      "repl", "string", "count");
95         PyObject template = ap.getPyObject(0);
96         String JavaDoc string = ap.getString(1);
97         int count = ap.getInt(2, 0);
98
99         return subx(template, string, count, true);
100     }
101
102
103     private PyObject subx(PyObject template, String JavaDoc string, int count,
104                           boolean subn)
105     {
106         PyObject filter = null;
107         boolean filter_is_callable = false;
108         if (template.isCallable()) {
109             filter = template;
110             filter_is_callable = true;
111         } else {
112             boolean literal = false;
113             if (template instanceof PyString) {
114                 literal = template.toString().indexOf('\\') < 0;
115             }
116             if (literal) {
117                 filter = template;
118                 filter_is_callable = false;
119             } else {
120                 filter = call("sre", "_subx", new PyObject[] {
121                     this, template});
122                 filter_is_callable = filter.isCallable();
123             }
124         }
125
126         SRE_STATE state = new SRE_STATE(string, 0, Integer.MAX_VALUE, flags);
127
128         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
129
130         int n = 0;
131         int i = 0;
132         
133         while (count == 0 || n < count) {
134             state.state_reset();
135             state.ptr = state.start;
136             int status = state.SRE_SEARCH(code, 0);
137             if (status <= 0) {
138                 if (status == 0)
139                     break;
140                 _error(status);
141             }
142             int b = state.start;
143             int e = state.ptr;
144
145             if (i < b) {
146                 /* get segment before this match */
147                 buf.append(string.substring(i, b));
148             }
149             if (! (i == b && i == e && n > 0)) {
150                 PyObject item;
151                 if (filter_is_callable) {
152                     /* pass match object through filter */
153                     MatchObject match = _pattern_new_match(state, string, 1);
154                     item = filter.__call__(match);
155                 } else {
156                     item = filter;
157                 }
158     
159                 if (item != Py.None) {
160                     buf.append(item.toString());
161                 }
162                 i = e;
163                 n++;
164             }
165
166             /* move on */
167             if (state.ptr == state.start)
168                 state.start = state.ptr + 1;
169             else
170                 state.start = state.ptr;
171         }
172         if (i < state.endpos) {
173             buf.append(string.substring(i, state.endpos));
174         }
175
176         if (subn)
177             return new PyTuple(new PyObject[] {
178                 Py.newString(buf.toString()), Py.newInteger(n)
179             });
180         else
181             return Py.newString(buf.toString());
182     }
183
184
185     public PyObject split(PyObject[] args, String JavaDoc[] kws) {
186         ArgParser ap = new ArgParser("split", args, kws,
187                                      "source", "maxsplit");
188         String JavaDoc string = ap.getString(0);
189         int maxsplit = ap.getInt(1, 0);
190
191         SRE_STATE state = new SRE_STATE(string, 0, Integer.MAX_VALUE, flags);
192
193         PyList list = new PyList();
194
195         int n = 0;
196         int last = state.start;
197         while (maxsplit == 0 || n < maxsplit) {
198             state.state_reset();
199             state.ptr = state.start;
200             int status = state.SRE_SEARCH(code, 0);
201             if (status <= 0) {
202                 if (status == 0)
203                     break;
204                 _error(status);
205             }
206             if (state.start == state.ptr) {
207                 if (last == state.end)
208                     break;
209                 /* skip one character */
210                 state.start = state.ptr + 1;
211                 continue;
212             }
213
214             /* get segment before this match */
215             PyObject item = Py.newString(string.substring(last, state.start));
216             list.append(item);
217
218             for (int i = 0; i < groups; i++) {
219                 String JavaDoc s = state.getslice(i+1, string, false);
220                 if (s != null)
221                     list.append(Py.newString(s));
222                 else
223                     list.append(Py.None);
224             }
225             n += 1;
226             last = state.start = state.ptr;
227         }
228
229         PyObject item = Py.newString(string.substring(last, state.endpos));
230         list.append(item);
231
232         return list;
233     }
234
235     private PyObject call(String JavaDoc module, String JavaDoc function, PyObject[] args) {
236         PyObject sre = imp.importName(module, true);
237         return sre.invoke(function, args);
238     }
239
240
241
242     public PyObject findall(PyObject[] args, String JavaDoc[] kws) {
243         ArgParser ap = new ArgParser("findall", args, kws,
244                                      "source", "pos", "endpos");
245         String JavaDoc string = ap.getString(0);
246         int start = ap.getInt(1, 0);
247         int end = ap.getInt(2, Integer.MAX_VALUE);
248
249         SRE_STATE state = new SRE_STATE(string, start, end, flags);
250
251         Vector list = new Vector();
252
253         while (state.start <= state.end) {
254             state.state_reset();
255             state.ptr = state.start;
256             int status = state.SRE_SEARCH(code, 0);
257             if (status > 0) {
258                 PyObject item;
259
260                 /* don't bother to build a match object */
261                 switch (groups) {
262                 case 0:
263                     item = Py.newString(
264                                     string.substring(state.start, state.ptr));
265                     break;
266                 case 1:
267                     item = Py.newString(state.getslice(1, string, true));
268                     break;
269                 default:
270                     PyObject[] t = new PyObject[groups];
271                     for (int i = 0; i < groups; i++)
272                         t[i] = Py.newString(state.getslice(i+1, string, true));
273                     item = new PyTuple(t);
274                     break;
275                 }
276
277                 list.addElement(item);
278
279                 if (state.ptr == state.start)
280                     state.start = state.ptr + 1;
281                 else
282                     state.start = state.ptr;
283             } else {
284
285                 if (status == 0)
286                     break;
287
288                 _error(status);
289             }
290         }
291         return new PyList(list);
292     }
293
294
295     public PyObject finditer(String JavaDoc string) {
296         return finditer(string, 0, Integer.MAX_VALUE);
297     }
298
299     public PyObject finditer(String JavaDoc string, int start) {
300         return finditer(string, start, Integer.MAX_VALUE);
301     }
302
303     public PyObject finditer(String JavaDoc string, int start, int end) {
304         ScannerObject scanner = scanner(string, start, end);
305         PyObject search = scanner.__findattr__("search");
306         return new PyCallIter(search, Py.None);
307     }
308
309     public ScannerObject scanner(String JavaDoc string) {
310         return scanner(string, 0, Integer.MAX_VALUE);
311     }
312
313     public ScannerObject scanner(String JavaDoc string, int start) {
314         return scanner(string, start, Integer.MAX_VALUE);
315     }
316
317     public ScannerObject scanner(String JavaDoc string, int start, int end) {
318         ScannerObject self = new ScannerObject();
319         self.state = new SRE_STATE(string, start, end, flags);
320         self.pattern = this;
321         self.string = string;
322         return self;
323     }
324
325
326     private void _error(int status) {
327         if (status == SRE_STATE.SRE_ERROR_RECURSION_LIMIT)
328             throw Py.RuntimeError("maximum recursion limit exceeded");
329
330         throw Py.RuntimeError("internal error in regular expression engine");
331     }
332
333
334     MatchObject _pattern_new_match(SRE_STATE state, String JavaDoc string,
335                                    int status)
336     {
337         /* create match object (from state object) */
338
339         //System.out.println("status = " + status + " " + string);
340

341         if (status > 0) {
342             /* create match object (with room for extra group marks) */
343             MatchObject match = new MatchObject();
344             match.pattern = this;
345             match.string = string;
346             match.regs = null;
347             match.groups = groups+1;
348             /* group zero */
349             int base = state.beginning;
350
351             match.mark = new int[match.groups*2];
352             match.mark[0] = state.start - base;
353             match.mark[1] = state.ptr - base;
354
355             /* fill in the rest of the groups */
356             int i, j;
357             for (i = j = 0; i < groups; i++, j+=2) {
358                 if (j+1 <= state.lastmark && state.mark[j] != -1 &&
359                                                     state.mark[j+1] != -1) {
360                     match.mark[j+2] = state.mark[j] - base;
361                     match.mark[j+3] = state.mark[j+1] - base;
362                 } else
363                     match.mark[j+2] = match.mark[j+3] = -1;
364             }
365             match.pos = state.pos;
366             match.endpos = state.endpos;
367             match.lastindex = state.lastindex;
368
369             return match;
370         } else if (status == 0) {
371             return null;
372         }
373
374         _error(status);
375         return null;
376     }
377 }

// Popular Tags