KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > jcorporate > expresso > ext > regexp > REProgram


1 /* ====================================================================
2  * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
3  *
4  * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * 3. The end-user documentation included with the redistribution,
19  * if any, must include the following acknowledgment:
20  * "This product includes software developed by Jcorporate Ltd.
21  * (http://www.jcorporate.com/)."
22  * Alternately, this acknowledgment may appear in the software itself,
23  * if and wherever such third-party acknowledgments normally appear.
24  *
25  * 4. "Jcorporate" and product names such as "Expresso" must
26  * not be used to endorse or promote products derived from this
27  * software without prior written permission. For written permission,
28  * please contact info@jcorporate.com.
29  *
30  * 5. Products derived from this software may not be called "Expresso",
31  * or other Jcorporate product names; nor may "Expresso" or other
32  * Jcorporate product names appear in their name, without prior
33  * written permission of Jcorporate Ltd.
34  *
35  * 6. No product derived from this software may compete in the same
36  * market space, i.e. framework, without prior written permission
37  * of Jcorporate Ltd. For written permission, please contact
38  * partners@jcorporate.com.
39  *
40  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43  * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
44  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  * ====================================================================
53  *
54  * This software consists of voluntary contributions made by many
55  * individuals on behalf of the Jcorporate Ltd. Contributions back
56  * to the project(s) are encouraged when you make modifications.
57  * Please send them to support@jcorporate.com. For more information
58  * on Jcorporate Ltd. and its products, please see
59  * <http://www.jcorporate.com/>.
60  *
61  * Portions of this software are based upon other open source
62  * products and are subject to their respective licenses.
63  */

64
65 package com.jcorporate.expresso.ext.regexp;
66
67 /*
68  * ====================================================================
69  *
70  * The Apache Software License, Version 1.1
71  *
72  * Copyright (c) 1999 The Apache Software Foundation. All rights
73  * reserved.
74  *
75  * Redistribution and use in source and binary forms, with or without
76  * modification, are permitted provided that the following conditions
77  * are met:
78  *
79  * 1. Redistributions of source code must retain the above copyright
80  * notice, this list of conditions and the following disclaimer.
81  *
82  * 2. Redistributions in binary form must reproduce the above copyright
83  * notice, this list of conditions and the following disclaimer in
84  * the documentation and/or other materials provided with the
85  * distribution.
86  *
87  * 3. The end-user documentation included with the redistribution, if
88  * any, must include the following acknowledgment:
89  * "This product includes software developed by the
90  * Apache Software Foundation (http://www.apache.org/)."
91  * Alternately, this acknowledgment may appear in the software itself,
92  * if and wherever such third-party acknowledgments normally appear.
93  *
94  * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
95  * Foundation" must not be used to endorse or promote products derived
96  * from this software without prior written permission. For written
97  * permission, please contact apache@apache.org.
98  *
99  * 5. Products derived from this software may not be called "Apache"
100  * nor may "Apache" appear in their names without prior written
101  * permission of the Apache Group.
102  *
103  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
104  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
105  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
106  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
107  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
108  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
109  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
110  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
111  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
112  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
113  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
114  * SUCH DAMAGE.
115  * ====================================================================
116  *
117  * This software consists of voluntary contributions made by many
118  * individuals on behalf of the Apache Software Foundation. For more
119  * information on the Apache Software Foundation, please see
120  * <http://www.apache.org/>.
121  *
122  */

123
124
125 /**
126  * A class that holds compiled regular expressions. This is exposed mainly
127  * for use by the recompile utility (which helps you produce precompiled
128  * REProgram objects). You should not otherwise need to work directly with
129  * this class.
130  *
131  * @author <a HREF="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
132  * @version $Id: REProgram.java,v 1.7 2004/11/17 20:48:15 lhamel Exp $
133  * @see RE
134  * @see RECompiler
135  * @deprecated since v5.6, use jakarta oro
136  */

137 public class REProgram {
138     static final int OPT_HASBACKREFS = 1;
139     char[] instruction; // The compiled regular expression 'program'
140
int lenInstruction; // The amount of the instruction buffer in use
141
char[] prefix; // Prefix string optimization
142
int flags; // Optimization flags (REProgram.OPT_*)
143

144     /**
145      * Constructs a program object from a character array
146      *
147      * @param instruction Character array with RE opcode instructions in it
148      */

149     public REProgram(char[] instruction) {
150         this(instruction, instruction.length);
151     }
152
153     /**
154      * Constructs a program object from a character array
155      *
156      * @param instruction Character array with RE opcode instructions in it
157      * @param lenInstruction Amount of instruction array in use
158      */

159     public REProgram(char[] instruction, int lenInstruction) {
160         setInstructions(instruction, lenInstruction);
161     }
162
163     /**
164      * Returns a copy of the current regular expression program in a character
165      * array that is exactly the right length to hold the program. If there is
166      * no program compiled yet, getInstructions() will return null.
167      *
168      * @return A copy of the current compiled RE program
169      */

170     public char[] getInstructions() {
171
172         // Ensure program has been compiled!
173
if (lenInstruction != 0) {
174
175             // Return copy of program
176
char[] ret = new char[lenInstruction];
177             System.arraycopy(instruction, 0, ret, 0, lenInstruction);
178
179             return ret;
180         }
181
182         return null;
183     }
184
185     /**
186      * Sets a new regular expression program to run. It is this method which
187      * performs any special compile-time search optimizations. Currently only
188      * two optimizations are in place - one which checks for backreferences
189      * (so that they can be lazily allocated) and another which attempts to
190      * find an prefix anchor string so that substantial amounts of input can
191      * potentially be skipped without running the actual program.
192      *
193      * @param instruction Program instruction buffer
194      * @param lenInstruction Length of instruction buffer in use
195      */

196     public void setInstructions(char[] instruction, int lenInstruction) {
197
198         // Save reference to instruction array
199
this.instruction = instruction;
200         this.lenInstruction = lenInstruction;
201
202         // Initialize other program-related variables
203
flags = 0;
204         prefix = null;
205
206         // Try various compile-time optimizations if there's a program
207
if (instruction != null && lenInstruction != 0) {
208
209             // If the first node is a branch
210
if (lenInstruction >= RE.nodeSize &&
211                     instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) {
212
213                 // to the end node
214
int next = instruction[0 + RE.offsetNext];
215
216                 if (instruction[next + RE.offsetOpcode] == RE.OP_END) {
217
218                     // and the branch starts with an atom
219
if (lenInstruction >= (RE.nodeSize * 2) &&
220                             instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM) {
221
222                         // then get that atom as an prefix because there's no other choice
223
int lenAtom = instruction[RE.nodeSize +
224                                 RE.offsetOpdata];
225                         prefix = new char[lenAtom];
226                         System.arraycopy(instruction, RE.nodeSize * 2, prefix,
227                                 0, lenAtom);
228                     }
229                 }
230             }
231             BackrefScanLoop:
232
233                         // Check for backreferences
234
for (int i = 0; i < lenInstruction; i += RE.nodeSize) {
235                             switch (instruction[i + RE.offsetOpcode]) {
236                                 case RE.OP_ANYOF:
237                                     i += (instruction[i + RE.offsetOpdata] * 2);
238                                     break;
239
240                                 case RE.OP_ATOM:
241                                     i += instruction[i + RE.offsetOpdata];
242                                     break;
243
244                                 case RE.OP_BACKREF:
245                                     flags |= OPT_HASBACKREFS;
246                                     break BackrefScanLoop;
247                             }
248                         }
249         }
250     }
251 }
Popular Tags