KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > regexp > REProgram


1 package org.apache.regexp;
2
3 /*
4  * ====================================================================
5  *
6  * The Apache Software License, Version 1.1
7  *
8  * Copyright (c) 1999-2003 The Apache Software Foundation. All rights
9  * reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in
20  * the documentation and/or other materials provided with the
21  * distribution.
22  *
23  * 3. The end-user documentation included with the redistribution, if
24  * any, must include the following acknowlegement:
25  * "This product includes software developed by the
26  * Apache Software Foundation (http://www.apache.org/)."
27  * Alternately, this acknowlegement may appear in the software itself,
28  * if and wherever such third-party acknowlegements normally appear.
29  *
30  * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
31  * Foundation" must not be used to endorse or promote products derived
32  * from this software without prior written permission. For written
33  * permission, please contact apache@apache.org.
34  *
35  * 5. Products derived from this software may not be called "Apache"
36  * nor may "Apache" appear in their names without prior written
37  * permission of the Apache Group.
38  *
39  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
41  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
42  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
43  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
46  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
47  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
49  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  * ====================================================================
52  *
53  * This software consists of voluntary contributions made by many
54  * individuals on behalf of the Apache Software Foundation. For more
55  * information on the Apache Software Foundation, please see
56  * <http://www.apache.org/>.
57  *
58  */

59
60 import java.io.Serializable;
61
62 /**
63  * A class that holds compiled regular expressions. This is exposed mainly
64  * for use by the recompile utility (which helps you produce precompiled
65  * REProgram objects). You should not otherwise need to work directly with
66  * this class.
67 *
68  * @see RE
69  * @see RECompiler
70  *
71  * @author <a HREF="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
72  * @version $Id: REProgram.java,v 1.3 2003/05/02 01:03:47 vgritsenko Exp $
73  */

74 public class REProgram implements Serializable JavaDoc
75 {
76     static final int OPT_HASBACKREFS = 1;
77
78     char[] instruction; // The compiled regular expression 'program'
79
int lenInstruction; // The amount of the instruction buffer in use
80
char[] prefix; // Prefix string optimization
81
int flags; // Optimization flags (REProgram.OPT_*)
82
int maxParens = -1;
83
84     /**
85      * Constructs a program object from a character array
86      * @param instruction Character array with RE opcode instructions in it
87      */

88     public REProgram(char[] instruction)
89     {
90         this(instruction, instruction.length);
91     }
92
93     /**
94      * Constructs a program object from a character array
95      * @param parens Count of parens in the program
96      * @param instruction Character array with RE opcode instructions in it
97      */

98     public REProgram(int parens, char[] instruction)
99     {
100         this(instruction, instruction.length);
101         this.maxParens = parens;
102     }
103
104     /**
105      * Constructs a program object from a character array
106      * @param instruction Character array with RE opcode instructions in it
107      * @param lenInstruction Amount of instruction array in use
108      */

109     public REProgram(char[] instruction, int lenInstruction)
110     {
111         setInstructions(instruction, lenInstruction);
112     }
113
114     /**
115      * Returns a copy of the current regular expression program in a character
116      * array that is exactly the right length to hold the program. If there is
117      * no program compiled yet, getInstructions() will return null.
118      * @return A copy of the current compiled RE program
119      */

120     public char[] getInstructions()
121     {
122         // Ensure program has been compiled!
123
if (lenInstruction != 0)
124         {
125             // Return copy of program
126
char[] ret = new char[lenInstruction];
127             System.arraycopy(instruction, 0, ret, 0, lenInstruction);
128             return ret;
129         }
130         return null;
131     }
132
133     /**
134      * Sets a new regular expression program to run. It is this method which
135      * performs any special compile-time search optimizations. Currently only
136      * two optimizations are in place - one which checks for backreferences
137      * (so that they can be lazily allocated) and another which attempts to
138      * find an prefix anchor string so that substantial amounts of input can
139      * potentially be skipped without running the actual program.
140      * @param instruction Program instruction buffer
141      * @param lenInstruction Length of instruction buffer in use
142      */

143     public void setInstructions(char[] instruction, int lenInstruction)
144     {
145         // Save reference to instruction array
146
this.instruction = instruction;
147         this.lenInstruction = lenInstruction;
148
149         // Initialize other program-related variables
150
flags = 0;
151         prefix = null;
152
153         // Try various compile-time optimizations if there's a program
154
if (instruction != null && lenInstruction != 0)
155         {
156             // If the first node is a branch
157
if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
158             {
159                 // to the end node
160
int next = instruction[0 + RE.offsetNext];
161                 if (instruction[next + RE.offsetOpcode] == RE.OP_END)
162                 {
163                     // and the branch starts with an atom
164
if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
165                     {
166                         // then get that atom as an prefix because there's no other choice
167
int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
168                         prefix = new char[lenAtom];
169                         System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
170                     }
171                 }
172             }
173
174             BackrefScanLoop:
175
176             // Check for backreferences
177
for (int i = 0; i < lenInstruction; i += RE.nodeSize)
178             {
179                 switch (instruction[i + RE.offsetOpcode])
180                 {
181                     case RE.OP_ANYOF:
182                         i += (instruction[i + RE.offsetOpdata] * 2);
183                         break;
184
185                     case RE.OP_ATOM:
186                         i += instruction[i + RE.offsetOpdata];
187                         break;
188
189                     case RE.OP_BACKREF:
190                         flags |= OPT_HASBACKREFS;
191                         break BackrefScanLoop;
192                 }
193             }
194         }
195     }
196 }
197
Popular Tags