KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > oro > text > regex > OpCode


1 package org.apache.oro.text.regex;
2
/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000 The Apache Software Foundation. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 * if any, must include the following acknowledgment:
 * "This product includes software developed by the
 * Apache Software Foundation (http://www.apache.org/)."
 * Alternately, this acknowledgment may appear in the software itself,
 * if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
 * must not be used to endorse or promote products derived from this
 * software without prior written permission. For written
 * permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache"
 * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
 * name, without prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 * Portions of this software are based upon software originally written
 * by Daniel F. Savarese. We appreciate his contributions.
 */

59
/**
 * The OpCode class should not be instantiated.  It is a holder of various
 * constants and static methods pertaining to the manipulation of the
 * op-codes used in a compiled regular expression.
 *
 * <p>A compiled program is a {@code char[]}.  As far as the accessors in
 * this class show, a node at index {@code offset} stores its op-code at
 * {@code program[offset]}, the distance to the next node at
 * {@code program[offset + 1]}, and any operand data from
 * {@code program[offset + 2]} onward.
 *
 * @author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
 * @version $Id: OpCode.java,v 1.1.1.1 2000/07/23 23:08:51 jon Exp $
 */
final class OpCode {

  /** Prevents instantiation; this class only holds constants and statics. */
  private OpCode() { }

  // Names, values, and descriptions of operators correspond to those of
  // Perl regex bytecodes and for compatibility purposes are drawn from
  // regcomp.h in the Perl source tree by Larry Wall.
  static final char  // Has Operand  Meaning
    _END     = 0,    // no   End of program.
    _BOL     = 1,    // no   Match "" at beginning of line.
    _MBOL    = 2,    // no   Same, assuming multiline.
    _SBOL    = 3,    // no   Same, assuming singleline.
    _EOL     = 4,    // no   Match "" at end of line.
    _MEOL    = 5,    // no   Same, assuming multiline.
    _SEOL    = 6,    // no   Same, assuming singleline.
    _ANY     = 7,    // no   Match any one character (except newline).
    _SANY    = 8,    // no   Match any one character.
    _ANYOF   = 9,    // yes  Match character in (or not in) this class.
    _CURLY   = 10,   // yes  Match this simple thing {n,m} times.
    _CURLYX  = 11,   // yes  Match this complex thing {n,m} times.
    _BRANCH  = 12,   // yes  Match this alternative, or the next...
    _BACK    = 13,   // no   Match "", "next" ptr points backward.
    _EXACTLY = 14,   // yes  Match this string (preceded by length).
    _NOTHING = 15,   // no   Match empty string.
    _STAR    = 16,   // yes  Match this (simple) thing 0 or more times.
    _PLUS    = 17,   // yes  Match this (simple) thing 1 or more times.
    _ALNUM   = 18,   // no   Match any alphanumeric character
    _NALNUM  = 19,   // no   Match any non-alphanumeric character
    _BOUND   = 20,   // no   Match "" at any word boundary
    _NBOUND  = 21,   // no   Match "" at any word non-boundary
    _SPACE   = 22,   // no   Match any whitespace character
    _NSPACE  = 23,   // no   Match any non-whitespace character
    _DIGIT   = 24,   // no   Match any numeric character
    _NDIGIT  = 25,   // no   Match any non-numeric character
    _REF     = 26,   // yes  Match some already matched string
    _OPEN    = 27,   // yes  Mark this point in input as start of #n.
    _CLOSE   = 28,   // yes  Analogous to OPEN.
    _MINMOD  = 29,   // no   Next operator is not greedy.
    _GBOL    = 30,   // no   Matches where last m//g left off.
    _IFMATCH = 31,   // no   Succeeds if the following matches.
    _UNLESSM = 32,   // no   Fails if the following matches.
    _SUCCEED = 33,   // no   Return from a subroutine, basically.
    _WHILEM  = 34;   // no   Do curly processing and see if rest matches.

  /**
   * Lengths of the various operands, indexed by op-code value.  Only
   * _CURLY and _CURLYX (2) and _REF, _OPEN and _CLOSE (1) are non-zero.
   */
  static final int[] _operandLength = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0
  };

  /**
   * Maps each op-code value (the index) to the op-code naming its general
   * type; e.g. _MBOL, _SBOL and _GBOL all map to _BOL.
   */
  static final char[] _opType = {
    _END, _BOL, _BOL, _BOL, _EOL, _EOL, _EOL, _ANY, _ANY, _ANYOF, _CURLY,
    _CURLY, _BRANCH, _BACK, _EXACTLY, _NOTHING, _STAR, _PLUS, _ALNUM,
    _NALNUM, _BOUND, _NBOUND, _SPACE, _NSPACE, _DIGIT, _NDIGIT, _REF,
    _OPEN, _CLOSE, _MINMOD, _BOL, _BRANCH, _BRANCH, _END, _WHILEM
  };

  /**
   * Op-codes grouped under the name "length varies"; presumably those whose
   * match length is not fixed -- confirm against the compiler's usage.
   */
  static final char[] _opLengthVaries = {
    _BRANCH, _BACK, _STAR, _PLUS, _CURLY, _CURLYX, _REF, _WHILEM
  };

  /**
   * Op-codes grouped under the name "length one"; presumably those that
   * match exactly one input character -- confirm against the compiler's
   * usage.
   */
  static final char[] _opLengthOne = {
    _ANY, _SANY, _ANYOF, _ALNUM, _NALNUM, _SPACE, _NSPACE, _DIGIT, _NDIGIT
  };

  /** Value returned by {@link #_getNext} when there is no next node. */
  static final int  _NULL_OFFSET  = -1;

  /** Stored "next" value that {@link #_getNext} treats as "no next node". */
  static final char _NULL_POINTER =  0;

  /**
   * Reads the raw "next" field of the node at {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of the node's op-code
   * @return the value stored at {@code program[offset + 1]}
   */
  static final int _getNextOffset(char[] program, int offset) {
    return ((int)program[offset + 1]);
  }

  /**
   * Reads the first argument slot of the node at {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of the node's op-code
   * @return the value stored at {@code program[offset + 2]}
   */
  static final char _getArg1(char[] program, int offset) {
    return program[offset + 2];
  }

  /**
   * Reads the second argument slot of the node at {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of the node's op-code
   * @return the value stored at {@code program[offset + 3]}
   */
  static final char _getArg2(char[] program, int offset) {
    return program[offset + 3];
  }

  /**
   * Computes the index at which the operand of the node at {@code offset}
   * begins.
   *
   * @param offset index of the node's op-code
   * @return {@code offset + 2}
   */
  static final int _getOperand(int offset) {
    return (offset + 2);
  }

  /**
   * Tests whether {@code ch} occurs in {@code array} at or after index
   * {@code start}.
   *
   * @param ch    the character to look for
   * @param array the array to search
   * @param start the first index to examine
   * @return true if some element from {@code start} onward equals
   *         {@code ch}; false otherwise, including when {@code start} is
   *         at or past the end of the array
   */
  static final boolean _isInArray(char ch, char[] array, int start) {
    for(int index = start; index < array.length; index++) {
      if(ch == array[index]) {
        return true;
      }
    }
    return false;
  }

  /**
   * @param offset index of a node's op-code
   * @return {@code offset + 2}
   */
  static final int _getNextOperator(int offset) { return (offset + 2); }

  /**
   * @param offset index of a node's op-code
   * @return {@code offset - 2}
   */
  static final int _getPrevOperator(int offset) { return (offset - 2); }

  /**
   * Resolves the index of the node that follows the node at {@code offset}.
   *
   * @param program the compiled program; may be null
   * @param offset  index of the current node's op-code
   * @return {@link #_NULL_OFFSET} if {@code program} is null or the node's
   *         "next" field equals {@link #_NULL_POINTER}; otherwise
   *         {@code offset} minus the field for a _BACK node, or
   *         {@code offset} plus the field for any other node
   */
  static final int _getNext(char[] program, int offset) {
    int offs;

    if(program == null) {
      return _NULL_OFFSET;
    }

    offs = _getNextOffset(program, offset);
    if(offs == _NULL_POINTER) {
      return _NULL_OFFSET;
    }

    // _BACK is the one op-code whose "next" distance points backward.
    if(program[offset] == OpCode._BACK) {
      return (offset - offs);
    }

    return (offset + offs);
  }

  // doesn't really belong in this class, but we want Perl5Matcher not to
  // depend on Perl5Compiler
  /**
   * Tests whether {@code token} is an ASCII letter, an ASCII digit or an
   * underscore.
   *
   * @param token the character to classify
   * @return true for {@code a-z}, {@code A-Z}, {@code 0-9} and {@code _}
   */
  static final boolean _isWordCharacter(char token) {
    return ((token >= 'a' && token <= 'z') ||
            (token >= 'A' && token <= 'Z') ||
            (token >= '0' && token <= '9') ||
            (token == '_'));
  }
}
188
Popular Tags