KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > scriptella > jdbc > SqlReaderTokenizer


/*
 * Copyright 2006-2007 The Scriptella Project Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package scriptella.jdbc;

import scriptella.util.StringUtils;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
24
25 /**
26  * Reader based SQL tokenizer.
27  * <p>This class splits sql statements using a specifed
28  * {@link #setSeparator(String) separator string}.
29  * <p>The ? injections in quoted literals and comments are skipped.
30  * The $ substitutions are skipped only in comments.
31  * <p>This class became too complex and <b>needs to be refactored</b>.
32  *
33  * @author Fyodor Kupolov
34  * @version 1.0
35  */

36 public final class SqlReaderTokenizer implements SqlTokenizer {
37     private final ReaderWrapper reader;
38     private final List JavaDoc<Integer JavaDoc> injections = new ArrayList JavaDoc<Integer JavaDoc>();
39     private final StringBuilder JavaDoc sb = new StringBuilder JavaDoc(80);
40     private char[] separator = DEFAULT_SEPARATOR;
41     private boolean separatorOnSingleLine;
42     private boolean keepFormat;
43     private SeparatorMatcher separatorMatcher = new SeparatorMatcher();
44     private static final char[] DEFAULT_SEPARATOR = ";".toCharArray();
45
46     public SqlReaderTokenizer(Reader JavaDoc reader) {
47         this.reader = new ReaderWrapper(reader);
48     }
49
50     public SqlReaderTokenizer(Reader JavaDoc reader, String JavaDoc separator, boolean separatorOnSingleLine, boolean keepFormat) {
51         this.reader = new ReaderWrapper(reader);
52         setKeepFormat(keepFormat);
53         setSeparator(separator);
54         setSeparatorOnSingleLine(separatorOnSingleLine);
55     }
56
57     private int position;
58     private char previousChar;
59     private char currentChar;
60
61     private int lastLineStart;
62
63
64     public String JavaDoc nextStatement() throws IOException JavaDoc {
65         sb.setLength(0);
66         injections.clear();
67         final boolean newLineMode = separatorOnSingleLine; //make a local copy for performance reasons
68
final boolean defaultMode = !newLineMode;
69         boolean whitespacesOnly = true;
70         final char sep0 = separator[0];
71         lastLineStart = 0;
72
73         previousChar = (char) -1;
74         int n;
75         for (position = 0; (n = readNormalizedChar()) >= 0; position++) {
76             currentChar=(char) n;
77             sb.append(currentChar);
78             //Checking separator substring
79
if ((currentChar == sep0) && //if matched a first separator char
80
//and no whitespaces in new line mode or not a new line mode
81
((newLineMode && whitespacesOnly) || defaultMode)) {
82                 if (separatorMatcher.matches()) { //try to match the whole string
83
return sb.toString();
84                 }
85             }
86             if (newLineMode && currentChar > 32) {
87                 whitespacesOnly = false;
88             }
89             switch (currentChar) {
90                 case '-':
91                     if (previousChar == '-') { //Comment
92
seekEndLineComment();
93                         whitespacesOnly = true;
94                     }
95                     break;
96                 case '/':
97                     if (previousChar == '/') { //Comment
98
seekEndLineComment();
99                         whitespacesOnly = true;
100                     }
101                     break;
102                 case '*':
103                     if (previousChar == '/') {
104                         seekEndCStyleComment();
105                     }
106                     break;
107                 case '"':
108                     seekQuote('\"');
109                     break;
110                 case '\'':
111                     seekQuote('\'');
112                     break;
113                 case '?':
114                 case '$':
115                     injections.add(position);
116                     break;
117                 case '\r':
118                 case '\n': //new line started
119
whitespacesOnly = true;
120                     lastLineStart = position + 1;
121                     break;
122             }
123
124             previousChar = currentChar;
125         }
126         if (sb.length() > 0) {
127             return sb.toString();
128         } else return n >= 0 ? "" : null;
129     }
130
131     private int readNormalizedChar() throws IOException JavaDoc {
132         for (int n;(n = reader.read())>=0;) {
133             if (!keepFormat) {
134                 //Normalize char \r,\n->\n ; any whitespace transformed to space
135
//If previous char was also a whitespace - this char is ignored
136
if (n=='\n' || n=='\r') {
137                     if (previousChar=='\n') {
138                         continue;
139                     }
140                     n='\n';
141                 } else if (n <= ' ') {
142                     if (previousChar == ' ' || previousChar == '\n') {
143                         continue;
144                     }
145                     n = ' ';
146                 }
147             }
148             return n;
149         }
150         return -1;
151     }
152
153     public int[] getInjections() {
154         int n = injections.size();
155         if (n==0) {
156             return EMPTY_INJECTIONS_ARRAY;
157         }
158         int[] res = new int[n];
159         for (int i = 0; i < n; i++) {
160             res[i]=injections.get(i);
161         }
162         return res;
163     }
164
165     private void seekQuote(char q) throws IOException JavaDoc {
166         for (int n; (n = reader.read()) >= 0; ) {
167             position++;
168             sb.append((char) n);
169             if ('$' == n) { //$ expressions are substituted in quotes
170
injections.add(position);
171             } else if (q == n) { //quote
172
return;
173             }
174         }
175     }
176
177     private void seekEndLineComment() throws IOException JavaDoc {
178         if (!keepFormat) {
179             position-=2;
180             sb.setLength(position+1);
181         }
182         for (int n; (n = reader.read()) >= 0; ) {
183             if (keepFormat) {
184                 position++;
185                 sb.append((char) n);
186             }
187             if ('\r' == n || '\n' == n) { //EOL
188
if (!keepFormat) {
189                     position++;
190                     sb.append('\n');
191                     currentChar='\n';
192                 } else {
193                     currentChar=(char) n;
194                 }
195                 lastLineStart = position + 1; //remember the new line position
196
return;
197             }
198         }
199     }
200
201     /**
202      * Seeks until end c-style comment * /.
203      * If keepFormat=false, the comment string is not appended to the buffer.
204      * @throws IOException if I/O error occurs
205      */

206     private void seekEndCStyleComment() throws IOException JavaDoc {
207         boolean firstChar=true;
208         boolean copyChars=true;
209         for (int n; (n = reader.read()) >= 0; ) {
210             //Oracle is extraordinary as always ;)
211
//if oracle hint, i.e. /*+ and keepformat=false
212
if (firstChar && !keepFormat && n!='+') {
213                 position-=2;
214                 sb.setLength(position+1);
215                 copyChars=false;
216             }
217             firstChar=false;
218
219             if (copyChars) {
220                 position++;
221                 sb.append((char) n);
222             }
223             if ('/' == n && previousChar == '*') { // / * Comment
224
currentChar = (char) -1;
225                 return;
226             }
227             previousChar = (char) n;
228         }
229     }
230
231     public String JavaDoc getSeparator() {
232         return new String JavaDoc(separator).intern();
233     }
234
235     /**
236      * Sets statements separator.
237      *
238      * @param separator statements separator. Default value is &quot;;&quot;
239      */

240     public void setSeparator(String JavaDoc separator) {
241         if (StringUtils.isEmpty(separator)) {
242             throw new IllegalArgumentException JavaDoc("separator string cannot be empty");
243         }
244         this.separator = separator.toCharArray();
245     }
246
247     public boolean isSeparatorOnSingleLine() {
248         return separatorOnSingleLine;
249     }
250
251     /**
252      * Sets the separator mode.
253      *
254      * @param separatorOnSingleLine true if {@link #separator} must be on a single line.
255      */

256     public void setSeparatorOnSingleLine(boolean separatorOnSingleLine) {
257         this.separatorOnSingleLine = separatorOnSingleLine;
258     }
259     
260     /**
261      * Returns true if preserve comments and whitespaces. Default value is <b><code>false</code></b>
262      * @return <tt>false</tt> by default
263      */

264     public boolean isKeepFormat() {
265         return keepFormat;
266     }
267
268     /**
269      * Keep original text format, i.e. preserve comments and whitespaces.
270      * @param keepFormat true if comments/whitespaces should be preserved.
271      */

272     public void setKeepFormat(boolean keepFormat) {
273         this.keepFormat = keepFormat;
274     }
275
276     private class SeparatorMatcher {
277         private boolean matches() throws IOException JavaDoc {
278             final int separatorLength = separator.length;
279             for (int j = 1, n; (j < separatorLength) && (n = reader.read()) >= 0; j++) {
280                 position++;
281                 previousChar = currentChar;
282                 currentChar = (char) n;
283                 sb.append(currentChar);
284                 if (separator[j] != n) {
285                     return false;
286                 }
287
288             }
289             if (!separatorOnSingleLine) {
290                 final int len = sb.length();
291                 sb.setLength(len - separatorLength);
292                 return true;
293             } else {
294                 for (int n; (n = reader.read()) >= 0;) {
295                     position++;
296                     previousChar = currentChar;
297                     currentChar = (char) n;
298                     sb.append(currentChar);
299                     if (n > 32) {
300                         return false;
301                     } else if (n == '\r' || n == '\n') {
302                         break;
303                     }
304                 }
305                 sb.setLength(lastLineStart);
306                 return true;
307             }
308         }
309     }
310
311
312     /**
313      * Unsynchronized buffered wrapper for a reader.
314      * <p>Used for performance reasons to avoid multiple calls to underlying reader implementation,
315      * this class is faster and lighter than BufferedReader for our case.
316      */

317     private static final class ReaderWrapper {
318         /**
319          * Size of internal buffer
320          */

321         private static final int BUF_SIZE=512; //optimal for small and huge scripts
322
private char[] buf=new char[BUF_SIZE];
323         private int bufSize;
324         private int bufPos;
325         private final Reader JavaDoc reader;
326
327         ReaderWrapper(Reader JavaDoc reader) {
328             this.reader = reader;
329         }
330
331         /**
332          * Reads a character for reader.
333          * <p>Internal bufferring is used for performance reasons.
334          * @return a character read.
335          * @throws IOException if I/O exception occurs
336          */

337         private int read() throws IOException JavaDoc {
338             if (bufPos>=bufSize) { //buffer is empty
339
bufSize=reader.read(buf, 0, BUF_SIZE);
340                 if (bufSize<0) {
341                     return -1;
342                 }
343                 bufPos=0;
344             }
345             return buf[bufPos++];
346         }
347
348     }
349
350     public void close() throws IOException JavaDoc {
351         reader.reader.close();
352     }
353 }
354
355
Popular Tags