KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sqlmagic > tinysql > FieldTokenizer


/*
 * This class provides a more sophisticated tokenizer than that available
 * with StringTokenizer. In particular, it handles tokenizing of fields
 * in brackets, and will ignore separators in quotes or brackets.
 *
 * $Author: davis $
 * $Date: 2004/12/18 21:29:04 $
 * $Revision: 1.1 $
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Revision History:
 *
 * Written by Davis Swan in April, 2004.
 */

28 package com.sqlmagic.tinysql;
29
30 import java.text.*;
31 import java.util.*;
32 import java.lang.*;
33
/**
 * Splits a string into trimmed fields on a single separator character,
 * ignoring separators that occur inside single- or double-quoted strings
 * or inside round brackets.
 *
 * When the separator is '(' the tokenizer instead returns the text found
 * outside and inside each outermost pair of brackets.
 *
 * The fields can be read by index (getField, getInt), as an array
 * (getFields) or sequentially (hasMoreFields, nextField).
 */
public class FieldTokenizer
{
   /* Flip to true to trace the tokenizer input and output on System.out. */
   private static final boolean DEBUG = false;

   /* Value held by the quote trackers while no quoted string is open. */
   private static final char NO_QUOTE = ' ';

   /* Fields produced by the constructor; always holds at least one element. */
   String[] fields;

   /* Index of the field that the next call to nextField() will return. */
   int fieldIndex;

   /**
    * Splits inputString into fields.
    *
    * Each field is trimmed and empty fields are dropped. If no field at all
    * is produced (for example the input is empty or blank) the result is a
    * single field holding inputString unchanged.
    *
    * @param inputString the text to split; must not be null
    * @param separator   the separator character; if it is '(' the text
    *                    outside and inside the outermost brackets is returned
    * @param returnSep   true to also return each separator as its own field
    *                    ("(" and ")" when the separator is '(')
    * @throws NullPointerException if inputString is null
    */
   public FieldTokenizer(String inputString,char separator, boolean returnSep)
   {
      if ( inputString == null )
         throw new NullPointerException("inputString must not be null");
      if ( DEBUG )
         System.out.println("FieldTokenizer: "
         + " separator is " + separator + " string is <" + inputString + ">");

      Vector tokens = new Vector();
      String separatorString = String.valueOf(separator);
      int inputLength = inputString.length();
      int leftBracketCount = 0;
      int rightBracketCount = 0;
      char quoteChar = NO_QUOTE;
      char bracketQuoteChar = NO_QUOTE;
      int startPosn = 0;

      for ( int i = 0; i < inputLength; i++ )
      {
         char nextChar = inputString.charAt(i);
         if ( nextChar == '\'' || nextChar == '"' )
         {
            /*
             * Quotes inside brackets are tracked separately so that brackets
             * within a quoted string embedded in the brackets are ignored.
             */
            if ( leftBracketCount > 0 )
            {
               if ( bracketQuoteChar == NO_QUOTE )
                  bracketQuoteChar = nextChar;
               else if ( nextChar == bracketQuoteChar )
                  bracketQuoteChar = NO_QUOTE;
               continue;
            }
            if ( quoteChar == NO_QUOTE )
            {
               quoteChar = nextChar;
            }
            else if ( nextChar == quoteChar )
            {
               /*
                * A matching quote has been found. Two adjacent single quotes
                * represent an embedded single quote, so skip the second one
                * and stay inside the quoted string.
                */
               if ( quoteChar == '\'' && i < inputLength - 1
                    && inputString.charAt(i + 1) == '\'' )
                  i++;
               else
                  quoteChar = NO_QUOTE;
            }
         }
         else if ( nextChar == '(' || nextChar == ')' )
         {
            /*
             * Ignore brackets inside quoted strings.
             */
            if ( quoteChar != NO_QUOTE || bracketQuoteChar != NO_QUOTE )
               continue;
            if ( nextChar == '(' )
            {
               leftBracketCount++;
               /*
                * If bracket is the separator, return the string before the
                * outermost left bracket.
                */
               if ( separator == '(' && leftBracketCount == 1 )
               {
                  addTrimmedToken(tokens,inputString,startPosn,i);
                  if ( returnSep ) tokens.addElement("(");
                  startPosn = i + 1;
               }
            }
            else
            {
               /*
                * Handle nested sets of brackets: the group is closed once
                * as many right brackets as left brackets have been seen.
                */
               rightBracketCount++;
               if ( leftBracketCount > 0
                    && leftBracketCount == rightBracketCount )
               {
                  /*
                   * If bracket is the separator, return the string between
                   * the brackets.
                   */
                  if ( separator == '(' )
                  {
                     addTrimmedToken(tokens,inputString,startPosn,i);
                     if ( returnSep ) tokens.addElement(")");
                     startPosn = i + 1;
                  }
                  leftBracketCount = 0;
                  rightBracketCount = 0;
               }
            }
         }
         else if ( nextChar == separator && leftBracketCount == 0
                   && quoteChar == NO_QUOTE )
         {
            /*
             * A separator outside of brackets and quoted strings ends the
             * current field.
             */
            addTrimmedToken(tokens,inputString,startPosn,i);
            if ( returnSep ) tokens.addElement(separatorString);
            startPosn = i + 1;
         }
      }
      /*
       * Pick up the last string if there is one. Running to the end of the
       * input (rather than to the last position examined) keeps the final
       * character when the scan ended on a skipped escaped quote, and makes
       * an empty input a no-op instead of an out-of-range substring.
       */
      addTrimmedToken(tokens,inputString,startPosn,inputLength);

      /*
       * Create output string array from Vector.
       */
      if ( tokens.isEmpty() )
      {
         fields = new String[] { inputString };
         if ( DEBUG )
            System.out.println("FieldTokenizer output: <" + inputString + ">");
      }
      else
      {
         fields = new String[tokens.size()];
         tokens.copyInto(fields);
         if ( DEBUG )
         {
            for ( int i = 0; i < fields.length; i++ )
               System.out.println("FieldTokenizer output[" + i + "]: <"
               + fields[i] + ">");
         }
      }
      fieldIndex = 0;
   }

   /*
    * Appends the trimmed text input[start,end) to tokens unless the range
    * is empty or contains only whitespace.
    */
   private static void addTrimmedToken(Vector tokens,String input,
                                       int start,int end)
   {
      if ( end <= start ) return;
      String token = input.substring(start,end).trim();
      if ( token.length() > 0 ) tokens.addElement(token);
   }

   /**
    * Returns the fields as an array of strings. The array is the
    * tokenizer's own storage, not a copy.
    *
    * @return the fields, never empty
    */
   public String[] getFields()
   {
      return fields;
   }

   /**
    * Returns the count of fields.
    *
    * @return the number of fields, at least 1
    */
   public int countFields()
   {
      return fields.length;
   }

   /**
    * Returns a particular field, or the string "NULL" if the index is out
    * of range.
    *
    * @param inputIndex zero-based index of the field
    * @return the field, or "NULL" when inputIndex is out of range
    */
   public String getField(int inputIndex)
   {
      return getField(inputIndex,"NULL");
   }

   /**
    * Returns a particular field, or a caller supplied default if the index
    * is out of range.
    *
    * @param inputIndex    zero-based index of the field
    * @param defaultString value returned when inputIndex is out of range
    * @return the field, or defaultString
    */
   public String getField(int inputIndex,String defaultString)
   {
      if ( inputIndex < 0 || inputIndex >= fields.length )
         return defaultString;
      return fields[inputIndex];
   }

   /**
    * Returns a particular field parsed as an int.
    *
    * @param inputIndex zero-based index of the field
    * @param defaultInt value returned when inputIndex is out of range or
    *                   the field is not a valid int
    * @return the parsed value, or defaultInt
    */
   public int getInt(int inputIndex,int defaultInt)
   {
      if ( inputIndex < 0 || inputIndex >= fields.length )
         return defaultInt;
      try
      {
         return Integer.parseInt(fields[inputIndex]);
      } catch (NumberFormatException e) {
         /* Not a number: fall back to the caller supplied default. */
         return defaultInt;
      }
   }

   /**
    * Tests whether nextField() has another field to return.
    *
    * @return true if at least one field has not been returned yet
    */
   public boolean hasMoreFields()
   {
      return fieldIndex < fields.length;
   }

   /**
    * Returns the next field and advances the sequential position.
    *
    * @return the next field, or null once all fields have been returned
    */
   public String nextField()
   {
      if ( fieldIndex >= fields.length ) return null;
      return fields[fieldIndex++];
   }
}
266
Popular Tags