KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > turbine > util > parser > DataStreamParser


1 package org.apache.turbine.util.parser;
2
/*
 * Copyright 2001-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

18
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
32
33 /**
34  * DataStreamParser is used to parse a stream with a fixed format and
35  * generate ValueParser objects which can be used to extract the values
36  * in the desired type.
37  *
38  * <p>The class itself is abstract - a concrete subclass which implements
39  * the initTokenizer method such as CSVParser or TSVParser is required
40  * to use the functionality.
41  *
42  * <p>The class implements the java.util.Iterator interface for convenience.
43  * This allows simple use in a Velocity template for example:
44  *
45  * <pre>
46  * #foreach ($row in $datastream)
47  * Name: $row.Name
48  * Description: $row.Description
49  * #end
50  * </pre>
51  *
52  * @author <a HREF="mailto:sean@informage.net">Sean Legassick</a>
53  * @author <a HREF="mailto:martin@mvdb.net">Martin van den Bemt</a>
54  * @version $Id: DataStreamParser.java,v 1.1.2.3 2004/08/14 20:11:43 henning Exp $
55  */

56 public abstract class DataStreamParser implements Iterator JavaDoc
57 {
58     /** Logging */
59     private static Log log = LogFactory.getLog(DataStreamParser.class);
60
61     /**
62      * Conditional compilation flag.
63      */

64     private static final boolean DEBUG = false;
65
66     /**
67      * The constant for empty fields
68      */

69     protected static final String JavaDoc EMPTYFIELDNAME = "UNKNOWNFIELD";
70
71     /**
72      * The list of column names.
73      */

74     private List JavaDoc columnNames;
75
76     /**
77      * The stream tokenizer for reading values from the input reader.
78      */

79     private StreamTokenizer JavaDoc tokenizer;
80
81     /**
82      * The parameter parser holding the values of columns for the current line.
83      */

84     private ValueParser lineValues;
85
86     /**
87      * Indicates whether or not the tokenizer has read anything yet.
88      */

89     private boolean neverRead = true;
90
91     /**
92      * The character encoding of the input
93      */

94     private String JavaDoc characterEncoding;
95
96     /**
97      * The fieldseperator, which can be almost any char
98      */

99     private char fieldSeparator;
100
101     /**
102      * Create a new DataStreamParser instance. Requires a Reader to read the
103      * comma-separated values from, a list of column names and a
104      * character encoding.
105      *
106      * @param in the input reader.
107      * @param columnNames a list of column names.
108      * @param characterEncoding the character encoding of the input.
109      */

110     public DataStreamParser(Reader JavaDoc in, List JavaDoc columnNames,
111                             String JavaDoc characterEncoding)
112     {
113         this.columnNames = columnNames;
114         this.characterEncoding = characterEncoding;
115
116         if (this.characterEncoding == null)
117         {
118             // try and get the characterEncoding from the reader
119
this.characterEncoding = "US-ASCII";
120             try
121             {
122                 this.characterEncoding = ((InputStreamReader JavaDoc) in).getEncoding();
123             }
124             catch (ClassCastException JavaDoc e)
125             {
126             }
127         }
128
129         tokenizer = new StreamTokenizer JavaDoc(new BufferedReader JavaDoc(in));
130         initTokenizer(tokenizer);
131     }
132
133     /**
134      * Initialize the StreamTokenizer instance used to read the lines
135      * from the input reader. This must be implemented in subclasses to
136      * set up other tokenizing properties.
137      *
138      * @param tokenizer the tokenizer to adjust
139      */

140     protected void initTokenizer(StreamTokenizer JavaDoc tokenizer)
141     {
142         // set all numeric characters as ordinary characters
143
// (switches off number parsing)
144
tokenizer.ordinaryChars('0', '9');
145         tokenizer.ordinaryChars('-', '-');
146         tokenizer.ordinaryChars('.', '.');
147
148         // leave out the comma sign (,), we need it for empty fields
149

150         tokenizer.wordChars(' ', Integer.MAX_VALUE);
151
152         // and set the quote mark as the quoting character
153
tokenizer.quoteChar('"');
154
155         // and finally say that end of line is significant
156
tokenizer.eolIsSignificant(true);
157     }
158
159     /**
160      * This method must be called to setup the field seperator
161      * @param fieldSeparator the char which separates the fields
162      */

163     public void setFieldSeparator(char fieldSeparator)
164     {
165         this.fieldSeparator = fieldSeparator;
166         // make this field also an ordinary char by default.
167
tokenizer.ordinaryChar(fieldSeparator);
168     }
169
170     /**
171      * Set the list of column names explicitly.
172      *
173      * @param columnNames A list of column names.
174      */

175     public void setColumnNames(List JavaDoc columnNames)
176     {
177         this.columnNames = columnNames;
178     }
179
180     /**
181      * Read the list of column names from the input reader using the
182      * tokenizer. If fieldNames are empty, we use the current fieldNumber
183      * + the EMPTYFIELDNAME to make one up.
184      *
185      * @exception IOException an IOException occurred.
186      */

187     public void readColumnNames()
188             throws IOException JavaDoc
189     {
190         columnNames = new ArrayList JavaDoc();
191         int lastTtype = 0;
192         int fieldCounter = 1;
193
194         neverRead = false;
195         tokenizer.nextToken();
196         while (tokenizer.ttype == StreamTokenizer.TT_WORD || tokenizer.ttype == StreamTokenizer.TT_EOL
197                 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator)
198         {
199             if (tokenizer.ttype != fieldSeparator && tokenizer.ttype != StreamTokenizer.TT_EOL)
200             {
201                 columnNames.add(tokenizer.sval);
202                 fieldCounter++;
203             }
204             else if (tokenizer.ttype == fieldSeparator && lastTtype == fieldSeparator)
205             {
206                 // we have an empty field name
207
columnNames.add(EMPTYFIELDNAME + fieldCounter);
208                 fieldCounter++;
209             }
210             else if (lastTtype == fieldSeparator && tokenizer.ttype == StreamTokenizer.TT_EOL)
211             {
212                 columnNames.add(EMPTYFIELDNAME + fieldCounter);
213                 break;
214             }
215             else if (tokenizer.ttype == StreamTokenizer.TT_EOL)
216             {
217                 break;
218             }
219             lastTtype = tokenizer.ttype;
220             tokenizer.nextToken();
221         }
222     }
223
224     /**
225      * Determine whether a further row of values exists in the input.
226      *
227      * @return true if the input has more rows.
228      * @exception IOException an IOException occurred.
229      */

230     public boolean hasNextRow()
231             throws IOException JavaDoc
232     {
233         // check for end of line ensures that an empty last line doesn't
234
// give a false positive for hasNextRow
235
if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
236         {
237             tokenizer.nextToken();
238             tokenizer.pushBack();
239             neverRead = false;
240         }
241         return tokenizer.ttype != StreamTokenizer.TT_EOF;
242     }
243
244     /**
245      * Returns a ValueParser object containing the next row of values.
246      *
247      * @return a ValueParser object.
248      * @exception IOException an IOException occurred.
249      * @exception NoSuchElementException there are no more rows in the input.
250      */

251     public ValueParser nextRow()
252             throws IOException JavaDoc, NoSuchElementException JavaDoc
253     {
254         if (!hasNextRow())
255         {
256             throw new NoSuchElementException JavaDoc();
257         }
258
259         if (lineValues == null)
260         {
261             lineValues = new BaseValueParser(characterEncoding);
262         }
263         else
264         {
265             lineValues.clear();
266         }
267
268         Iterator JavaDoc it = columnNames.iterator();
269         tokenizer.nextToken();
270         while (tokenizer.ttype == StreamTokenizer.TT_WORD
271                 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator)
272         {
273             int lastTtype = 0;
274             // note this means that if there are more values than
275
// column names, the extra values are discarded.
276
if (it.hasNext())
277             {
278                 String JavaDoc colname = it.next().toString();
279                 String JavaDoc colval = tokenizer.sval;
280                 if (tokenizer.ttype != fieldSeparator && lastTtype != fieldSeparator)
281                 {
282                     if (DEBUG)
283                     {
284                         log.debug("DataStreamParser.nextRow(): " +
285                                 colname + "=" + colval);
286                     }
287                     lineValues.add(colname, colval);
288                 }
289                 else if (tokenizer.ttype == fieldSeparator && lastTtype != fieldSeparator)
290                 {
291                     lastTtype = tokenizer.ttype;
292                     tokenizer.nextToken();
293                     if (tokenizer.ttype != fieldSeparator && tokenizer.sval != null)
294                     {
295                         lineValues.add(colname, tokenizer.sval);
296                     }
297                     else if (tokenizer.ttype == StreamTokenizer.TT_EOL)
298                     {
299                         tokenizer.pushBack();
300                     }
301                 }
302             }
303             tokenizer.nextToken();
304         }
305
306         return lineValues;
307     }
308
309     /**
310      * Determine whether a further row of values exists in the input.
311      *
312      * @return true if the input has more rows.
313      */

314     public boolean hasNext()
315     {
316         boolean hasNext = false;
317
318         try
319         {
320             hasNext = hasNextRow();
321         }
322         catch (IOException JavaDoc e)
323         {
324             log.error("IOException in CSVParser.hasNext", e);
325         }
326
327         return hasNext;
328     }
329
330     /**
331      * Returns a ValueParser object containing the next row of values.
332      *
333      * @return a ValueParser object as an Object.
334      * @exception NoSuchElementException there are no more rows in the input
335      * or an IOException occurred.
336      */

337     public Object JavaDoc next()
338             throws NoSuchElementException JavaDoc
339     {
340         Object JavaDoc nextRow = null;
341
342         try
343         {
344             nextRow = nextRow();
345         }
346         catch (IOException JavaDoc e)
347         {
348             log.error("IOException in CSVParser.next", e);
349             throw new NoSuchElementException JavaDoc();
350         }
351
352         return nextRow;
353     }
354
355     /**
356      * The optional Iterator.remove method is not supported.
357      *
358      * @exception UnsupportedOperationException the operation is not supported.
359      */

360     public void remove()
361             throws UnsupportedOperationException JavaDoc
362     {
363         throw new UnsupportedOperationException JavaDoc();
364     }
365 }
366
Popular Tags