KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > Ostermiller > util > LabeledCSVParser


1 /*
2  * Read files in comma separated value format with a first line of labels.
3  *
4  * Copyright (C) 2004 Campbell, Allen T. <allenc28@yahoo.com>
5  *
6  * Copyright (C) 2004 Stephen Ostermiller
7  * http://ostermiller.org/contact.pl?regarding=Java+Utilities
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * See COPYING.TXT for details.
20  */

21 package com.Ostermiller.util;
22
23 import java.io.IOException JavaDoc;
24 import java.util.HashMap JavaDoc;
25 import java.util.Map JavaDoc;
26
27 /**
28  * Decorate a CSVParse object to provide an index of field names. Many (most?)
29  * CSV files have a list of field names (labels) as the first line. A
30  * LabeledCSVParser will consume this line automatically. The methods
31  * {@link #getLabels()}, {@link #getLabelIndex(String)} and
32  * {@link #getValueByLabel(String)} allow these labels to be discovered and
33  * used while parsing CSV data. This class can also be used to conveniently
34  * ignore field labels if they happen to be present in a CSV file and are not
35  * desired.
36  *
37  * @author Campbell, Allen T. <allenc28@yahoo.com>
38  * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
39  * @since ostermillerutils 1.03.00
40  */

41 public class LabeledCSVParser implements CSVParse {
42
43     /**
44      * Class which actually does the parsing. Called for most methods.
45      *
46      * @since ostermillerutils 1.03.00
47      */

48     private CSVParse parse;
49
50     /**
51      * The first line of the CSV file - treated specially as labels.
52      * Set by setLabels.
53      *
54      * @since ostermillerutils 1.03.00
55      */

56     private String JavaDoc[] labels;
57
58     /**
59      * Hash of the labels (String) to column number (Integer).
60      * Set by setLabels.
61      *
62      * @since ostermillerutils 1.03.00
63      */

64     private Map JavaDoc<String JavaDoc,Integer JavaDoc> labelMap;
65
66     /**
67      * The last line read from the CSV file. Saved for getValueByLabel().
68      *
69      * @since ostermillerutils 1.03.00
70      */

71     private String JavaDoc[] lastLine;
72
73     /**
74      * Set whenever nextValue is called and checked when getValueByLabel() is
75      * called to enforce incompatibility between the methods.
76      *
77      * @since ostermillerutils 1.03.00
78      */

79     private int nextValueLine = -2;
80
81     /**
82      * Construct a LabeledCSVParser on a CSVParse implementation.
83      *
84      * @param parse CSVParse implementation
85      * @throws IOException if an error occurs while reading.
86      *
87      * @since ostermillerutils 1.03.00
88      */

89     public LabeledCSVParser(CSVParse parse) throws IOException JavaDoc {
90         this.parse = parse;
91     }
92
93     /**
94      * Change this parser so that it uses a new delimiter.
95      * <p>
96      * The initial character is a comma, the delimiter cannot be changed
97      * to a quote or other character that has special meaning in CSV.
98      *
99      * @param newDelim delimiter to which to switch.
100      * @throws BadDelimiterException if the character cannot be used as a delimiter.
101      *
102      * @since ostermillerutils 1.03.00
103      */

104     public void changeDelimiter(char newDelim) throws BadDelimiterException {
105         parse.changeDelimiter(newDelim);
106     }
107
108     /**
109      * Change this parser so that it uses a new character for quoting.
110      * <p>
111      * The initial character is a double quote ("), the delimiter cannot be changed
112      * to a comma or other character that has special meaning in CSV.
113      *
114      * @param newQuote character to use for quoting.
115      * @throws BadQuoteException if the character cannot be used as a quote.
116      *
117      * @since ostermillerutils 1.03.00
118      */

119     public void changeQuote(char newQuote) throws BadQuoteException {
120         parse.changeQuote(newQuote);
121     }
122
123     /**
124      * Get all the values from the file.
125      * <p>
126      * If the file has already been partially read, only the
127      * values that have not already been read will be included.
128      * <p>
129      * Each line of the file that has at least one value will be
130      * represented. Comments and empty lines are ignored.
131      * <p>
132      * The resulting double array may be jagged.
133      * <p>
134      * The last line of the values is saved and may be accessed
135      * by getValueByLabel().
136      *
137      * @return all the values from the file or null if there are no more values.
138      * @throws IOException if an error occurs while reading.
139      *
140      * @since ostermillerutils 1.03.00
141      */

142     public String JavaDoc[][] getAllValues() throws IOException JavaDoc {
143         if (labels == null) setLabels();
144         String JavaDoc[][] allValues = parse.getAllValues();
145         if (allValues == null){
146             lastLine = null;
147         } else {
148             lastLine = allValues[allValues.length-1];
149         }
150         return allValues;
151     }
152
153     /**
154      * Get the line number that the last token came from.
155      * <p>
156      * New line breaks that occur in the middle of a token are not
157      * counted in the line number count.
158      * <p>
159      * The first line of labels does not count towards the line number.
160      *
161      * @return line number or -1 if no tokens have been returned yet.
162      *
163      * @since ostermillerutils 1.03.00
164      */

165     public int getLastLineNumber(){
166         return lastLineNumber();
167     }
168
169     /**
170      * Get the line number that the last token came from.
171      * <p>
172      * New line breaks that occur in the middle of a token are not
173      * counted in the line number count.
174      * <p>
175      * The first line of labels does not count towards the line number.
176      *
177      * @return line number or -1 if no tokens have been returned yet.
178      *
179      * @since ostermillerutils 1.03.00
180      */

181     public int lastLineNumber(){
182         int lineNum = parse.getLastLineNumber();
183         if (lineNum <= -1) return -1; // Nothing has been read yet
184
if (lineNum == 1) return -1; // only labels have been read
185
return lineNum - 1; // adjust line number to account for the label line
186
}
187
188     /**
189      * Get all the values from a line.
190      * <p>
191      * If the line has already been partially read, only the values that have not
192      * already been read will be included.
193      * <p>
194      * In addition to returning all the values from a line, LabeledCSVParser
195      * maintains a buffer of the values. This feature allows
196      * {@link #getValueByLabel(String)} to function. In this case
197      * {@link #getLine()} is used simply to iterate CSV data. The iteration ends
198      * when null is returned.
199      * <p>
200      * <b>Note:</b> The methods {@link #nextValue()} and {@link #getAllValues()}
201      * are incompatible with {@link #getValueByLabel(String)} because the former
202      * methods cause the offset of field values to shift and corrupt the internal
203      * buffer maintained by {@link #getLine}.
204      *
205      * @return all the values from the line or null if there are no more values.
206      * @throws IOException if an error occurs while reading.
207      *
208      * @since ostermillerutils 1.03.00
209      */

210     public String JavaDoc[] getLine() throws IOException JavaDoc {
211         if (labels == null) setLabels();
212         lastLine = parse.getLine();
213         return lastLine;
214     }
215
216     /**
217      * Read the next value from the file. The line number from
218      * which this value was taken can be obtained from getLastLineNumber().
219      * <p>
220      * This method is not compatible with getValueByLabel(). Using this
221      * method will make getValueByLabel() throw an IllegalStateException
222      * for the rest of the line.
223      *
224      * @return the next value or null if there are no more values.
225      * @throws IOException if an error occurs while reading.
226      *
227      * @since ostermillerutils 1.03.00
228      */

229     public String JavaDoc nextValue() throws IOException JavaDoc {
230         if (labels == null) setLabels();
231         String JavaDoc nextValue = parse.nextValue();
232         nextValueLine = getLastLineNumber();
233         return nextValue;
234     }
235
236     /**
237      * Initialize the LabeledCSVParser.labels member and LabeledCSVParser.labelMap
238      * member.
239      *
240      * @throws java.io.IOException
241      *
242      * @since ostermillerutils 1.03.00
243      */

244     private void setLabels() throws IOException JavaDoc {
245         labels = parse.getLine();
246         if (labels == null) return;
247         labelMap = new HashMap JavaDoc<String JavaDoc,Integer JavaDoc>();
248         for (int i = 0; i < labels.length; i++){
249             labelMap.put(labels[i], new Integer JavaDoc(i));
250         }
251     }
252
253     /**
254      * Return an array of all field names from the top
255      * of the CSV file.
256      *
257      * @return Field names.
258      *
259      * @since ostermillerutils 1.03.00
260      */

261     public String JavaDoc[] getLabels() throws IOException JavaDoc {
262         if (labels == null) setLabels();
263         return labels;
264     }
265
266     /**
267      * Get the index of the column having the given label.
268      * The {@link #getLine()} method returns an
269      * array of field values for a single record of data. This method returns
270      * the index of a member of that array based on the specified field name.
271      * The first field has the index 0.
272      *
273      * @param label The field name.
274      * @return The index of the field name, or -1 if the label does not exist.
275      * @deprecated may swallow an IOException while reading the labels - please use getLabelIdx()
276      *
277      * @since ostermillerutils 1.03.00
278      */

279     public int getLabelIndex(String JavaDoc label){
280         try {
281             return getLabelIdx(label);
282         } catch (IOException JavaDoc iox){
283             return -1;
284         }
285     }
286
287     /**
288      * Get the index of the column having the given label.
289      * The {@link #getLine()} method returns an
290      * array of field values for a single record of data. This method returns
291      * the index of a member of that array based on the specified field name.
292      * The first field has the index 0.
293      *
294      * @param label The field name.
295      * @return The index of the field name, or -1 if the label does not exist.
296      *
297      * @since ostermillerutils 1.04.02
298      */

299     public int getLabelIdx(String JavaDoc label) throws IOException JavaDoc {
300         if (labels == null) setLabels();
301         if (labelMap == null) return -1;
302         if (!labelMap.containsKey(label)) return -1;
303         return ((Integer JavaDoc)labelMap.get(label)).intValue();
304     }
305
306     /**
307      * Given the label for the column, get the column from the last line that
308      * was read. If the column cannot be found in the line, null is returned.
309      *
310      * @param label The field name.
311      * @throws IllegalStateException if nextValue has been called as part of getting the last line. nextValue is not compatible with this method.
312      * @return the value from the last line read or null if there is no such value
313      *
314      * @since ostermillerutils 1.03.00
315      */

316     public String JavaDoc getValueByLabel(String JavaDoc label) throws IllegalStateException JavaDoc {
317         if (nextValueLine == getLastLineNumber()) throw new IllegalStateException JavaDoc("nextValue() was used to get values from this line.");
318         if (lastLine == null) return null;
319         int fieldIndex;
320         try {
321             fieldIndex = getLabelIdx(label);
322         } catch (IOException JavaDoc iox){
323             // Can't happen here because the labels have been read before the first line.
324
throw new RuntimeException JavaDoc(iox);
325         }
326         if (fieldIndex == -1) return null;
327         if (fieldIndex >= lastLine.length) return null;
328         return lastLine[fieldIndex];
329     }
330
331     /**
332      * Close any stream upon which this parser is based.
333      *
334      * @throws IOException if an error occurs while closing the stream.
335      *
336      * @since ostermillerutils 1.03.00
337      */

338     public void close() throws IOException JavaDoc {
339         parse.close();
340     }
341 }
342
Popular Tags