KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > h2 > tools > Csv


1 /*
2  * Copyright 2004-2006 H2 Group. Licensed under the H2 License, Version 1.0 (http://h2database.com/html/license.html).
3  * Initial Developer: H2 Group
4  */

5 package org.h2.tools;
6
7 import java.io.*;
8 import java.sql.*;
9 import java.util.ArrayList JavaDoc;
10
11 import org.h2.util.IOUtils;
12 import org.h2.util.StringUtils;
13
14 /**
15  * A facility to read from and write to CSV (comma separated values) files.
16  */

17 public class Csv implements SimpleRowSource {
18     
19     private String JavaDoc charset = StringUtils.getDefaultCharset();
20     private int bufferSize = 8 * 1024;
21     private String JavaDoc[] columnNames;
22     private char fieldSeparatorRead = ',';
23     private char commentLineStart = '#';
24     private String JavaDoc fieldSeparatorWrite = ",";
25     private String JavaDoc rowSeparatorWrite;
26     private char fieldDelimiter = '\"';
27     private char escapeCharacter = '\"';
28     private String JavaDoc fileName;
29     private InputStream in;
30     private Reader reader;
31     private FileOutputStream out;
32     private PrintWriter writer;
33     private int back;
34     private boolean endOfLine, endOfFile;
35     
36     /**
37      * Get a new object of this class.
38      *
39      * @return the new instance
40      */

41     public static Csv getInstance() {
42         return new Csv();
43     }
44     
45     /**
46      * Writes the result set to a file in the CSV format.
47      * @param fileName
48      * @param rs the result set
49      * @param charset the charset or null to use UTF-8
50      * @throws SQLException
51      */

52     public void write(String JavaDoc fileName, ResultSet rs, String JavaDoc charset) throws SQLException {
53         ResultSetMetaData meta = rs.getMetaData();
54         init(fileName, charset);
55         try {
56             initWrite();
57             int columnCount = meta.getColumnCount();
58             String JavaDoc[] row = new String JavaDoc[columnCount];
59             for(int i=0; i<columnCount; i++) {
60                 row[i] = meta.getColumnLabel(i + 1);
61             }
62             writeRow(row);
63             while(rs.next()) {
64                 for(int i=0; i<columnCount; i++) {
65                     row[i] = rs.getString(i + 1);
66                 }
67                 writeRow(row);
68             }
69             close();
70         } catch(IOException e) {
71             throw convertException("IOException writing file " + fileName, e);
72         }
73         rs.close();
74     }
75     
76     /**
77      * Writes the result set of a query to a file in the CSV format.
78      *
79      * @param conn the connection
80      * @param fileName the file name
81      * @param sql the query
82      * @param charset the charset or null to use UTF-8
83      * @throws SQLException
84      */

85     public void write(Connection conn, String JavaDoc fileName, String JavaDoc sql, String JavaDoc charset) throws SQLException {
86         Statement stat = conn.createStatement();
87         ResultSet rs = stat.executeQuery(sql);
88         write(fileName, rs, charset);
89         stat.close();
90     }
91
92     /**
93      * Reads from the CSV file and returns a result set.
94      * The rows in the result set are created on demand,
95      * that means the file is kept open until all rows are read
96      * or the result set is closed.
97      *
98      * @param fileName the file name
99      * @param colNames or null if the column names should be read from the CSV file
100      * @param charset the charset or null to use UTF-8
101      * @return the result set
102      * @throws SQLException
103      */

104     public ResultSet read(String JavaDoc fileName, String JavaDoc[] colNames, String JavaDoc charset) throws SQLException {
105         init(fileName, charset);
106         try {
107             columnNames = colNames;
108             initRead();
109             SimpleResultSet result = new SimpleResultSet(this);
110             normalizeColumnNames();
111             for(int i=0; i<columnNames.length; i++) {
112                 result.addColumn(columnNames[i], Types.VARCHAR, 255, 0);
113             }
114             return result;
115         } catch(IOException e) {
116             throw convertException("IOException reading file " + fileName, e);
117         }
118     }
119     
120     /**
121      * Reads CSV data from a reader and returns a result set.
122      * The rows in the result set are created on demand,
123      * that means the reader is kept open until all rows are read
124      * or the result set is closed.
125      *
126      * @param reader the reader
127      * @param colNames or null if the column names should be read from the CSV file
128      * @return the result set
129      * @throws SQLException
130      */

131     public ResultSet read(Reader reader, String JavaDoc[] colNames) throws SQLException {
132         init(null, null);
133         try {
134             this.columnNames = colNames;
135             this.reader = reader;
136             initRead();
137             SimpleResultSet result = new SimpleResultSet(this);
138             normalizeColumnNames();
139             for(int i=0; i<columnNames.length; i++) {
140                 result.addColumn(columnNames[i], Types.VARCHAR, 255, 0);
141             }
142             return result;
143         } catch(IOException e) {
144             throw convertException("IOException", e);
145         }
146     }
147     
148     private void normalizeColumnNames() {
149         for(int i=0; i<columnNames.length; i++) {
150             String JavaDoc x = columnNames[i];
151             if(x == null || x.length()==0) {
152                 x = "C" + (i+1);
153             }
154             for(int j=0; j<i; j++) {
155                 String JavaDoc y = columnNames[j];
156                 if(x.equals(y)) {
157                     x = x + "1";
158                     j = -1;
159                 }
160             }
161             columnNames[i] = x;
162         }
163     }
164
165     private Csv() {
166     }
167         
168     private void init(String JavaDoc fileName, String JavaDoc charset) {
169         this.fileName = fileName;
170         if(charset != null) {
171             this.charset = charset;
172         }
173     }
174     
175     private void initWrite() throws IOException {
176         if(writer == null) {
177             try {
178                 out = new FileOutputStream(fileName);
179                 BufferedOutputStream o = new BufferedOutputStream(out, bufferSize);
180                 writer = new PrintWriter(new OutputStreamWriter(o, charset));
181                 // TODO performance: what is faster? one, two, or both?
182
// writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, encoding), bufferSize));
183
} catch(IOException e) {
184                 close();
185                 throw e;
186             }
187         }
188         if(columnNames != null) {
189             writeHeader();
190         }
191     }
192     
193     private void writeHeader() {
194         for(int i=0; i<columnNames.length; i++) {
195             if(i>0) {
196                 writer.print(fieldSeparatorRead);
197             }
198             writer.print(columnNames[i]);
199         }
200         writer.println();
201     }
202     
203     private void writeRow(String JavaDoc[] values) {
204         for(int i=0; i<values.length; i++) {
205             if(i>0) {
206                 if(fieldSeparatorWrite != null) {
207                     writer.print(fieldSeparatorWrite);
208                 }
209             }
210             String JavaDoc s = values[i];
211             if(s != null) {
212                 if(escapeCharacter != 0) {
213                     if(fieldDelimiter != 0) {
214                         writer.print(fieldDelimiter);
215                     }
216                     writer.print(escape(s));
217                     if(fieldDelimiter != 0) {
218                         writer.print(fieldDelimiter);
219                     }
220                 } else {
221                     writer.print(s);
222                 }
223             }
224         }
225         if(rowSeparatorWrite != null) {
226             writer.print(rowSeparatorWrite);
227         }
228         writer.println();
229     }
230
231     private String JavaDoc escape(String JavaDoc data) {
232         if(data.indexOf(fieldDelimiter) < 0) {
233             if(escapeCharacter == fieldDelimiter || data.indexOf(escapeCharacter) < 0) {
234                 return data;
235             }
236         }
237         StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
238         for(int i=0; i<data.length(); i++) {
239             char ch = data.charAt(i);
240             if(ch == fieldDelimiter || ch == escapeCharacter) {
241                 buff.append(escapeCharacter);
242             }
243             buff.append(ch);
244         }
245         return buff.toString();
246     }
247     
248     private void initRead() throws IOException {
249         if(reader == null) {
250             try {
251                 in = new FileInputStream(fileName);
252                 BufferedInputStream i = new BufferedInputStream(in, bufferSize);
253                 reader = new InputStreamReader(i, charset);
254                 // TODO what is faster, 1, 2, 1+2
255
//reader = new BufferedReader(new InputStreamReader(in, encoding), bufferSize);
256
} catch(IOException e) {
257                 close();
258                 throw e;
259             }
260         }
261         if(columnNames == null) {
262             readHeader();
263         }
264     }
265     
266     private void readHeader() throws IOException {
267         ArrayList JavaDoc list = new ArrayList JavaDoc();
268         while(true) {
269             String JavaDoc v = readValue();
270             if(v==null) {
271                 if(endOfLine) {
272                     if(endOfFile || list.size()>0) {
273                         break;
274                     }
275                 } else {
276                     list.add("COLUMN" + list.size());
277                 }
278             } else {
279                 list.add(v);
280             }
281         }
282         columnNames = new String JavaDoc[list.size()];
283         list.toArray(columnNames);
284     }
285     
286     private void pushBack(int ch) {
287         back = ch;
288     }
289     
290     private int readChar() throws IOException {
291         int ch = back;
292         if(ch != -1) {
293             back = -1;
294             return ch;
295         } else if(endOfFile) {
296             return -1;
297         }
298         ch = reader.read();
299         if(ch < 0) {
300             endOfFile = true;
301             close();
302         }
303         return ch;
304     }
305     
306     private String JavaDoc readValue() throws IOException {
307         endOfLine = false;
308         String JavaDoc value = null;
309         while(true) {
310             int ch = readChar();
311             if(ch < 0 || ch == '\r' || ch == '\n') {
312                 endOfLine = true;
313                 break;
314             } else if(ch <= ' ') {
315                 // ignore spaces
316
continue;
317             } else if(ch == fieldSeparatorRead) {
318                 break;
319             } else if(ch == commentLineStart) {
320                 while(true) {
321                     ch = readChar();
322                     if(ch < 0 || ch == '\r' || ch == '\n') {
323                         break;
324                     }
325                 }
326                 endOfLine = true;
327                 break;
328             } else if(ch == fieldDelimiter) {
329                 StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
330                 boolean containsEscape = false;
331                 while(true) {
332                     ch = readChar();
333                     if(ch < 0) {
334                         return buff.toString();
335                     } else if(ch == fieldDelimiter) {
336                         ch = readChar();
337                         if(ch == fieldDelimiter) {
338                             buff.append((char)ch);
339                         } else {
340                             pushBack(ch);
341                             break;
342                         }
343                     } else if(ch == escapeCharacter) {
344                         buff.append((char)ch);
345                         ch = readChar();
346                         if(ch < 0) {
347                             break;
348                         }
349                         containsEscape = true;
350                         buff.append((char)ch);
351                     } else {
352                         buff.append((char)ch);
353                     }
354                 }
355                 value = buff.toString();
356                 if(containsEscape) {
357                     value = unEscape(value);
358                 }
359                 while(true) {
360                     ch = readChar();
361                     if(ch < 0) {
362                         break;
363                     } else if(ch == ' ' || ch == '\t') {
364                         // ignore
365
} else if(ch == fieldSeparatorRead) {
366                         break;
367                     } else if(ch == '\r' || ch == '\n') {
368                         pushBack(ch);
369                         endOfLine = true;
370                         break;
371                     } else {
372                         pushBack(ch);
373                         break;
374                     }
375                 }
376                 break;
377             } else {
378                 StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
379                 buff.append((char)ch);
380                 while(true) {
381                     ch = readChar();
382                     if(ch == fieldSeparatorRead) {
383                         break;
384                     } else if(ch == '\r' || ch == '\n') {
385                         pushBack(ch);
386                         endOfLine = true;
387                         break;
388                     } else if(ch < 0) {
389                         break;
390                     }
391                     buff.append((char)ch);
392                 }
393                 value = buff.toString().trim();
394                 break;
395             }
396         }
397         return value;
398     }
399
400     private String JavaDoc unEscape(String JavaDoc s) {
401         StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
402         int start = 0;
403         while(true) {
404             int idx = s.indexOf(escapeCharacter, start);
405             if(idx < 0) {
406                 break;
407             }
408             buff.append(s.toCharArray(), start, idx);
409             start = idx + 1;
410         }
411         buff.append(s.substring(start));
412         return buff.toString();
413     }
414
415     /**
416      * INTERNAL
417      */

418     public Object JavaDoc[] readRow() throws SQLException {
419         if(reader == null) {
420             return null;
421         }
422         String JavaDoc[] row = new String JavaDoc[columnNames.length];
423         try {
424             for(int i=0;; i++) {
425                 String JavaDoc v = readValue();
426                 if(v==null) {
427                     if(endOfFile && i==0) {
428                         return null;
429                     }
430                     if(endOfLine) {
431                         if(i==0) {
432                             // empty line
433
i--;
434                             continue;
435                         }
436                         break;
437                     }
438                 }
439                 if(i < row.length) {
440                     row[i] = v;
441                 }
442             }
443         } catch(IOException e) {
444             throw convertException("IOException reading from " + fileName, e);
445         }
446         return row;
447     }
448     
449     private SQLException convertException(String JavaDoc message, Exception JavaDoc e) {
450         SQLException s = new SQLException(message, "CSV");
451 //#ifdef JDK14
452
s.initCause(e);
453 //#endif
454
return s;
455     }
456
457     /**
458      * INTERNAL
459      */

460     public void close() {
461         IOUtils.closeSilently(reader);
462         reader = null;
463         IOUtils.closeSilently(in);
464         in = null;
465         IOUtils.closeSilently(writer);
466         writer = null;
467         IOUtils.closeSilently(out);
468         out = null;
469     }
470
471     /**
472      * Override the field separator for writing. The default is ",".
473      * @param fieldSeparatorWrite
474      */

475     public void setFieldSeparatorWrite(String JavaDoc fieldSeparatorWrite) {
476         this.fieldSeparatorWrite = fieldSeparatorWrite;
477     }
478
479     /**
480      * Override the end-of-row marker for writing. The default is null.
481      * @param fieldSeparatorWrite
482      */

483     public void setRowSeparatorWrite(String JavaDoc rowSeparatorWrite) {
484         this.rowSeparatorWrite = rowSeparatorWrite;
485     }
486
487     /**
488      * Reset the position (before the first row).
489      * This is not supported at this time, and this methods throws a SQLException
490      */

491     public void reset() throws SQLException {
492         throw new SQLException("Method is notsupported", "CSV");
493     }
494
495 }
496
Popular Tags