// KickJava   Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > org > relique > jdbc > csv > CsvReader

/**
    Copyright (C) 2002-2003 Together

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

*/

package org.relique.jdbc.csv;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;

34 /**
35  * This class is a helper class that handles the reading and parsing of data
36  * from a .csv file.
37  *
38  * @author Zoran Milakovic
39  */

40
41 public class CsvReader
42 {
43   private BufferedReader JavaDoc bufReader;
44   private CsvRandomAccessFile randomReader;
45   private String JavaDoc[] columnNames;
46   private Map JavaDoc columnTypes = new KeyInsensitiveHashMap();
47   private String JavaDoc[] columns;
48   private java.lang.String JavaDoc buf = null;
49   private char separator = CsvDriver.DEFAULT_SEPARATOR;
50   private long maxFileSize = CsvDriver.DEFAULT_FILE_MAXSIZE;
51   private String JavaDoc extension = CsvDriver.DEFAULT_EXTENSION;
52   private boolean suppressHeaders = false;
53   private String JavaDoc lineBreakEscape = CsvDriver.DEFAULT_LINE_BREAK_ESCAPE;
54   private String JavaDoc doubleQuoteEscape = CsvDriver.DEFAULT_DOUBLE_QUOTE_ESCAPE;
55   private String JavaDoc carriageReturnEscape = CsvDriver.DEFAULT_CARRIAGE_RETURN_ESCAPE;
56   private String JavaDoc tableName;
57   private String JavaDoc fileName;
58   private String JavaDoc charset = null;
59
60
61
62   /**
63    *
64    * @param fileName
65    * @param separator
66    * @param suppressHeaders
67    * @param charset
68    * @param extension
69    * @throws java.lang.Exception
70    */

71   public CsvReader(
72       String JavaDoc fileName,
73       char separator,
74       boolean suppressHeaders,
75       String JavaDoc charset,
76       String JavaDoc extension,
77       String JavaDoc lineBreakEscape,
78       String JavaDoc carriageReturnEscape
79       )
80        throws java.lang.Exception JavaDoc
81   {
82     this.separator = separator;
83     this.suppressHeaders = suppressHeaders;
84     this.fileName = fileName;
85     this.charset = charset;
86     this.lineBreakEscape = lineBreakEscape;
87     this.carriageReturnEscape = carriageReturnEscape;
88     if( extension != null )
89       this.extension = extension;
90
91     if (charset != null) {
92         if(Utils.isUTF16(charset))
93             randomReader = new CsvRandomAccessFile(fileName,charset);
94         else
95             bufReader = new BufferedReader JavaDoc(new InputStreamReader JavaDoc(new FileInputStream JavaDoc(fileName),charset));
96     } else {
97         bufReader = new BufferedReader JavaDoc(new InputStreamReader JavaDoc(new FileInputStream JavaDoc(fileName)));
98     }
99     if (this.suppressHeaders)
100     {
101       // No column names available.
102
// Read first data line and determine number of colums.
103
buf = this.readLine();
104       String JavaDoc[] data = parseCsvLineAsHeader(buf);
105       columnNames = new String JavaDoc[data.length];
106       for (int i = 0; i < data.length; i++)
107       {
108         columnNames[i] = "COLUMN" + String.valueOf(i+1);
109       }
110       data = null;
111     }
112     else
113     {
114       String JavaDoc headerLine = this.readLine();
115       columnNames = parseCsvLineAsHeader(headerLine);
116     }
117   }
118
119
120   /**
121    * Gets the columnNames attribute of the CsvReader object
122    *
123    * @return The columnNames value
124    */

125   public String JavaDoc[] getColumnNames()
126   {
127     return columnNames;
128   }
129
130   /**
131    *
132    * @return array with column types
133    */

134   public Map JavaDoc getColumnTypes()
135   {
136     return columnTypes;
137   }
138
139
140
141   public String JavaDoc getTableName() {
142       if(tableName != null)
143           return tableName;
144
145       int lastSlash = 0;
146       for(int i = fileName.length()-1; i >= 0; i--)
147           if(fileName.charAt(i) == '/' || fileName.charAt(i) == '\\') {
148             lastSlash = i;
149             break;
150           }
151       tableName = fileName.substring(lastSlash+1, fileName.length() - 4);
152       return tableName;
153   }
154
155   /**
156    * Get the value of the column at the specified index.
157    *
158    * @param columnIndex Description of Parameter
159    * @return The column value
160    * @since
161    */

162
163   public String JavaDoc getColumn(int columnIndex) throws SQLException JavaDoc
164   {
165       if (columnIndex >= columns.length)
166       {
167           return null;
168       }
169       return formatString( columns[columnIndex] );
170   }
171
172   /**
173    * Get value from column at specified name.
174    * If the column name is not found, throw an error.
175    *
176    * @param columnName Description of Parameter
177    * @return The column value
178    * @exception SQLException Description of Exception
179    * @since
180    */

181
182   public String JavaDoc getColumn(String JavaDoc columnName) throws SQLException JavaDoc
183   {
184     for (int loop = 0; loop < columnNames.length; loop++)
185     {
186       if (columnName.equalsIgnoreCase(columnNames[loop])
187           || columnName.equalsIgnoreCase(getTableName() + "."
188                                          + columnNames[loop]))
189       {
190         return getColumn(loop);
191       }
192     }
193     throw new SQLException JavaDoc("Column '" + columnName + "' not found.");
194   }
195
196
197   /**
198    *Description of the Method
199    *
200    * @return Description of the Returned Value
201    * @exception SQLException Description of Exception
202    * @since
203    */

204   public boolean next() throws SQLException JavaDoc {
205     columns = new String JavaDoc[columnNames.length];
206     String JavaDoc dataLine = null;
207     try {
208         if (suppressHeaders && (buf != null)) {
209 // The buffer is not empty yet, so use this first.
210
dataLine = buf;
211           buf = null;
212         } else {
213 // read new line of data from input.
214
dataLine = this.readLine();
215         }
216         if (dataLine == null) {
217           String JavaDoc nextFileName = getNextFileName();
218           if (new File JavaDoc(nextFileName).exists()) {
219             this.fileName = nextFileName;
220             if (charset != null) {
221                 if(Utils.isUTF16(charset))
222                     randomReader = new CsvRandomAccessFile(fileName,charset);
223                 else
224                     bufReader = new BufferedReader JavaDoc(new InputStreamReader JavaDoc(new FileInputStream JavaDoc(fileName),charset));
225             } else {
226                 bufReader = new BufferedReader JavaDoc(new InputStreamReader JavaDoc(new FileInputStream JavaDoc(fileName)));
227             }
228 //skip header
229
dataLine = this.readLine();
230             dataLine = this.readLine();
231           }
232           else {
233             this.closeInputs();
234             return false;
235           }
236         }
237
238     } catch (IOException JavaDoc e) {
239         throw new SQLException JavaDoc(e.toString());
240     }
241     columns = parseCsvLine(dataLine);
242     return true;
243   }
244
245
246   private String JavaDoc getNextFileName() {
247     String JavaDoc currentFileName = this.fileName;
248     String JavaDoc newName = "";
249     String JavaDoc number = "";
250 //name without extension
251
String JavaDoc currentFileExtension = currentFileName.substring(currentFileName.lastIndexOf("."), currentFileName.length());
252     currentFileName = currentFileName.substring(0, currentFileName.lastIndexOf("."));
253     if( currentFileExtension.endsWith(CsvDriver.FILE_NAME_EXT) ) {
254        number += currentFileName.substring(currentFileName.length()-3, currentFileName.length());
255        long num = Long.valueOf(number).longValue()+1;
256        if( num >= 100 && num < 1000 )
257          number = String.valueOf( num );
258        else if ( num >= 10 && num < 100 )
259          number = "0"+String.valueOf( num );
260        else if ( num > 1 && num < 10 )
261          number = "00"+String.valueOf( num );
262        currentFileName = currentFileName.substring(0, currentFileName.length()-3);
263        newName = currentFileName + number + currentFileExtension;
264     } else {
265       newName = currentFileName.toUpperCase() + "001" + this.extension + CsvDriver.FILE_NAME_EXT;
266     }
267     return newName;
268   }
269
270
271   /**
272    *Description of the Method
273    *
274    * @since
275    */

276   public void close()
277   {
278     try
279     {
280       this.closeInputs();
281       buf = null;
282     }
283     catch (Exception JavaDoc e)
284     {
285     }
286   }
287
288
289   /**
290    *
291    * Parse csv line with columnTypes.
292    *
293    * @param line
294    * @return array with values or column names.
295    * @throws SQLException
296    */

297   protected String JavaDoc[] parseCsvLine(String JavaDoc line) throws SQLException JavaDoc
298   {
299     ArrayList JavaDoc values = new ArrayList JavaDoc();
300     boolean inQuotedString = false;
301     String JavaDoc value = "";
302     String JavaDoc orgLine = line;
303     int currentPos = 0;
304     int fullLine = 0;
305     int currentColumn = 0;
306     int indexOfBinaryObject = 0;
307     char currentChar;
308       line += separator;
309       long lineLength = line.length();
310       while (fullLine == 0) {
311         currentPos = 0;
312         while (currentPos < lineLength) {
313
314 //handle BINARY columns
315
if( !(this.columnTypes.size() <= currentColumn ) ) {
316           if (this.columnTypes.get(columnNames[currentColumn]).equals(CsvDriver.BINARY_TYPE)) {
317             String JavaDoc binaryValue = "";
318             currentChar = line.charAt(currentPos);
319             if (currentChar == ',') {
320               values.add(binaryValue); //binary value is null;
321
currentPos ++;
322             }
323             else if (currentChar == '"') {
324               if (line.charAt(currentPos + 1) == '"') {
325                 values.add(binaryValue); //binary value is null
326
currentPos = currentPos + 3;
327               }
328               else {
329                 // take all until next separator, and that is value
330
// do not insert BinaryObject+index into line, just set right currentPos
331
// and insert value into vector
332
// binary value is always beteween quotes (")
333
binaryValue = line.substring(currentPos);
334                 binaryValue = binaryValue.substring(1,
335                                                     binaryValue.indexOf(separator) -
336                                                     1);
337                 values.add(binaryValue);
338                 currentPos += binaryValue.length() + 3;
339               }
340             }
341             //set currentColumn++
342
currentColumn++;
343             continue;
344             }
345           } else {
346             throw new SQLException JavaDoc("Invalid csv format : file = "+new File JavaDoc(fileName).getAbsolutePath()+", line = "+line);
347           }
348
349
350 //parse one by one character
351
currentChar = line.charAt(currentPos);
352           if (value.length() == 0 && currentChar == '"' && !inQuotedString) {
353 //enter here if we are at start of column value
354
currentPos++;
355             inQuotedString = true;
356             continue;
357           }
358
359           if (currentChar == '"') {
360 //get next character
361
char nextChar = line.charAt(currentPos + 1);
362 //if we have "", consider it as ", and add it to value
363
if (nextChar == '"') {
364               value += currentChar;
365               currentPos++;
366             }
367             else {
368 //enter here if we are at end of column value
369
if (!inQuotedString) {
370                 throw new SQLException JavaDoc("Unexpected '\"' in position " +
371                                        currentPos + ". Line=" + orgLine);
372               }
373               if (inQuotedString && nextChar != separator) {
374                 throw new SQLException JavaDoc("Expecting " + separator +
375                                        " in position " + (currentPos + 1) +
376                                        ". Line=" + orgLine);
377               }
378
379 //set currentPos to comma after value
380
currentPos++;
381 //if value is empty string between double quotes consider it as empty string
382
//else if value is empty string between commas consider it as null value
383
values.add(value);
384               currentColumn++;
385               value = "";
386               inQuotedString = false;
387             }
388           }
389
390           else {
391 //when we are at end of column value, and value is not inside of double quotes
392
if (currentChar == separator) {
393 //when have separator in data
394
if (inQuotedString) {
395                 value += currentChar;
396               }
397               else {
398 //if value is empty string between double quotes consider it as empty string
399
//else if value is empty string between commas consider it as null value
400
if( value.equals("") )
401                   value = null;
402                 values.add(value);
403                 currentColumn++;
404                 value = "";
405               }
406             }
407             else {
408               value += currentChar;
409             }
410           }
411
412           currentPos++;
413         } //end while
414

415         if (inQuotedString) {
416           // Remove extra , added at start
417
value = value.substring(0, value.length() - 1);
418           try {
419             line = this.readLine();
420           }
421           catch (IOException JavaDoc e) {
422             throw new SQLException JavaDoc(e.toString());
423           }
424         }
425         else {
426           fullLine = 1;
427         }
428
429       }// end while( fullLine == 0 )
430
String JavaDoc[] retVal = new String JavaDoc[values.size()];
431     values.toArray(retVal);
432
433     return retVal;
434   }
435
436
437    /**
438    *
439    * Parse csv line, whithout columnTypes.
440    *
441    * @param line
442    * @return array with values or column names.
443    * @throws SQLException
444    */

445   protected String JavaDoc[] parseCsvLineAsHeader(String JavaDoc line) throws SQLException JavaDoc
446   {
447     Vector JavaDoc values = new Vector JavaDoc();
448 // ArrayList columnTypesList = new ArrayList();
449
boolean inQuotedString = false;
450     String JavaDoc value = "";
451     String JavaDoc orgLine = line;
452     int currentPos = 0;
453     int fullLine = 0;
454
455     while (fullLine == 0) {
456       currentPos = 0;
457       line += separator;
458       while (currentPos < line.length()) {
459         char currentChar = line.charAt(currentPos);
460         if (value.length() == 0 && currentChar == '"' && !inQuotedString) {
461           currentPos++;
462           inQuotedString = true;
463           continue;
464         }
465         if (currentChar == '"') {
466           char nextChar = line.charAt(currentPos + 1);
467           if (nextChar == '"') {
468             value += currentChar;
469             currentPos++;
470           }
471           else {
472             if (!inQuotedString) {
473               throw new SQLException JavaDoc("Unexpected '\"' in position " +
474                                      currentPos + ". Line=" + orgLine);
475             }
476             if (inQuotedString && nextChar != separator) {
477               throw new SQLException JavaDoc("Expecting " + separator + " in position " +
478                                      (currentPos + 1) + ". Line=" + orgLine);
479             }
480             if (value.endsWith("-"+CsvDriver.BINARY_TYPE)) {
481               value = value.substring(0,value.indexOf("-"+CsvDriver.BINARY_TYPE));
482               columnTypes.put(value, CsvDriver.BINARY_TYPE);
483             }
484             else
485                 columnTypes.put(value, CsvDriver.VARCHAR_TYPE);
486             values.add(value);
487             value = "";
488             inQuotedString = false;
489             currentPos++;
490           }
491         }
492         else {
493           if (currentChar == separator) {
494             if (inQuotedString) {
495               value += currentChar;
496             }
497             else {
498               if (value.endsWith("-"+CsvDriver.BINARY_TYPE)) {
499                 value = value.substring(0,value.indexOf("-"+CsvDriver.BINARY_TYPE));
500                 columnTypes.put(value, CsvDriver.BINARY_TYPE);
501               }
502               else
503                   columnTypes.put(value, CsvDriver.VARCHAR_TYPE);
504               values.add(value);
505               value = "";
506             }
507           }
508           else {
509             value += currentChar;
510           }
511         }
512         currentPos++;
513       }
514       if (inQuotedString) {
515         value = value.substring(0, value.length() - 1);
516         try {
517           line = this.readLine();
518         }
519         catch (IOException JavaDoc e) {
520           throw new SQLException JavaDoc(e.toString());
521         }
522       }
523       else {
524         fullLine = 1;
525       }
526     }
527     String JavaDoc[] retVal = new String JavaDoc[values.size()];
528     values.copyInto(retVal);
529
530    
531     return retVal;
532
533   }
534
535   private String JavaDoc formatString(String JavaDoc str) throws SQLException JavaDoc {
536      String JavaDoc retValue = str;
537      try {
538        //replace spec. characters
539
retValue = Utils.replaceAll(retValue,this.lineBreakEscape, "\n");
540        retValue = Utils.replaceAll(retValue,this.carriageReturnEscape, "\r");
541      }catch(Exception JavaDoc e) {
542        throw new SQLException JavaDoc("Error while reformat string ! : "+str);
543      }
544      return retValue;
545    }
546   
547   private String JavaDoc readLine() throws IOException JavaDoc {
548       String JavaDoc retVal = "";
549       if(Utils.isUTF16(this.charset)) {
550           retVal = this.randomReader.readCsvLine();
551       } else {
552           retVal = bufReader.readLine();
553       }
554       return retVal;
555   }
556   
557   private void closeInputs() throws IOException JavaDoc {
558       if(!Utils.isUTF16(this.charset)) {
559           if(this.bufReader != null)
560               bufReader.close();
561       } else {
562           if(this.randomReader != null)
563               randomReader.close();
564       }
565   }
566
567
568 }

// Popular Tags