1 32 33 package com.knowgate.misc; 34 35 import java.io.File ; 36 import java.io.FileReader ; 37 import java.io.FileNotFoundException ; 38 import java.io.IOException ; 39 import java.io.UnsupportedEncodingException ; 40 import java.io.OutputStream ; 41 import java.io.FileOutputStream ; 42 import java.io.BufferedOutputStream ; 43 import java.io.Reader ; 44 import java.io.InputStreamReader ; 45 import java.io.FileInputStream ; 46 47 import java.sql.Connection ; 48 import java.sql.SQLException ; 49 import java.sql.Date ; 50 import java.sql.CallableStatement ; 51 52 import com.knowgate.debug.DebugFile; 53 import com.knowgate.jdc.JDCConnection; 54 import com.knowgate.misc.Gadgets; 55 import com.knowgate.dataobjs.DB; 56 import com.knowgate.dataobjs.DBBind; 57 import com.knowgate.dataobjs.DBPersist; 58 59 65 public class CSVParser { 66 67 private char cBuffer[]; private int iBuffer; private String ColNames[]; private int RowPointers[]; private int ColPointers[][]; private int iCols; private int iRows; private int iErrLine; private char cDelimiter; 76 private boolean bQuoted; 77 private String sCharSet; 78 79 81 public CSVParser() { 82 iBuffer = 0; 83 sCharSet = null; 84 } 85 86 88 92 public CSVParser(String sCharSetName) { 93 iBuffer = 0; 94 sCharSet = sCharSetName; 95 } 96 97 99 public String charSet() { 100 return sCharSet; 101 } 102 103 105 public void charSet(String sCharSetName) { 106 sCharSet = sCharSetName; 107 } 108 109 111 116 public int getLineCount() { 117 return iRows; 118 } 119 120 122 127 public int getColumnCount() { 128 return iCols; 129 } 130 131 133 public int errorLine() { 134 return iErrLine; 135 } 136 137 139 public char getDelimiter() { 140 return cDelimiter; 141 } 142 143 145 158 159 public void parseData(char[] aCharData, String sFileDescriptor) 160 throws ArrayIndexOutOfBoundsException , RuntimeException , 161 NullPointerException ,IllegalArgumentException { 162 163 boolean bIgnore; 164 char cAt; 165 166 if (DebugFile.trace) { 167 DebugFile.writeln("Begin CSVParser.parseData(char[" + String.valueOf(aCharData.length) + "], \"" + sFileDescriptor + "\")"); 168 DebugFile.incIdent(); 169 } 170 171 bQuoted = false; 172 cDelimiter = (char)0; 173 bIgnore = false; 174 175 if (aCharData!=cBuffer) { 176 iBuffer = aCharData.length; 177 cBuffer = new char[iBuffer]; 178 System.arraycopy(aCharData, 0, cBuffer, 0, iBuffer); 179 } 180 181 iErrLine = 0; 182 183 if (DebugFile.trace) DebugFile.writeln("trimming leading whitespaces"); 184 185 for (int p=iBuffer-1; p>=0; p--) { 187 cAt = cBuffer[p]; 188 if (cAt==' ' || cAt=='\n' || cAt=='\r' || cAt=='\t') 189 iBuffer--; 190 else 191 break; 192 } 193 194 if (iBuffer==0) { 195 iRows = 0; 196 if (DebugFile.trace) { 197 DebugFile.decIdent(); 198 DebugFile.writeln("End CSVParser.parseData() : zero length array"); 199 } 200 return; 201 } 202 203 206 int iFileDescLen = sFileDescriptor.length(); 207 208 for (int p=0; p<iBuffer; p++) { 209 cAt = sFileDescriptor.charAt(p); 210 211 if (cAt!=' ' && cAt!='\t' && cAt!='\n' && cAt!='\r') { 212 bQuoted = (cAt == '"'); 213 break; 214 } 215 } 217 if (DebugFile.trace) { 218 if (bQuoted) DebugFile.writeln("asume quoted identifiers"); 219 } 220 221 223 for (int p=0; p<iFileDescLen && cDelimiter==(char)0; p++) { 224 225 cAt = sFileDescriptor.charAt(p); 226 227 if (cAt=='"') bIgnore = !bIgnore; 228 if (!bIgnore) { 229 switch (cAt) { 230 case ',': 231 cDelimiter = ','; 232 break; 233 case ';': 234 cDelimiter = ';'; 235 break; 236 case '|': 237 cDelimiter = '|'; 238 break; 239 case '`': 240 cDelimiter = '`'; 241 break; 242 case '\t': 243 cDelimiter = '\t'; 244 break; 245 } } } 249 if (DebugFile.trace) { 250 if (cDelimiter == (char)0) DebugFile.writeln("error: cannot assign a valid column delimiter"); 251 } 252 253 if (cDelimiter == (char)0) 254 throw new RuntimeException ("Cannot assign a valid column delimiter"); 255 256 ColNames = Gadgets.split(sFileDescriptor, new String (new char[]{cDelimiter})); 258 iCols = ColNames.length; 259 260 if (DebugFile.trace) DebugFile.writeln("descriptor has " + String.valueOf(iCols) + " columns"); 261 262 if (bQuoted) 263 for (int c=0; c<iCols; c++) 264 ColNames[c] = (ColNames[c].replace('"',' ')).trim(); 265 266 iRows = 1; 268 for (int p=0; p<iBuffer; p++) { 269 if (cBuffer[p]=='\n') iRows++; 270 } 272 if (DebugFile.trace) DebugFile.writeln("input data has " + String.valueOf(iRows) + " lines"); 273 274 RowPointers = new int[iRows]; 275 ColPointers = new int[iRows][iCols]; 276 277 int iRow = 0, iCol = 0; 278 279 if (DebugFile.trace) DebugFile.writeln("parsing line 0"); 280 281 RowPointers[iRow] = 0; 282 ColPointers[iRow][iCol] = 0; 283 284 bIgnore = false; 285 286 for (int p=0; p<iBuffer; p++) { 287 288 cAt = cBuffer[p]; 289 290 if (cAt=='"' && bQuoted) bIgnore = !bIgnore; 291 292 if (!bIgnore) { 293 if (cAt==cDelimiter) { 294 iCol++; 295 if (iCol>=iCols) { 296 iErrLine = iRow+1; 297 throw new ArrayIndexOutOfBoundsException ("Columns count mismatch for line " + String.valueOf(iErrLine) + " expected " + String.valueOf(iCols) + " but found more."); 298 } 299 else 300 ColPointers[iRow][iCol] = p+1; 301 } 302 else if (cAt=='\n') { 303 if (iCol!=iCols-1) { 304 iErrLine = iRow+1; 305 throw new ArrayIndexOutOfBoundsException ("Columns count mismatch for line " + String.valueOf(iErrLine) + " expected " + String.valueOf(iCols) + " and found only " + String.valueOf(iCol+1)); 306 } 307 iRow++; 308 iCol = 0; 309 310 if (DebugFile.trace) DebugFile.writeln("parsing line " + String.valueOf(iRow)); 311 312 RowPointers[iRow] = p+1; 313 ColPointers[iRow][iCol] = p+1; 314 } 315 } } 318 iErrLine = 0; 319 320 if (DebugFile.trace) { 321 DebugFile.decIdent(); 322 DebugFile.writeln("End CSVParser.parseData()"); 323 } 324 } 326 328 347 public void parseFile(File oFile, String sFileDescriptor) 348 throws ArrayIndexOutOfBoundsException ,IOException ,FileNotFoundException , 349 RuntimeException ,NullPointerException ,IllegalArgumentException , 350 UnsupportedEncodingException { 351 352 Reader oReader; 353 354 if (oFile==null) 355 throw new NullPointerException ("CSVParser.parseFile() File parameter may not be null"); 356 357 if (DebugFile.trace) { 358 DebugFile.writeln("Begin CSVParser.parseFile(\"" + oFile.getAbsolutePath() + "\",\"" + sFileDescriptor + "\")"); 359 DebugFile.incIdent(); 360 } 361 362 if (sFileDescriptor==null) { 363 if (DebugFile.trace) DebugFile.decIdent(); 364 throw new NullPointerException ("CSVParser.parseFile() File Descriptor parameter may not be null"); 365 } 366 367 if (sFileDescriptor.trim().length()==0) { 368 if (DebugFile.trace) DebugFile.decIdent(); 369 throw new IllegalArgumentException ("File Descriptor parameter may not be an empty string"); 370 } 371 372 iErrLine = 0; 373 374 iBuffer = new Long (oFile.length()).intValue(); 375 376 if (iBuffer==0) { 377 iRows = 0; 378 if (DebugFile.trace) { 379 DebugFile.decIdent(); 380 DebugFile.writeln("End CSVParser.parseFile() : zero length file"); 381 } 382 return; 383 } 384 385 cBuffer = new char[iBuffer]; 386 387 if (null==sCharSet) { 388 oReader = new FileReader (oFile); 389 } else { 390 oReader = new InputStreamReader (new FileInputStream (oFile), sCharSet); 391 } 392 oReader.read(cBuffer); 393 oReader.close(); 394 oReader = null; 395 396 parseData (cBuffer, sFileDescriptor); 397 398 if (DebugFile.trace) { 399 DebugFile.decIdent(); 400 DebugFile.writeln("End CSVParser.parseFile()"); 401 } 402 } 404 406 423 public void parseFile(String sFilePath, String sFileDescriptor) 424 throws ArrayIndexOutOfBoundsException ,IOException ,FileNotFoundException , 425 RuntimeException ,NullPointerException ,IllegalArgumentException , 426 UnsupportedEncodingException { 427 parseFile (new File (sFilePath), sFileDescriptor); 428 } 429 430 432 436 public int getColumnPosition(String sColumnName) { 437 438 if (DebugFile.trace) { 439 DebugFile.writeln("Begin CSVParser.getColumnPosition(" + sColumnName + ")"); 440 DebugFile.incIdent(); 441 } 442 443 int iPos = -1; 444 445 for (int c=0; c<iCols; c++) { 446 if (ColNames[c].equalsIgnoreCase(sColumnName)) { 447 iPos = c; 448 break; 449 } 450 } 452 if (DebugFile.trace) { 453 DebugFile.decIdent(); 454 DebugFile.writeln("End CSVParser.getColumnPosition() : " + String.valueOf(iPos)); 455 } 456 457 return iPos; 458 } 460 462 470 public String getLine(int iLine) throws IllegalStateException , UnsupportedEncodingException { 471 String sRetVal; 472 int iStart, iEnd; 473 474 if (DebugFile.trace) { 475 DebugFile.writeln("Begin CSVParser.getLine(" + String.valueOf(iLine) + ")"); 476 DebugFile.incIdent(); 477 } 478 479 if (0 == iBuffer) 480 throw new IllegalStateException ("Must call parseFile() on a valid non-empty delimited file before calling getField() method"); 481 482 if (iLine<0 || iLine>iRows-1) 483 484 sRetVal = null; 485 486 else { 487 488 iStart = ColPointers[iLine][0]; 489 iEnd = iBuffer; 490 491 for (int p=iStart; p<iBuffer; p++) 493 if (cBuffer[p]=='\n') { 494 iEnd = p; 495 break; 496 } 498 if (iStart==iEnd) 499 sRetVal = ""; 500 else { 501 if (iEnd-1>iStart) { 503 if (cBuffer[iEnd-1]=='\r') --iEnd; 504 if (iStart==iEnd) 505 sRetVal = ""; 506 else 507 sRetVal = new String (cBuffer, iStart, iEnd - iStart); 508 } 509 else { 510 if (cBuffer[iStart]=='\r') 511 sRetVal = ""; 512 else 513 sRetVal = new String (cBuffer, iStart, iEnd - iStart); 514 } 515 } 516 517 } 519 if (DebugFile.trace) { 520 DebugFile.decIdent(); 521 DebugFile.writeln("End CSVParser.getLine() : " + sRetVal); 522 } 523 524 return sRetVal; 525 } 527 529 542 public String getField(int iCol, int iRow) 543 throws IllegalStateException , ArrayIndexOutOfBoundsException , 544 StringIndexOutOfBoundsException , UnsupportedEncodingException { 545 int iStart; 546 int iEnd; 547 String sRetVal; 548 549 if (DebugFile.trace) { 550 DebugFile.writeln("Begin CSVParser.getField(" + String.valueOf(iCol) + "," + String.valueOf(iRow) + ")"); 551 if (iBuffer>0) DebugFile.incIdent(); 552 } 553 554 iErrLine = 0; 555 556 if (0 == iBuffer) 557 throw new IllegalStateException ("Must call parseFile() on a valid non-empty delimited file before calling getField() method"); 558 559 if (-1==iCol || -1==iRow) { 560 if (DebugFile.trace) { 561 DebugFile.decIdent(); 562 DebugFile.writeln("End CSVParser.getField() : null"); 563 } 564 return null; 565 } 566 567 iErrLine = iRow; 568 569 iStart = ColPointers[iRow][iCol]; 570 571 if (DebugFile.trace) DebugFile.writeln("iStart=" + String.valueOf(iStart)); 572 573 if (iCol<iCols-1) 574 iEnd = ColPointers[iRow][iCol+1]-1; 575 else if (iRow<iRows-1) 576 iEnd = ColPointers[iRow+1][0]-1; 577 else 578 iEnd = iBuffer; 579 580 if (DebugFile.trace) DebugFile.writeln("triming trailing spaces from " + String.valueOf(iEnd)); 581 582 if (iEnd>0 && iEnd<iBuffer) { 583 if (bQuoted) { 584 while (cBuffer[iEnd - 1] == '\r' || cBuffer[iEnd - 1] == ' ' || 585 cBuffer[iEnd - 1] == '\t') 586 if (--iEnd == 0) 587 break; 588 } 589 else { 590 if (cBuffer[iEnd-1]=='\r') iEnd--; 591 } 592 } 593 else if (iEnd<0) 594 iEnd = 0; 595 596 if (DebugFile.trace) DebugFile.writeln("iEnd=" + String.valueOf(iEnd)); 597 598 if (iStart==iEnd) 599 sRetVal = ""; 600 else if (bQuoted) 601 sRetVal = new String (cBuffer, iStart+1, iEnd-iStart-2); 602 else 603 sRetVal = new String (cBuffer, iStart, iEnd-iStart); 604 605 iErrLine = 0; 606 607 if (DebugFile.trace) { 608 DebugFile.decIdent(); 609 DebugFile.writeln("End CSVParser.getField() : " + sRetVal); 610 } 611 612 return sRetVal; 613 } 615 617 627 public String getField(String sCol, int iRow) 628 throws IllegalStateException , ArrayIndexOutOfBoundsException , 629 StringIndexOutOfBoundsException , UnsupportedEncodingException { 630 631 int iCol = getColumnPosition(sCol); 632 633 if (iCol==-1) 634 throw new ArrayIndexOutOfBoundsException ("Column " + sCol + " not found"); 635 636 return getField (iCol, iRow); 637 } 638 639 641 650 public int find (int iCol, String sVal) throws UnsupportedEncodingException { 651 int iFound = -1; 652 int r = 0; 653 while (r<iRows) { 654 if (getField(iCol,r).equals(sVal)) { 655 iFound = r; 656 break; 657 } 658 } return iFound; 660 } 662 664 673 public int findi (int iCol, String sVal) throws UnsupportedEncodingException { 674 int iFound = -1; 675 int r = 0; 676 while (r<iRows) { 677 if (getField(iCol,r).equalsIgnoreCase(sVal)) { 678 iFound = r; 679 break; 680 } 681 } return iFound; 683 } 685 687 693 public void writeToStream(OutputStream oStrm) throws IOException { 694 if (DebugFile.trace) { 695 DebugFile.writeln("Begin CSVParser.writeToStream([OutputStream])"); 696 DebugFile.incIdent(); 697 } 698 699 if (null!=sCharSet) 700 oStrm.write(new String (cBuffer).getBytes(sCharSet)); 701 else 702 oStrm.write(new String (cBuffer).getBytes()); 703 704 if (DebugFile.trace) { 705 DebugFile.decIdent(); 706 DebugFile.writeln("End CSVParser.writeToStream()"); 707 } 708 } 710 712 719 public void writeToFile(String sFilePath) throws IOException , SecurityException { 720 if (DebugFile.trace) { 721 DebugFile.writeln("Begin CSVParser.writeToFile("+sFilePath+")"); 722 DebugFile.incIdent(); 723 } 724 FileOutputStream oOutStrm = new FileOutputStream (sFilePath); 725 BufferedOutputStream oOutBuff = new BufferedOutputStream (oOutStrm); 726 727 writeToStream(oOutBuff); 728 729 oOutBuff.close(); 730 oOutStrm.close(); 731 732 if (DebugFile.trace) { 733 DebugFile.decIdent(); 734 DebugFile.writeln("End CSVParser.writeToFile()"); 735 } 736 } 738 } 739 | Popular Tags |