KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > derby > impl > load > ImportReadData


1 /*
2
3    Derby - Class org.apache.derby.impl.load.ImportReadData
4
5    Licensed to the Apache Software Foundation (ASF) under one or more
6    contributor license agreements. See the NOTICE file distributed with
7    this work for additional information regarding copyright ownership.
8    The ASF licenses this file to You under the Apache License, Version 2.0
9    (the "License"); you may not use this file except in compliance with
10    the License. You may obtain a copy of the License at
11
12       http://www.apache.org/licenses/LICENSE-2.0
13
14    Unless required by applicable law or agreed to in writing, software
15    distributed under the License is distributed on an "AS IS" BASIS,
16    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17    See the License for the specific language governing permissions and
18    limitations under the License.
19
20  */

21
22 package org.apache.derby.impl.load;
23
24 import java.io.BufferedReader JavaDoc;
25 import java.io.FileNotFoundException JavaDoc;
26 import java.io.InputStream JavaDoc;
27 import java.io.InputStreamReader JavaDoc;
28 import java.io.FileInputStream JavaDoc;
29 import java.io.IOException JavaDoc;
30 import java.net.MalformedURLException JavaDoc;
31 import java.net.URL JavaDoc;
32 import org.apache.derby.iapi.services.sanity.SanityManager;
33
34 final class ImportReadData implements java.security.PrivilegedExceptionAction JavaDoc {
35   //Read data from this file
36
private String JavaDoc inputFileName;
37
38   private int[] columnWidths;
39   private int rowWidth;
40   private char[] tempString;
41   private int numberOfCharsReadSoFar;
42
43   //temporary variables
44
private BufferedReader JavaDoc bufferedReader;
45
46   //temporary variable which holds each token as we are building it.
47
private static final int START_SIZE = 10240;
48   private char[] currentToken = new char[START_SIZE];
49   private int currentTokenMaxSize = START_SIZE;
50
51   //This tells whether to look for a matching stop pattern
52
boolean foundStartDelimiter;
53   int totalCharsSoFar;
54   //following is used to ignore whitespaces in the front
55
int positionOfNonWhiteSpaceCharInFront;
56   //following is used to ignore whitespaces in the back
57
int positionOfNonWhiteSpaceCharInBack;
58   int lineNumber;
59   int fieldStartDelimiterIndex;
60   int fieldStopDelimiterIndex;
61   int stopDelimiterPosition;
62   boolean foundStartAndStopDelimiters;
63
64   //in the constructor we open the stream only if it's delimited file to find out
65
//number of columns. In case of fixed, we know that already from the control file.
66
//then we close the stream. Now the stream is reopened when the first record is
67
//read from the file(ie when the first time next is issued. This was done for the
68
//bug 1032 filed by Dan
69
boolean streamOpenForReading;
70
71   static final int DEFAULT_FORMAT_CODE = 0;
72   static final int ASCII_FIXED_FORMAT_CODE = 1;
73   private int formatCode = DEFAULT_FORMAT_CODE;
74   private boolean hasColumnDefinition;
75   private char recordSeparatorChar0;
76   private char fieldSeparatorChar0;
77   private boolean recordSepStartNotWhite = true;
78   private boolean fieldSepStartNotWhite = true;
79
80   //get properties infr from following
81
protected ControlInfo controlFileReader;
82
83   //Read first row to find out how many columns make up a row and put it in
84
//the following variable
85
protected int numberOfColumns;
86  
87   // the types of the columns that we are about to read
88
protected String JavaDoc [] columnTypes;
89   
90   //Read control file properties and write it in here
91
protected char[] fieldSeparator;
92   protected int fieldSeparatorLength;
93   protected char[] recordSeparator;
94   protected int recordSeparatorLength;
95   protected String JavaDoc nullString;
96   protected String JavaDoc columnDefinition;
97   protected String JavaDoc format;
98   protected String JavaDoc dataCodeset;
99   protected char[] fieldStartDelimiter;
100   protected int fieldStartDelimiterLength;
101   protected char[] fieldStopDelimiter;
102   protected int fieldStopDelimiterLength;
103   protected boolean hasDelimiterAtEnd;
104   
105
106   //load the control file properties info locally, since we need to refer to them
107
//all the time while looking for tokens
108
private void loadPropertiesInfo() throws Exception JavaDoc {
109     fieldSeparator = controlFileReader.getFieldSeparator().toCharArray();
110     fieldSeparatorLength = fieldSeparator.length;
111     recordSeparator = controlFileReader.getRecordSeparator().toCharArray();
112     recordSeparatorLength = recordSeparator.length;
113     nullString = controlFileReader.getNullString();
114     columnDefinition = controlFileReader.getColumnDefinition();
115     format = controlFileReader.getFormat();
116     dataCodeset = controlFileReader.getDataCodeset();
117     fieldStartDelimiter = controlFileReader.getFieldStartDelimiter().toCharArray();
118     fieldStartDelimiterLength = fieldStartDelimiter.length;
119     fieldStopDelimiter = controlFileReader.getFieldEndDelimiter().toCharArray();
120     fieldStopDelimiterLength = fieldStopDelimiter.length;
121     hasDelimiterAtEnd = controlFileReader.getHasDelimiterAtEnd();
122
123     // when record or field separators start with typical white space,
124
// we can't ignore it around values in the import file. So set up
125
// a boolean so we don't keep re-testing for it.
126
if (recordSeparatorLength >0) {
127       recordSeparatorChar0=recordSeparator[0];
128       recordSepStartNotWhite = (Character.isWhitespace(recordSeparatorChar0)==false);
129     }
130     if (fieldSeparatorLength >0) {
131       fieldSeparatorChar0=fieldSeparator[0];
132       fieldSepStartNotWhite = (Character.isWhitespace(fieldSeparatorChar0)==false);
133     }
134   }
/**
 * Creates a reader for the given data file.
 *
 * @param inputFileName file to read data from
 * @param controlFileReader control info used to interpret the data in
 *        inputFileName
 * @exception Exception if the properties or the meta data cannot be loaded
 */
ImportReadData(String inputFileName, ControlInfo controlFileReader)
throws Exception {
    this.inputFileName = inputFileName;
    this.controlFileReader = controlFileReader;

    // Load the control file properties locally, since they are referred to
    // all the time while looking for tokens.
    loadPropertiesInfo();
    // Read the first row to find how many columns make a row and save that
    // column information for further use.
    loadMetaData();
}
149
150   //just a getter returning number of columns for a row in the data file
151
int getNumberOfColumns() {
152     return numberOfColumns;
153   }
154   /**if columndefinition is true, ignore first row. The way to do that is to just
155   * look for the record separator
156     * @exception Exception if there is an error
157     */

158   protected void ignoreFirstRow() throws Exception JavaDoc {
159     readNextToken(recordSeparator, 0, recordSeparatorLength, true);
160   }
161
162   /** load the column types from the meta data line to be analyzed
163     * later in the constructor of the ImportResultSetMetaData.
164     */

165   protected void loadColumnTypes() throws Exception JavaDoc {
166     int idx;
167     String JavaDoc [] metaDataArray;
168
169     // start by counting the number of columns that we have at the
170
// meta data line
171
findNumberOfColumnsInARow();
172
173     // reopen the file to the start of the file to read the actual column types data
174
closeStream();
175     openFile();
176
177     // make room for the meta data
178
metaDataArray=new String JavaDoc [numberOfColumns];
179
180     // read the meta data line line - meta data is always in a delimited format
181
readNextDelimitedRow(metaDataArray);
182
183     // allocate space for the columnTypes meta data
184
// since the meta data line contains a combination of column name and
185
// column type for every column we actually have only half the number of
186
// columns that was counted.
187
columnTypes=new String JavaDoc[numberOfColumns/2];
188
189     for(idx=0 ; idx<numberOfColumns ; idx=idx+2) {
190       columnTypes[idx/2]=metaDataArray[idx+1];
191     }
192
193
194     // reopen to the start of the file so the rest of the program will
195
// work as expected
196
closeStream();
197     openFile();
198
199     // init the numberOfColumns variable since it is
200
// being accumulate by the findNumberOfColumnsInARow method
201
numberOfColumns=0;
202   }
203
204   private void openFile() throws Exception JavaDoc {
205     try {
206         java.security.AccessController.doPrivileged(this);
207     } catch (java.security.PrivilegedActionException JavaDoc pae) {
208         throw pae.getException();
209     }
210   }
211
212   public final Object JavaDoc run() throws Exception JavaDoc {
213       realOpenFile();
214       return null;
215   }
216
217   //open the input data file for reading
218
private void realOpenFile() throws Exception JavaDoc {
219       InputStream JavaDoc inputStream;
220     try {
221       try {
222         URL JavaDoc url = new URL JavaDoc(inputFileName);
223         if (url.getProtocol().equals("file")) { //this means it's a file url
224
inputFileName = url.getFile(); //seems like you can't do openstream on file
225
throw new MalformedURLException JavaDoc(); //so, get the filename from url and do it ususal way
226
}
227         inputStream = url.openStream();
228       } catch (MalformedURLException JavaDoc ex) {
229         inputStream = new FileInputStream JavaDoc(inputFileName);
230         
231       }
232     } catch (FileNotFoundException JavaDoc ex) {
233       throw LoadError.dataFileNotFound(inputFileName);
234     } catch (SecurityException JavaDoc se) {
235         java.sql.SQLException JavaDoc sqle = LoadError.dataFileNotFound(inputFileName);
236
237         sqle.setNextException(new java.sql.SQLException JavaDoc("XJ001", se.getMessage(), 0));
238
239         throw sqle;
240     }
241     java.io.Reader JavaDoc rd = dataCodeset == null ?
242             new InputStreamReader JavaDoc(inputStream) : new InputStreamReader JavaDoc(inputStream, dataCodeset);
243     bufferedReader = new BufferedReader JavaDoc(rd, 32*1024);
244     streamOpenForReading = true;
245   }
246
247   //read the first data row to find how many columns make a row and then save that
248
//column information for future use
249
private void loadMetaData() throws Exception JavaDoc {
250     //open the input data file for reading the metadata information
251
openFile();
252     // if column definition is true, ignore the first row since that's not
253
// really the data do uppercase because the ui shows the values as True
254
// and False
255
if (columnDefinition.toUpperCase(java.util.Locale.ENGLISH).equals(ControlInfo.INTERNAL_TRUE.toUpperCase(java.util.Locale.ENGLISH))) {
256       hasColumnDefinition = true;
257       ignoreFirstRow();
258     }
259
260     if (formatCode == DEFAULT_FORMAT_CODE) {
261       findNumberOfColumnsInARow();
262     }
263     closeStream();
264   }
265
266   /**close the input data file
267     * @exception Exception if there is an error
268     */

269   void closeStream() throws Exception JavaDoc {
270     if (streamOpenForReading) {
271        bufferedReader.close();
272        streamOpenForReading = false;
273     }
274   }
275
276   //actually looks at the data file to find how many columns make up a row
277
int findNumberOfColumnsInARow() throws Exception JavaDoc {
278     // init the number of columns to 1 - no such thing as a table
279
// without columns
280
numberOfColumns=1;
281     while (! readTokensUntilEndOfRecord() ) {
282       numberOfColumns++;
283     }
284     //--numberOfColumns;
285
//what shall we do if there is delimeter after the last column?
286
//reducing the number of columns seems to work fine.
287

288     //this is necessary to be able to read delimited files that have a delimeter
289
//at the end of a row.
290
if (hasDelimiterAtEnd){
291         --numberOfColumns;
292     }
293
294     // a special check - if the imported file is empty then
295
// set the number of columns to 0
296
if (numberOfCharsReadSoFar==0) {
297       numberOfColumns=0;
298     }
299     return numberOfColumns;
300   }
301
302   //keep track of white spaces in the front. We use positionOfNonWhiteSpaceCharInFront for
303
//that. It has the count of number of white spaces found so far before any non-white char
304
//in the token.
305
//Look for whitespace only if field start delimiter is not found yet. Any white spaces
306
//within the start and stop delimiters are ignored.
307
//Also if one of the white space chars is same as recordSeparator or fieldSeparator then
308
//disregard it.
309
private void checkForWhiteSpaceInFront() {
310     //if found white space characters so far, the following if will be true
311
if ((positionOfNonWhiteSpaceCharInFront + 1) == totalCharsSoFar &&
312         ((!foundStartDelimiter) && (!foundStartAndStopDelimiters) )) {
313        char currentChar = currentToken[positionOfNonWhiteSpaceCharInFront];
314        if (//currentChar == '\t' ||
315
//currentChar == '\r' || alc: why isn't this included?
316
// alc: BTW, \r and \n should be replaced
317
// or amended with the first char of line.separator...
318
//currentChar == '\n' ||
319
//currentChar == ' ') {
320
// use String.trim()'s definition of whitespace.
321
// i18n - check for whitespace - avoid doing a hard coded character
322
// check and use the isWhitespace method to cover all the Unicode
323
// options
324
Character.isWhitespace(currentChar) == true) {
325
326              if ((recordSepStartNotWhite || (currentChar != recordSeparatorChar0))
327                   &&
328                  (fieldSepStartNotWhite || (currentChar != fieldSeparatorChar0)))
329              //disregard if whitespace char is same as separator first char
330
positionOfNonWhiteSpaceCharInFront++;
331        }
332     }
333   }
334
335
336   //look for white spaces from the back towards the stop delimiter position.
337
//If there was no startdelimite & stopdelimiter combination, then we start from the back
338
//all the way to the beginning and stop when we find non-white char
339
//positionOfNonWhiteSpaceCharInBack keeps the count of whitespaces at the back
340
private void checkForWhiteSpaceInBack() {
341     boolean onlyWhiteSpaceSoFar = true;
342     positionOfNonWhiteSpaceCharInBack = 0;
343
344     for (int i = totalCharsSoFar; (i > stopDelimiterPosition) && onlyWhiteSpaceSoFar; i--) {
345        char currentChar = currentToken[i];
346     // replace test on \t,\n,' ' with String.trim's definition of white space
347
// i18n - check for whitespace - avoid doing a hard coded character
348
// check and use the isWhitespace method to cover all the Unicode
349
// options
350
if (Character.isWhitespace(currentChar)==true) {
351
352              if ((recordSepStartNotWhite || (currentChar != recordSeparatorChar0))
353                   &&
354                  (fieldSepStartNotWhite || (currentChar != fieldSeparatorChar0)))
355              //disregard if whitespace char is same as separator first char
356
positionOfNonWhiteSpaceCharInBack++;
357        } else
358          onlyWhiteSpaceSoFar = false;
359     }
360   }
361
362   //keep looking for field and record separators simultaneously because we don't yet
363
//know how many columns make up a row in this data file. Stop as soon as we get
364
//the record separator which is indicated by a return value of true from this function
365
boolean readTokensUntilEndOfRecord() throws Exception JavaDoc {
366     int nextChar;
367     int fieldSeparatorIndex = 0;
368     int recordSeparatorIndex = 0;
369
370     fieldStopDelimiterIndex = 0;
371     fieldStartDelimiterIndex = 0;
372     totalCharsSoFar = 0;
373     //at the start of every new token, make white space in front count 0
374
positionOfNonWhiteSpaceCharInFront = 0;
375     foundStartDelimiter = false;
376     foundStartAndStopDelimiters = false;
377     numberOfCharsReadSoFar = 0;
378
379     while (true) {
380       nextChar = bufferedReader.read();
381       if (nextChar == -1)
382          return true;
383       numberOfCharsReadSoFar++;
384       //read the character into the token holder. If token holder reaches it's capacity,
385
//double it's capacity
386
currentToken[totalCharsSoFar++] = (char)nextChar;
387       //check if character read is white space char in front
388
checkForWhiteSpaceInFront();
389       if (totalCharsSoFar == currentTokenMaxSize) {
390         currentTokenMaxSize = currentTokenMaxSize * 2;
391         char[] tempArray = new char[currentTokenMaxSize];
392         System.arraycopy(currentToken, 0, tempArray, 0, totalCharsSoFar);
393         currentToken = tempArray;
394       }
395
396       //see if we can find fieldSeparator
397
fieldSeparatorIndex = lookForPassedSeparator(fieldSeparator,
398                                                    fieldSeparatorIndex,
399                                                    fieldSeparatorLength,
400                                                    nextChar, false);
401       //every time we find a column separator, the return false will indicate that count
402
//this token as column data value and keep lookin for more tokens or record
403
//separator
404
if (fieldSeparatorIndex == -1)
405          return false;
406
407       //if found start delimiter, then don't look for record separator, just look for
408
//end delimiter
409
if (!foundStartDelimiter ) {
410          //see if we can find recordSeparator
411
recordSeparatorIndex = lookForPassedSeparator(recordSeparator, recordSeparatorIndex,
412            recordSeparatorLength, nextChar, true);
413          if (recordSeparatorIndex == -1)
414             return true;
415       }
416     }
417   }
418
419   //if not inside a start delimiter, then look for the delimiter passed
420
//else look for stop delimiter first.
421
//this routine returns -1 if it finds field delimiter or record delimiter
422
private int lookForPassedSeparator(char[] delimiter, int delimiterIndex,
423                                      int delimiterLength, int nextChar,
424                                      boolean lookForRecordSeperator) throws
425                                      IOException JavaDoc
426     {
427
428     //foundStartDelimiter will be false if we haven't found a start delimiter yet
429
//if we haven't found startdelimiter, then we look for both start delimiter
430
//and passed delimiter(which can be field or record delimiter). If we do find
431
//start delimiter, then we only look for stop delimiter and not the passed delimiter.
432
if (!foundStartDelimiter ) {
433        //look for start delimiter only if it's length is non-zero and only if haven't already
434
//found it at all so far.
435
if (fieldStartDelimiterLength != 0 && (!foundStartAndStopDelimiters) ) {
436           //the code inside following if will be executed only if we have gone past all the
437
//white characters in the front.
438
if (totalCharsSoFar != positionOfNonWhiteSpaceCharInFront &&
439               (totalCharsSoFar - positionOfNonWhiteSpaceCharInFront) <= fieldStartDelimiterLength) {
440              //After getting rid of white spaces in front, look for the start delimiter. If
441
//found, set foundStartDelimiter flag.
442
if (nextChar == fieldStartDelimiter[fieldStartDelimiterIndex]){
443                 fieldStartDelimiterIndex++;
444                 if (fieldStartDelimiterIndex == fieldStartDelimiterLength) {
445                    foundStartDelimiter = true;
446                    //since characters read so far are same as start delimiters, discard those chars
447
totalCharsSoFar = 0;
448                    positionOfNonWhiteSpaceCharInFront = 0;
449                    return 0;
450                 }
451              } else {
452                 //found a mismatch for the start delimiter
453
//see if found match for more than one char of this start delimiter before the
454
//current mismatch, if so check the remaining chars agains
455
//eg if stop delimiter is xa and data is xxa
456
if (fieldStartDelimiterIndex > 0) {
457                    reCheckRestOfTheCharacters(totalCharsSoFar-fieldStartDelimiterIndex,
458                    fieldStartDelimiter, fieldStartDelimiterLength);
459                 }
460              }
461           }
462        }
463
464        /*look for typical record seperators line feed (\n), a carriage return
465         * (\r) or a carriage return followed by line feed (\r\n)
466         */

467        if(lookForRecordSeperator)
468        {
469            if(nextChar == '\r' || nextChar == '\n')
470            {
471                recordSeparatorChar0 = (char) nextChar;
472                if(nextChar == '\r' )
473                {
474                    //omot the line feed character if it exists in the stream
475
omitLineFeed();
476                }
477
478                totalCharsSoFar = totalCharsSoFar - 1 ;
479                return -1;
480            }
481
482            return delimiterIndex;
483        }
484
485        //look for passed delimiter
486
if (nextChar == delimiter[delimiterIndex]) {
487           delimiterIndex++;
488           if (delimiterIndex == delimiterLength) { //found passed delimiter
489
totalCharsSoFar = totalCharsSoFar - delimiterLength;
490              return -1;
491           }
492           return delimiterIndex; //this number of chars of delimiter have exact match so far
493
} else {
494          //found a mismatch for the delimiter
495
//see if found match for more than one char of this delimiter before the
496
//current mismatch, if so check the remaining chars agains
497
//eg if delimiter is xa and data is xxa
498
if (delimiterIndex > 0)
499             return(reCheckRestOfTheCharacters(totalCharsSoFar-delimiterIndex,
500         delimiter,
501                 delimiterLength));
502        }
503     } else {
504       //see if we can find fieldStopDelimiter
505
if (nextChar == fieldStopDelimiter[fieldStopDelimiterIndex]) {
506          fieldStopDelimiterIndex++;
507          if (fieldStopDelimiterIndex == fieldStopDelimiterLength) {
508              boolean skipped = skipDoubleDelimiters(fieldStopDelimiter);
509              if(!skipped)
510              {
511                  foundStartDelimiter = false;
512                  //found stop delimiter, discard the chars corresponding to stop delimiter
513
totalCharsSoFar = totalCharsSoFar - fieldStopDelimiterLength;
514                  //following is to take care of a case like "aa"aa This will result in an
515
//error. Also a case like "aa" will truncate it to just aa
516
stopDelimiterPosition = totalCharsSoFar;
517                  //following is used to distinguish between empty string ,"", and null string ,,
518
foundStartAndStopDelimiters = true;
519              }else
520              {
521                  fieldStopDelimiterIndex =0 ;
522              }
523             return 0;
524          }
525          return 0;
526       } else {
527          //found a mismatch for the stop delimiter
528
//see if found match for more than one char of this stop delimiter before the
529
//current mismatch, if so check the remaining chars agains
530
//eg if stop delimiter is xa and data is xxa
531
if (fieldStopDelimiterIndex > 0) {
532             reCheckRestOfTheCharacters(totalCharsSoFar-fieldStopDelimiterIndex,
533             fieldStopDelimiter, fieldStopDelimiterLength);
534             return 0;
535         }
536       }
537     }
538     return 0;
539   }
540
541   //If after finding a few matching characters for a delimiter, find a mismatch,
542
//restart the matching process from character next to the one from which you
543
//were in the process of finding the matching pattern
544
private int reCheckRestOfTheCharacters(int startFrom,
545          char[] delimiter, int delimiterLength) {
546     int delimiterIndex = 0;
547     // alc: need to test delim of abab with abaabab
548
// if delimIndex resets to 0, i probably needs to reset to
549
// (an ever increasing) startFrom=startFrom+1, not stay where it is
550
for (int i = startFrom; i<totalCharsSoFar; i++) {
551         if (currentToken[i] == delimiter[delimiterIndex])
552            delimiterIndex++;
553         else
554          delimiterIndex = 0;
555     }
556     return delimiterIndex;
557   }
558
559     /*
560      * skips the duplicate delimeter characters inserd character stringd ata
561      * to get the original string. In Double Delimter recognigation Delimiter
562      * Format strings are written with a duplicate delimeter if a delimiter is
563      * found inside the data while exporting.
564      * For example with double quote(") as character delimiter
565      *
566      * "What a ""nice""day!"
567      *
568      * will be imported as:
569      *
570      * What a "nice"day!
571      *
572      * In the case of export, the rule applies in reverse. For example,
573      *
574      * I am 6"tall.
575      *
576      * will be exported to a file as:
577      *
578      * "I am 6""tall."
579      */

580     private boolean skipDoubleDelimiters(char [] characterDelimiter) throws IOException JavaDoc
581     {
582         boolean skipped = true;
583         int cDelLength = characterDelimiter.length ;
584         bufferedReader.mark(cDelLength);
585         for(int i = 0 ; i < cDelLength ; i++)
586         {
587             int nextChar = bufferedReader.read();
588             if(nextChar != characterDelimiter[i])
589             {
590                 //not a double delimter case
591
bufferedReader.reset();
592                 skipped = false;
593                 break;
594             }
595         }
596         return skipped;
597     }
598
599
600
601     //omit the line feed character(\n)
602
private void omitLineFeed() throws IOException JavaDoc
603     {
604         bufferedReader.mark(1);
605         int nextChar = bufferedReader.read();
606         if(nextChar != '\n')
607         {
608             //not a Line Feed
609
bufferedReader.reset();
610         }
611     }
612
613
614
615   /**returns the number of the current row
616   */

617   int getCurrentRowNumber() {
618     return lineNumber;
619   }
620
621   /**the way we read the next row from input file depends on it's format
622     * @exception Exception if there is an error
623     */

624   boolean readNextRow(String JavaDoc[] returnStringArray) throws Exception JavaDoc {
625     boolean readVal;
626     int idx;
627
628     if (!streamOpenForReading) {
629        openFile();
630        //as earlier, ignore the first row if it's colum definition
631
//do uppercase because the ui shows the values as True and False
632
if (hasColumnDefinition){
633           ignoreFirstRow();
634        }
635     }
636     if (formatCode == DEFAULT_FORMAT_CODE)
637        readVal=readNextDelimitedRow(returnStringArray);
638     else
639        readVal=readNextFixedRow(returnStringArray);
640
641     return readVal;
642   }
643
644     // made this a field so it isn't inited for each row, just
645
// set and cleared on the rows that need it (the last row
646
// in a file, typically, so it isn't used much)
647

648     private boolean haveSep = true;
649   //read the specified column width for each column
650
private boolean readNextFixedRow(String JavaDoc[] returnStringArray) throws Exception JavaDoc {
651     // readLength is how many bytes it has read so far
652
int readLength = 0;
653     int totalLength = 0;
654
655     // keep reading until rolWidth bytes have been read
656
while ((readLength +=
657       bufferedReader.read(tempString, readLength,
658                  rowWidth-readLength))
659         < rowWidth) {
660
661       if (readLength == totalLength-1) {// EOF
662
if ( readLength == -1) { // no row, EOF
663
return false;
664          }
665          else {
666             // it's only a bad read if insufficient data was
667
// returned; missing the last record separator is ok
668
if (totalLength != rowWidth - recordSeparator.length) {
669               throw LoadError.unexpectedEndOfFile(lineNumber+1);
670             }
671             else {
672               haveSep = false;
673               break;
674             }
675          }
676       }
677       // else, some thing is read, continue until the whole column is
678
// read
679
totalLength = readLength;
680     }
681
682      int colStart = 0;
683      for (int i=0; i< numberOfColumns; i++) {
684          int colWidth = columnWidths[i];
685
686        if (colWidth == 0) //if column width is 0, return null
687
returnStringArray[i] = null;
688        else {
689           // if found nullstring, return it as null value
690
String JavaDoc checkAgainstNullString = new String JavaDoc(tempString, colStart, colWidth);
691           if (checkAgainstNullString.trim().equals(nullString))
692              returnStringArray[i] = null;
693           else
694               returnStringArray[i] = checkAgainstNullString;
695           colStart += colWidth;
696        }
697      }
698
699      //if what we read is not recordSeparator, throw an exception
700
if (haveSep) {
701         for (int i=(recordSeparatorLength-1); i>=0; i--) {
702             if (tempString[colStart+i] != recordSeparator[i])
703                throw LoadError.recordSeparatorMissing(lineNumber+1);
704         }
705      } else haveSep = true; // reset for the next time, if any.
706

707      lineNumber++;
708      return true;
709   }
710
711   //by this time, we know number of columns that make up a row in this data file
712
//so first look for number of columns-1 field delimites and then look for record
713
//delimiter
714
private boolean readNextDelimitedRow(String JavaDoc[] returnStringArray) throws Exception JavaDoc {
715
716     int upperLimit = numberOfColumns-1; //reduce # field accesses
717

718     //no data in the input file for some reason
719
if (upperLimit < 0)
720        return false;
721
722     //look for number of columns - 1 field separators
723
for (int i = 0; i<upperLimit; i++) {
724       if (!readNextToken(fieldSeparator, 0, fieldSeparatorLength, false) ) {
725         if (i == 0) // still on the first check
726
return false;
727         else
728           throw LoadError.unexpectedEndOfFile(lineNumber+1);
729       }
730       //following is to take care of a case like "aa"aa This will result in an
731
//error. Also a case like "aa" will truncate it to just aa. valid blank
732
//chars are ' ' '\r' '\t'
733
if (stopDelimiterPosition!=0 && ((stopDelimiterPosition) != totalCharsSoFar)) {
734         for (int k=stopDelimiterPosition+1; k<totalCharsSoFar; k++) {
735           // alc: should change || to && since || case is never true --
736
// currentChar can't be three different things at once.
737
// alc: why no \n? BTW, \r and \n should be replaced
738
// or amended with the first char of line.separator...
739
//char currentChar = currentToken[k];
740
//if (currentChar != ' ' && currentChar != '\r' && currentChar != '\t')
741
// use String.trim()'s definition of whitespace.
742
// i18n - check for whitespace - avoid doing a hard coded
743
// character check and use the isWhitespace method to cover all
744
// the Unicode options
745
if (Character.isWhitespace(currentToken[k])==false) {
746               throw LoadError.dataAfterStopDelimiter(lineNumber+1, i+1);
747           }
748         }
749         totalCharsSoFar = stopDelimiterPosition;
750       }
751       //totalCharsSoFar can become -1 in readNextToken
752
if (totalCharsSoFar != -1) {
753         returnStringArray[i] = new String JavaDoc(currentToken,
754                       positionOfNonWhiteSpaceCharInFront, totalCharsSoFar);
755       }
756       else
757          returnStringArray[i] = null;
758     }
759
760     //look for record separator for the last column's value
761
//if I find endoffile and the it's only one column table, then it's a valid endoffile
762
//case. Otherwise, it's an error case. Without the following check for the return value
763
//of readNextToken, import was going into infinite loop for a table with single column
764
//import. end-of-file was getting ignored without the following if.
765
if (!readNextToken(recordSeparator, 0, recordSeparatorLength, true) ) {
766        if (upperLimit == 0)
767           return false;
768        else
769           throw LoadError.unexpectedEndOfFile(lineNumber+1);
770     }
771     //following is to take care of a case like "aa"aa This will result in an
772
//error. Also a case like "aa" will truncate it to just aa. valid blank
773
//chars are ' ' '\r' '\t'
774
if (stopDelimiterPosition!=0 && (stopDelimiterPosition != totalCharsSoFar)) {
775       for (int i=stopDelimiterPosition+1; i<totalCharsSoFar; i++) {
776         // alc: should change || to && since || case is never true --
777
// currentChar can't be three different things at once.
778
// alc: why no \n? BTW, \r and \n should be replaced
779
// or amended with the first char of line.separator...
780
//char currentChar = currentToken[i];
781
//if (currentChar != ' ' && currentChar != '\r' && currentChar != '\t')
782
// use String.trim()'s definition of whitespace.
783
// i18n - check for whitespace - avoid doing a hard coded character
784
// check and use the isWhitespace method to cover all the Unicode
785
// options
786
if (Character.isWhitespace(currentToken[i])==false) {
787           throw LoadError.dataAfterStopDelimiter(lineNumber+1, numberOfColumns);
788         }
789       }
790       totalCharsSoFar = stopDelimiterPosition;
791     }
792
793     //to be able to read delimited files that have a delimeter at the end,
794
//we have to reduce totalCharsSoFar by one when it is last column.
795
//Otherwise last delimeter becomes part of the data.
796
if (hasDelimiterAtEnd) {
797       if (!(fieldStopDelimiterLength > 0)) { //if there is no field stop delimeter specified,
798
//hopefully fieldStopDelimiterLength will not be >0
799

800         //there is weird behavior in the code that makes it read the last
801
//delimeter as part of the last column data, so this forces us to
802
//reduce number of read chars only if there is data stop delimeter
803

804         //Only if it is the last column:
805
//if (fieldStopDelimiter==null){
806
--totalCharsSoFar;
807         //}
808
}
809     }
810
811     if (totalCharsSoFar != -1) {
812
813       /* This is a hack to fix a problem: When there is missing data in columns
814       and hasDelimiterAtEnd==true, then the last delimiter was read as the last column data.
815       Hopefully this will tackle that issue by skipping the last column which is in this case
816       just the delimiter.
817       We need to be careful about the case when the last column data itself is
818       actually same as the delimiter.
819       */

820       if (!hasDelimiterAtEnd) {//normal path:
821
returnStringArray[upperLimit] = new String JavaDoc(currentToken,
822                           positionOfNonWhiteSpaceCharInFront, totalCharsSoFar);
823       }
824       else if (totalCharsSoFar==fieldSeparatorLength && isFieldSep(currentToken) ){
825         //means hasDelimiterAtEnd==true and all of the above are true
826

827         String JavaDoc currentStr = new String JavaDoc(currentToken,
828                           positionOfNonWhiteSpaceCharInFront, totalCharsSoFar);
829
830         if (currentToken[totalCharsSoFar+1]==fieldStopDelimiter[0]){
831           returnStringArray[upperLimit] = currentStr;
832         }
833         else {
834           returnStringArray[upperLimit] = null;
835         }
836       }
837       else {
838         //means hasDelimiterAtEnd==true and previous case is wrong.
839
if (totalCharsSoFar>0) {
840           returnStringArray[upperLimit] = new String JavaDoc(currentToken,
841                             positionOfNonWhiteSpaceCharInFront, totalCharsSoFar);
842         }
843         else{
844           returnStringArray[upperLimit] = null;
845         }
846       }
847     }
848     else
849       returnStringArray[upperLimit] = null;
850
851     lineNumber++;
852     return true;
853   }
854   //tells whether the leading chars of a char array match the field separator:
855
private boolean isFieldSep(char[] chrArray){
856     for (int i=0; i<chrArray.length && i<fieldSeparatorLength; i++){
857       if (chrArray[i]!=fieldSeparator[i])
858         return false;
859     }
860     return true;
861   }
862   //read one column's value at a time
863
boolean readNextToken(char[] delimiter, int delimiterIndex,
864                                int delimiterLength,
865                                boolean isRecordSeperator) throws Exception JavaDoc {
866     int nextChar;
867
868     fieldStopDelimiterIndex = 0;
869     fieldStartDelimiterIndex = 0;
870     totalCharsSoFar = 0;
871     //at the start of every new token, make white space in front count 0
872
positionOfNonWhiteSpaceCharInFront = 0;
873     stopDelimiterPosition = 0;
874     foundStartAndStopDelimiters = false;
875     foundStartDelimiter = false;
876     int returnValue;
877
878     while (true) {
879       nextChar = bufferedReader.read();
880       if (nextChar == -1) //end of file
881
return false;
882
883       //read the character into the token holder. If token holder reaches it's capacity,
884
//double it's capacity
885
currentToken[totalCharsSoFar++] = (char)nextChar;
886       //check if character read is white space char in front
887
checkForWhiteSpaceInFront();
888       if (totalCharsSoFar == currentTokenMaxSize) {
889         currentTokenMaxSize = currentTokenMaxSize * 2;
890         char[] tempArray = new char[currentTokenMaxSize];
891         System.arraycopy(currentToken, 0, tempArray, 0, totalCharsSoFar);
892         currentToken = tempArray;
893       }
894
895       returnValue = lookForPassedSeparator(delimiter, delimiterIndex,
896                                            delimiterLength, nextChar,
897                                            isRecordSeperator);
898       if (returnValue == -1) {
899          //if no stop delimiter found that "" this means null
900
//also if no stop delimiter found then get rid of spaces around the token
901
if (!foundStartAndStopDelimiters ) {
902             if (totalCharsSoFar == 0)
903                totalCharsSoFar = -1;
904             else {
905                //get the count of white spaces from back and subtract that and white spaces in
906
//the front from the characters read so far so that we ignore spaces around the
907
//token.
908
checkForWhiteSpaceInBack();
909                totalCharsSoFar = totalCharsSoFar - positionOfNonWhiteSpaceCharInFront - positionOfNonWhiteSpaceCharInBack;
910             }
911          }
912          return true;
913       }
914       delimiterIndex = returnValue;
915     }
916   }
917 }
918
919
920
921
922
923
924
Popular Tags