KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > jimm > datavision > source > charsep > DelimParser


1 package jimm.datavision.source.charsep;
2 import java.io.Reader JavaDoc;
3 import java.io.IOException JavaDoc;
4 import java.util.List JavaDoc;
5 import java.util.ArrayList JavaDoc;
6
7 /**
8  * Parses delimited data. Handles quotes and embedded delimiters.
9  *
10  * @author Jim Menard, <a HREF="mailto:jimm@io.com">jimm@io.com</a>
11  */

12 public class DelimParser {
13
14 public static final int EOF = -1;
15
16 protected char delimiter;
17 protected Reader JavaDoc in;
18 protected int pushbackChar;
19
20 /**
21  * Constructor, using ',' as the delimiter. The caller must close
22  * <var>in</var>.
23  *
24  * @param in input reader
25  */

26 public DelimParser(Reader JavaDoc in) {
27     this(in, ',');
28 }
29
30 /**
31  * Constructor. The caller must close <var>in</var>.
32  *
33  * @param in input reader
34  * @param delimiter delimiter character
35  */

36 public DelimParser(Reader JavaDoc in, char delimiter) {
37     this.delimiter = delimiter;
38     this.in = in;
39     pushbackChar = EOF;
40 }
41
42 /**
43  * Returns an array of column data or <code>null</code> if there is no more
44  * data. Handles delimiters and quotes within the data just as they are
45  * generated by Excel comma- and tab-separated files.
46  *
47  * @return a <code>List</code> of strings; return <code>null</code> if
48  * there is no more data.
49  */

50 public List JavaDoc parse() throws IOException JavaDoc {
51     ArrayList JavaDoc columns = null;
52     boolean insideQuotes = false;
53     int numQuotesSeen = 0;
54     StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
55
56     int charAsInt;
57     char c;
58     char prevChar = '\0';
59     while ((charAsInt = nextChar()) != EOF) {
60     c = (char)charAsInt;
61
62     switch (c) {
63     case '"': // Quote character
64
if (!insideQuotes) { // Start of quoted column
65
insideQuotes = true;
66         numQuotesSeen = 0;
67         }
68         else if (insideQuotes) { // Inside quoted column
69
if (numQuotesSeen == 1) { // This is second of doubled quotes
70
buf.append(c);
71             numQuotesSeen = 0;
72         }
73         else
74             numQuotesSeen = 1;
75         }
76         break;
77     case '\n': // Linefeed/newline
78
case '\r':
79         if (insideQuotes) {
80         if (numQuotesSeen == 1) { // Closing quote at end of line
81
if (columns == null) columns = new ArrayList JavaDoc();
82             columns.add(buf.toString());
83             return columns;
84         }
85         else
86             buf.append(c);
87         }
88         else { // End of line; return columns
89
// Handle DOS line endings
90
if (c == '\r') { // Check for following '\n
91
charAsInt = nextChar();
92             c = (char)charAsInt;
93             if (c != '\n') // Eat following '\n' if it exists
94
pushback(charAsInt); // Else put it back
95
}
96
97         charAsInt = nextChar();
98         c = (char)charAsInt;
99         if (columns == null && buf.length() == 0 && charAsInt == EOF)
100             return null; // Empty line at end of file
101

102         pushback(charAsInt);
103         if (columns == null) columns = new ArrayList JavaDoc();
104         columns.add(buf.toString());
105         return columns;
106         }
107         break;
108     default:
109         if (c == delimiter) { // Normal delimiter
110
if (!insideQuotes) {
111             if (columns == null) columns = new ArrayList JavaDoc();
112             columns.add(buf.toString());
113             buf = new StringBuffer JavaDoc();
114         }
115         else { // Inside quoted column
116
// Delimiter at end of quoted column data
117
if (numQuotesSeen == 1) {
118             insideQuotes = false;
119             if (columns == null) columns = new ArrayList JavaDoc();
120             columns.add(buf.toString());
121             buf = new StringBuffer JavaDoc();
122             }
123             // Delimiter inside quoted column
124
else
125             buf.append(delimiter);
126         }
127         }
128         else { // Everything else
129
numQuotesSeen = 0;
130         buf.append(c);
131         }
132         break;
133     }
134
135     prevChar = c;
136     }
137
138     // We've reached EOF
139
if (columns == null && buf.length() == 0) // Empty line at end of file
140
return null;
141
142     if (buf.length() > 0 || prevChar == delimiter) {
143     if (columns == null) columns = new ArrayList JavaDoc();
144     columns.add(buf.toString());
145     }
146     return columns;
147 }
148
149 protected int nextChar() throws IOException JavaDoc {
150     if (pushbackChar == EOF)
151     return in.read();
152     else {
153     int c = pushbackChar;
154     pushbackChar = EOF;
155     return c;
156     }
157 }
158
159 protected void pushback(int charAsInt) {
160     pushbackChar = charAsInt;
161 }
162
163 }
164
Popular Tags