1 package org.apache.turbine.util.parser; 2 3 18 19 import java.io.BufferedReader ; 20 import java.io.IOException ; 21 import java.io.InputStreamReader ; 22 import java.io.Reader ; 23 import java.io.StreamTokenizer ; 24 25 import java.util.ArrayList ; 26 import java.util.Iterator ; 27 import java.util.List ; 28 import java.util.NoSuchElementException ; 29 30 import org.apache.commons.logging.Log; 31 import org.apache.commons.logging.LogFactory; 32 33 56 public abstract class DataStreamParser implements Iterator 57 { 58 59 private static Log log = LogFactory.getLog(DataStreamParser.class); 60 61 64 private static final boolean DEBUG = false; 65 66 69 protected static final String EMPTYFIELDNAME = "UNKNOWNFIELD"; 70 71 74 private List columnNames; 75 76 79 private StreamTokenizer tokenizer; 80 81 84 private ValueParser lineValues; 85 86 89 private boolean neverRead = true; 90 91 94 private String characterEncoding; 95 96 99 private char fieldSeparator; 100 101 110 public DataStreamParser(Reader in, List columnNames, 111 String characterEncoding) 112 { 113 this.columnNames = columnNames; 114 this.characterEncoding = characterEncoding; 115 116 if (this.characterEncoding == null) 117 { 118 this.characterEncoding = "US-ASCII"; 120 try 121 { 122 this.characterEncoding = ((InputStreamReader ) in).getEncoding(); 123 } 124 catch (ClassCastException e) 125 { 126 } 127 } 128 129 tokenizer = new StreamTokenizer (new BufferedReader (in)); 130 initTokenizer(tokenizer); 131 } 132 133 140 protected void initTokenizer(StreamTokenizer tokenizer) 141 { 142 tokenizer.ordinaryChars('0', '9'); 145 tokenizer.ordinaryChars('-', '-'); 146 tokenizer.ordinaryChars('.', '.'); 147 148 150 tokenizer.wordChars(' ', Integer.MAX_VALUE); 151 152 tokenizer.quoteChar('"'); 154 155 tokenizer.eolIsSignificant(true); 157 } 158 159 163 public void setFieldSeparator(char fieldSeparator) 164 { 165 this.fieldSeparator = fieldSeparator; 166 tokenizer.ordinaryChar(fieldSeparator); 168 } 169 170 175 public void setColumnNames(List columnNames) 176 { 177 this.columnNames = columnNames; 178 } 179 180 187 public void readColumnNames() 188 throws IOException 189 { 190 columnNames = new ArrayList (); 191 int lastTtype = 0; 192 int fieldCounter = 1; 193 194 neverRead = false; 195 tokenizer.nextToken(); 196 while (tokenizer.ttype == StreamTokenizer.TT_WORD || tokenizer.ttype == StreamTokenizer.TT_EOL 197 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator) 198 { 199 if (tokenizer.ttype != fieldSeparator && tokenizer.ttype != StreamTokenizer.TT_EOL) 200 { 201 columnNames.add(tokenizer.sval); 202 fieldCounter++; 203 } 204 else if (tokenizer.ttype == fieldSeparator && lastTtype == fieldSeparator) 205 { 206 columnNames.add(EMPTYFIELDNAME + fieldCounter); 208 fieldCounter++; 209 } 210 else if (lastTtype == fieldSeparator && tokenizer.ttype == StreamTokenizer.TT_EOL) 211 { 212 columnNames.add(EMPTYFIELDNAME + fieldCounter); 213 break; 214 } 215 else if (tokenizer.ttype == StreamTokenizer.TT_EOL) 216 { 217 break; 218 } 219 lastTtype = tokenizer.ttype; 220 tokenizer.nextToken(); 221 } 222 } 223 224 230 public boolean hasNextRow() 231 throws IOException 232 { 233 if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL) 236 { 237 tokenizer.nextToken(); 238 tokenizer.pushBack(); 239 neverRead = false; 240 } 241 return tokenizer.ttype != StreamTokenizer.TT_EOF; 242 } 243 244 251 public ValueParser nextRow() 252 throws IOException , NoSuchElementException 253 { 254 if (!hasNextRow()) 255 { 256 throw new NoSuchElementException (); 257 } 258 259 if (lineValues == null) 260 { 261 lineValues = new BaseValueParser(characterEncoding); 262 } 263 else 264 { 265 lineValues.clear(); 266 } 267 268 Iterator it = columnNames.iterator(); 269 tokenizer.nextToken(); 270 while (tokenizer.ttype == StreamTokenizer.TT_WORD 271 || tokenizer.ttype == '"' || tokenizer.ttype == fieldSeparator) 272 { 273 int lastTtype = 0; 274 if (it.hasNext()) 277 { 278 String colname = it.next().toString(); 279 String colval = tokenizer.sval; 280 if (tokenizer.ttype != fieldSeparator && lastTtype != fieldSeparator) 281 { 282 if (DEBUG) 283 { 284 log.debug("DataStreamParser.nextRow(): " + 285 colname + "=" + colval); 286 } 287 lineValues.add(colname, colval); 288 } 289 else if (tokenizer.ttype == fieldSeparator && lastTtype != fieldSeparator) 290 { 291 lastTtype = tokenizer.ttype; 292 tokenizer.nextToken(); 293 if (tokenizer.ttype != fieldSeparator && tokenizer.sval != null) 294 { 295 lineValues.add(colname, tokenizer.sval); 296 } 297 else if (tokenizer.ttype == StreamTokenizer.TT_EOL) 298 { 299 tokenizer.pushBack(); 300 } 301 } 302 } 303 tokenizer.nextToken(); 304 } 305 306 return lineValues; 307 } 308 309 314 public boolean hasNext() 315 { 316 boolean hasNext = false; 317 318 try 319 { 320 hasNext = hasNextRow(); 321 } 322 catch (IOException e) 323 { 324 log.error("IOException in CSVParser.hasNext", e); 325 } 326 327 return hasNext; 328 } 329 330 337 public Object next() 338 throws NoSuchElementException 339 { 340 Object nextRow = null; 341 342 try 343 { 344 nextRow = nextRow(); 345 } 346 catch (IOException e) 347 { 348 log.error("IOException in CSVParser.next", e); 349 throw new NoSuchElementException (); 350 } 351 352 return nextRow; 353 } 354 355 360 public void remove() 361 throws UnsupportedOperationException 362 { 363 throw new UnsupportedOperationException (); 364 } 365 } 366 | Popular Tags |