KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > openedit > archive > cumulus > Parser


1 /*
2  * Created on Aug 16, 2005
3  */

4 package com.openedit.archive.cumulus;
5
6
7 /*
8  * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
9  * All rights reserved. Software written by Ian F. Darwin and others.
10  * $Id$
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  * notice, this list of conditions and the following disclaimer in the
19  * documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
34  * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
35  * pioneering role in inventing and promulgating (and standardizing) the Java
36  * language and environment is gratefully acknowledged.
37  *
38  * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
39  * inventing predecessor languages C and C++ is also gratefully acknowledged.
40  */

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
44
45 /* Simple demo of CSV parser class.
46  */

47 /** Parse comma-separated values (CSV), a common Windows file format.
48  * Sample input: "LU",86.25,"11/4/1998","2:19PM",+4.0625
49  * <p>
50  * Inner logic adapted from a C++ original that was
51  * Copyright (C) 1999 Lucent Technologies
52  * Excerpted from 'The Practice of Programming'
53  * by Brian W. Kernighan and Rob Pike.
54  * <p>
55  * Included by permission of the http://tpop.awl.com/ web site,
56  * which says:
57  * "You may use this code for any purpose, as long as you leave
58  * the copyright notice and book citation attached." I have done so.
59  * @author Brian W. Kernighan and Rob Pike (C++ original)
60  * @author Ian F. Darwin (translation into Java and removal of I/O)
61  * @author Ben Ballard (rewrote advQuoted to handle '""' and for readability)
62  */

public class Parser {

    /** Separator used when the no-argument constructor is called. */
    public static final char DEFAULT_SEP = ',';

    /**
     * The fields of the most recently parsed line. The same instance is
     * cleared and refilled by every call to {@link #parse(String)}.
     */
    protected List list = new ArrayList();

    /** Precompiled pattern matching a single tab, used by {@link #parseRegEx(String)}. */
    protected Pattern tabs = Pattern.compile("\t");

    /** The separator char for this parser. */
    protected char fieldSep;

    /** Construct a CSV parser with the default separator ({@code ','}). */
    public Parser() {
        this(DEFAULT_SEP);
    }

    /**
     * Construct a CSV parser with a given separator.
     *
     * @param sep the single char for the separator (not a list of
     *            separator characters)
     */
    public Parser(char sep) {
        fieldSep = sep;
    }

    /**
     * Small demonstration: parses one sample line with the default separator
     * and one with {@code '|'}, printing each field on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Parser parser = new Parser();
        printFields(parser.parse(
                "\"LU\",86.25,\"11/4/1998\",\"2:19PM\",+4.0625"));

        // Now test with a non-default separator
        parser = new Parser('|');
        printFields(parser.parse(
                "\"LU\"|86.25|\"11/4/1998\"|\"2:19PM\"|+4.0625"));
    }

    /** Prints every element of {@code fields} to standard output, one per line. */
    private static void printFields(List fields) {
        Iterator it = fields.iterator();
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }

    /**
     * Splits a line on tab characters using the precompiled {@link #tabs}
     * pattern. This method does not use {@link #fieldSep} and performs no
     * quote handling; it does not touch {@link #list}.
     *
     * @param line the text to split
     * @return the pieces between tabs, as produced by
     *         {@link Pattern#split(CharSequence)}
     */
    public String[] parseRegEx(String line) {
        return tabs.split(line);
    }

    /**
     * Breaks the input line into fields separated by {@link #fieldSep}.
     * A field that starts with a double quote is read as a quoted field
     * (see {@link #advQuoted}); any other field runs up to the next separator.
     * A separator at the very end of the line is followed by one final empty
     * field, so {@code "a,b,"} yields three fields and an empty line yields
     * a single empty field.
     *
     * @param line the line to parse
     * @return a {@link List} holding each field as a {@code String}, in order.
     *         This is the parser's internal list, which is cleared and reused
     *         by the next call; copy it if the result must outlive that call.
     */
    public List parse(String line) {
        StringBuffer sb = new StringBuffer();
        list.clear(); // recycle to initial state

        int len = line.length();
        int start = 0; // index where the current field begins
        int sep;       // index of the separator ending the current field, or len

        do {
            sb.setLength(0);
            if (start < len && line.charAt(start) == '"') {
                sep = advQuoted(line, sb, start + 1); // skip opening quote
            } else {
                sep = advPlain(line, sb, start);
            }
            list.add(sb.toString());
            start = sep + 1;
            // Loop on the separator position, not on 'start': when the last
            // character is a separator, start == len but one more (empty)
            // field still has to be emitted.
        } while (sep < len);

        return list;
    }

    /**
     * Reads a quoted field. Inside the field a doubled quote ({@code ""})
     * stands for one literal quote. The field ends at a quote that is
     * immediately followed by the separator, or at a quote that is the last
     * character of the line. A quote followed by anything else is kept as a
     * literal character.
     *
     * @param s  the line being parsed
     * @param sb receives the unquoted field contents
     * @param i  index of the first character after the opening quote
     * @return index of the separator that follows the field, or
     *         {@code s.length()} if the field runs to the end of the line
     */
    protected int advQuoted(String s, StringBuffer sb, int i) {
        int len = s.length();
        int j = i;
        while (j < len) {
            if (s.charAt(j) == '"') {
                if (j + 1 == len) {
                    // closing quote is the last character: no separator follows
                    return len;
                }
                char next = s.charAt(j + 1);
                if (next == '"') {
                    j++; // escaped quote: step onto the second one and append it below
                } else if (next == fieldSep) {
                    return j + 1; // closing quote; report the separator's index
                }
                // any other follower: the quote is treated as a regular character
            }
            sb.append(s.charAt(j));
            j++;
        }
        return len; // ran off the end without a closing quote
    }

    /**
     * Reads an unquoted field: everything from {@code i} up to the next
     * separator, or to the end of the line if there is none.
     *
     * @param s  the line being parsed
     * @param sb receives the field contents
     * @param i  index of the first character of the field
     * @return index of the separator that follows the field, or
     *         {@code s.length()} if no separator was found
     */
    protected int advPlain(String s, StringBuffer sb, int i) {
        int j = s.indexOf(fieldSep, i); // look for separator
        if (j == -1) { // none found: the field is the rest of the line
            sb.append(s.substring(i));
            return s.length();
        }
        sb.append(s.substring(i, j));
        return j;
    }
}
180
181
Popular Tags