KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > hp > hpl > jena > util > Tokenizer


1 /******************************************************************
2  * File: Tokenizer.java
3  * Created by: Dave Reynolds
4  * Created on: 24-Jun-2003
5  *
6  * (c) Copyright 2003, 2004, 2005 Hewlett-Packard Development Company, LP
7  * [See end of file]
8  * $Id: Tokenizer.java,v 1.4 2005/02/21 12:18:58 andy_seaborne Exp $
9  *****************************************************************/

10 package com.hp.hpl.jena.util;
11
12 import java.util.NoSuchElementException JavaDoc;
13
/**
 * A tokenizer, similar to java's StringTokenizer but allows for quoted
 * character strings which can include other separators.
 * <p>
 * The source is split into three kinds of token: single delimiter
 * characters, single literal-delimiter (quote) characters, and runs of
 * ordinary text. The text between an opening and a closing literal
 * delimiter is returned as one token, even if it contains delimiter
 * characters. Inside a literal, a literal delimiter that is immediately
 * preceded by a backslash does not terminate the literal; the backslash is
 * left in the returned token.
 * <p>
 * NOTE(review): when <code>returnDelims</code> is false, any token that is
 * exactly one character long and is a delimiter or literal delimiter is
 * suppressed. This includes a quoted literal whose whole content is such a
 * character (for example <code>' '</code>).
 *
 * @author <a HREF="mailto:der@hplb.hpl.hp.com">Dave Reynolds</a>
 * @version $Revision: 1.4 $ on $Date: 2005/02/21 12:18:58 $
 */
public class Tokenizer {

    /** The string being parsed */
    protected String source;

    /** The index of the first unreturned char in source */
    protected int p;

    /** The set of delimiter characters */
    protected String delim;

    /** If true then delimiters should be returned as tokens */
    protected boolean returnDelims;

    /** Literal string delimiters */
    protected String literalDelim;

    /** The lex state */
    protected int state;

    /** A lookahead for tokens, filled by {@link #hasMoreTokens()} */
    protected String lookahead;

    /** State flag: normal parse */
    protected static final int NORMAL = 1;

    /** State flag: start of literal (the opening quote has just been returned) */
    protected static final int LITERAL_START = 2;

    /** State flag: end of literal (the literal body has just been returned) */
    protected static final int LITERAL_END = 3;

    /**
     * Constructor.
     * @param str the source string to be parsed
     * @param delim The set of delimiter characters
     * @param literalDelim Literal string delimiters
     * @param returnDelims If true then delimiters should be returned as tokens
     */
    public Tokenizer(String str, String delim, String literalDelim, boolean returnDelims) {
        this.source = str;
        this.delim = delim;
        this.literalDelim = literalDelim;
        this.returnDelims = returnDelims;
        p = 0;
        state = NORMAL;
    }

    /**
     * Return the next token. If <code>returnDelims</code> is false, delimiter
     * and literal-delimiter tokens are skipped.
     *
     * @return the next token; may be the empty string for an empty literal
     * @throws java.util.NoSuchElementException if there are no more tokens available
     */
    public String nextToken() {
        // Iterate rather than recurse so that a long run of delimiters
        // cannot exhaust the call stack.
        while (true) {
            String result;
            if (lookahead != null) {
                result = lookahead;
                lookahead = null;
            } else {
                result = getNextToken();
            }
            if (result == null) {
                throw new java.util.NoSuchElementException("No more elements in tokenized string");
            }
            if (returnDelims || !isDelimiterToken(result)) {
                return result;
            }
        }
    }

    /**
     * Test if there are more tokens which can be returned.
     * The next token is read ahead and cached so that a following call to
     * {@link #nextToken()} returns it. If <code>returnDelims</code> is false,
     * delimiter tokens are skipped first, so a true result means that
     * <code>nextToken()</code> will not throw.
     *
     * @return true if a call to <code>nextToken()</code> would return a token
     */
    public boolean hasMoreTokens() {
        // Only read ahead when nothing is cached; otherwise a token would be lost.
        if (lookahead == null) {
            lookahead = getNextToken();
        }
        if (!returnDelims) {
            while (lookahead != null && isDelimiterToken(lookahead)) {
                lookahead = getNextToken();
            }
        }
        return lookahead != null;
    }

    /**
     * Find the next token which can either be a delimiter or a real token.
     *
     * @return the next raw token, or null if the source is exhausted
     */
    private String getNextToken() {
        if (p >= source.length()) {
            return null;
        }
        switch (state) {
        case NORMAL:
            if (is(literalDelim)) {
                // Opening quote: return it and scan the literal body next time.
                state = LITERAL_START;
                p++;
                return source.substring(p - 1, p);
            } else if (is(delim)) {
                p++;
                return source.substring(p - 1, p);
            } else {
                // Ordinary text runs up to (not including) the next delimiter.
                int start = p;
                p++;
                while (p < source.length() && !is(delim)) {
                    p++;
                }
                return source.substring(start, p);
            }
        case LITERAL_START: {
            int start = p;
            // Check the bound first: isLiteral() reads source.charAt(p), and an
            // unterminated literal would otherwise run off the end of source.
            while (p < source.length() && isLiteral()) {
                p++;
            }
            state = LITERAL_END;
            return source.substring(start, p);
        }
        case LITERAL_END:
            // p is on the closing quote; return it and resume normal parsing.
            state = NORMAL;
            p++;
            return source.substring(p - 1, p);
        default:
            return null;
        }
    }

    /**
     * Returns true if the token is a single character drawn from the
     * delimiter set or the literal delimiter set.
     */
    private boolean isDelimiterToken(String token) {
        if (token.length() != 1) {
            return false;
        }
        char c = token.charAt(0);
        return delim.indexOf(c) != -1 || literalDelim.indexOf(c) != -1;
    }

    /**
     * Returns true if the current character is contained in the given classification.
     */
    private boolean is(String classification) {
        return classification.indexOf(source.charAt(p)) != -1;
    }

    /**
     * Returns true if the current character is a legal literal innard, that is
     * either not a literal delimiter or a literal delimiter escaped by a
     * directly preceding backslash.
     */
    private boolean isLiteral() {
        if (!is(literalDelim)) {
            return true;
        }
        // check for previous escape
        return p > 0 && source.charAt(p - 1) == '\\';
    }
}
159
160
161 /*
162     (c) Copyright 2003, 2004, 2005 Hewlett-Packard Development Company, LP
163     All rights reserved.
164
165     Redistribution and use in source and binary forms, with or without
166     modification, are permitted provided that the following conditions
167     are met:
168
169     1. Redistributions of source code must retain the above copyright
170        notice, this list of conditions and the following disclaimer.
171
172     2. Redistributions in binary form must reproduce the above copyright
173        notice, this list of conditions and the following disclaimer in the
174        documentation and/or other materials provided with the distribution.
175
176     3. The name of the author may not be used to endorse or promote products
177        derived from this software without specific prior written permission.
178
179     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
180     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
181     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
182     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
183     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
184     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
185     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
186     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
187     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
188     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
189 */
Popular Tags