KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > activation > registries > MailcapTokenizer


/*
 * The contents of this file are subject to the terms
 * of the Common Development and Distribution License
 * (the "License"). You may not use this file except
 * in compliance with the License.
 *
 * You can obtain a copy of the license at
 * glassfish/bootstrap/legal/CDDLv1.0.txt or
 * https://glassfish.dev.java.net/public/CDDLv1.0.html.
 * See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL
 * HEADER in each file and include the License file at
 * glassfish/bootstrap/legal/CDDLv1.0.txt. If applicable,
 * add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your
 * own identifying information: Portions Copyright [yyyy]
 * [name of copyright owner]
 */

21
/*
 * @(#)MailcapTokenizer.java 1.6 05/11/16
 *
 * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
 */

27
28 package com.sun.activation.registries;
29
/**
 * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
 * Useful for parsing MIME content types.
 *
 * <p>A new tokenizer starts in the {@link #START_TOKEN} state; each call to
 * {@link #nextToken()} advances to the next token and makes it available
 * through {@link #getCurrentToken()} and {@link #getCurrentTokenValue()}.
 * Instances hold mutable scanning state and perform no synchronization.
 */
public class MailcapTokenizer {

    /** A single character that is neither part of a string token nor a known delimiter. */
    public static final int UNKNOWN_TOKEN = 0;
    /** The state of a freshly constructed tokenizer, before the first {@link #nextToken()}. */
    public static final int START_TOKEN = 1;
    /** A run of string-token characters, or an auto-quoted string. */
    public static final int STRING_TOKEN = 2;
    /** End of input. */
    public static final int EOI_TOKEN = 5;
    /** The '/' delimiter; the token code is the character itself. */
    public static final int SLASH_TOKEN = '/';
    /** The ';' delimiter; the token code is the character itself. */
    public static final int SEMICOLON_TOKEN = ';';
    /** The '=' delimiter; the token code is the character itself. */
    public static final int EQUALS_TOKEN = '=';

    private final String data;
    private final int dataLength;
    private int dataIndex;
    private int currentToken;
    private String currentTokenValue;
    private boolean isAutoquoting;
    private char autoquoteChar;

    /**
     * Constructor
     *
     * @param inputString the string to tokenize
     * @throws NullPointerException if <code>inputString</code> is null
     */
    public MailcapTokenizer(String inputString) {
        if (inputString == null) {
            // Same exception type the original produced, but with a usable message.
            throw new NullPointerException("inputString must not be null");
        }

        data = inputString;
        dataIndex = 0;
        dataLength = inputString.length();

        currentToken = START_TOKEN;
        currentTokenValue = "";

        isAutoquoting = false;
        autoquoteChar = ';';
    }

    /**
     * Set whether auto-quoting is on or off.
     *
     * While auto-quoting is on, {@link #nextToken()} skips leading
     * whitespace and then, unless the next character is ';' or '=',
     * returns everything up to (but not including) the first unescaped
     * auto-quote terminator character, or up to the end of input, as a
     * single string token. A backslash escapes the character that
     * follows it; escapes are resolved in the returned token value.
     * Whitespace immediately preceding the terminator is retained in
     * the token value.
     *
     * This is required for handling command strings in a mailcap entry.
     *
     * @param value true to turn auto-quoting on, false to turn it off
     */
    public void setIsAutoquoting(boolean value) {
        isAutoquoting = value;
    }

    /**
     * Set the auto-quote terminating character. The default is ';'.
     *
     * Note that a terminator other than ';' or '=' is not consumed by
     * {@link #nextToken()} while auto-quoting is on: an empty string
     * token is returned each time the tokenizer is positioned on it.
     * Turn auto-quoting off to move past such a terminator.
     *
     * @param value the character that ends an auto-quoted token
     */
    public void setAutoquoteChar(char value) {
        autoquoteChar = value;
    }

    /**
     * Retrieve current token.
     *
     * @return the current token code (one of the <code>*_TOKEN</code> constants)
     */
    public int getCurrentToken() {
        return currentToken;
    }

    /**
     * Get a String that describes the given token.
     *
     * @param token a token code
     * @return a short name for the token, or "really unknown" if the
     *         code is not one of the <code>*_TOKEN</code> constants
     */
    public static String nameForToken(int token) {
        switch (token) {
            case UNKNOWN_TOKEN:
                return "unknown";
            case START_TOKEN:
                return "start";
            case STRING_TOKEN:
                return "string";
            case EOI_TOKEN:
                return "EOI";
            case SLASH_TOKEN:
                return "'/'";
            case SEMICOLON_TOKEN:
                return "';'";
            case EQUALS_TOKEN:
                return "'='";
            default:
                return "really unknown";
        }
    }

    /**
     * Retrieve current token value.
     *
     * @return a String containing the current token value; the empty
     *         string before the first call to {@link #nextToken()},
     *         and null once end of input has been reached
     */
    public String getCurrentTokenValue() {
        return currentTokenValue;
    }

    /**
     * Process the next token.
     *
     * Leading whitespace is skipped. At end of input the token is
     * {@link #EOI_TOKEN} and the token value is null; further calls keep
     * returning {@link #EOI_TOKEN}.
     *
     * @return the next token
     */
    public int nextToken() {
        // skip white space
        while (dataIndex < dataLength
                && isWhiteSpaceChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        if (dataIndex >= dataLength) {
            currentToken = EOI_TOKEN;
            currentTokenValue = null;
            return currentToken;
        }

        // examine the current character and see what kind of token we have
        char c = data.charAt(dataIndex);
        if (isAutoquoting) {
            // Only ';' and '=' are delimiters here; everything else
            // (including '/') starts an auto-quoted string.
            if (isAutoquoteSpecialChar(c)) {
                setSingleCharToken(c, c);
            } else {
                processAutoquoteToken();
            }
        } else if (isStringTokenChar(c)) {
            processStringToken();
        } else if (c == '/' || c == ';' || c == '=') {
            setSingleCharToken(c, c);
        } else {
            setSingleCharToken(UNKNOWN_TOKEN, c);
        }

        return currentToken;
    }

    /** Records a one-character token and moves past that character. */
    private void setSingleCharToken(int token, char c) {
        currentToken = token;
        currentTokenValue = String.valueOf(c);
        ++dataIndex;
    }

    /** Consumes a maximal run of string-token characters as a string token. */
    private void processStringToken() {
        // capture the initial index
        int initialIndex = dataIndex;

        // skip to 1st non string token character
        while (dataIndex < dataLength
                && isStringTokenChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        currentToken = STRING_TOKEN;
        currentTokenValue = data.substring(initialIndex, dataIndex);
    }

    /**
     * Consumes everything up to the first non-escaped auto-quote
     * terminator (or end of input) as a string token, resolving
     * backslash escapes in the token value. The terminator itself is
     * left in the input.
     */
    private void processAutoquoteToken() {
        // capture the initial index
        int initialIndex = dataIndex;

        // now skip to the 1st non-escaped autoquote termination character
        while (dataIndex < dataLength) {
            char c = data.charAt(dataIndex);
            if (c == autoquoteChar) {
                break;
            }
            if (c == '\\' && dataIndex + 1 < dataLength) {
                // An escaped character can never terminate the token,
                // so step over the backslash and the character it quotes.
                dataIndex += 2;
            } else {
                ++dataIndex;
            }
        }

        currentToken = STRING_TOKEN;
        currentTokenValue =
            fixEscapeSequences(data.substring(initialIndex, dataIndex));
    }

    /**
     * Tests whether a character is one of the delimiter characters
     * <code>( ) &lt; &gt; @ , ; : \ " / [ ] ? =</code> that cannot
     * appear in a string token.
     *
     * @param c the character to test
     * @return true if <code>c</code> is one of the listed characters
     */
    public static boolean isSpecialChar(char c) {
        switch (c) {
            case '(':
            case ')':
            case '<':
            case '>':
            case '@':
            case ',':
            case ';':
            case ':':
            case '\\':
            case '"':
            case '/':
            case '[':
            case ']':
            case '?':
            case '=':
                return true;
            default:
                return false;
        }
    }

    /**
     * Tests whether a character is still treated as a delimiter while
     * auto-quoting is on.
     *
     * @param c the character to test
     * @return true if <code>c</code> is ';' or '='
     */
    public static boolean isAutoquoteSpecialChar(char c) {
        return c == ';' || c == '=';
    }

    /**
     * @param c the character to test
     * @return the result of {@link Character#isISOControl(char)}
     */
    public static boolean isControlChar(char c) {
        return Character.isISOControl(c);
    }

    /**
     * @param c the character to test
     * @return the result of {@link Character#isWhitespace(char)}
     */
    public static boolean isWhiteSpaceChar(char c) {
        return Character.isWhitespace(c);
    }

    /**
     * Tests whether a character may appear in a string token.
     *
     * @param c the character to test
     * @return true if <code>c</code> is not a special character, not a
     *         control character and not whitespace
     */
    public static boolean isStringTokenChar(char c) {
        return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
    }

    /**
     * Resolves backslash escapes: each backslash is dropped and the
     * character following it is kept literally. A backslash that is the
     * last character of the input is kept as is.
     */
    private static String fixEscapeSequences(String inputString) {
        if (inputString.indexOf('\\') < 0) {
            // nothing to unescape
            return inputString;
        }

        int inputLength = inputString.length();
        StringBuffer buffer = new StringBuffer(inputLength);

        for (int i = 0; i < inputLength; ++i) {
            char currentChar = inputString.charAt(i);
            if (currentChar == '\\' && i < inputLength - 1) {
                // keep the escaped character and skip over it
                buffer.append(inputString.charAt(++i));
            } else {
                buffer.append(currentChar);
            }
        }

        return buffer.toString();
    }
}
348
Popular Tags