KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > mmbase > util > StringTagger


/*

This software is OSI Certified Open Source Software.
OSI Certified is a certification mark of the Open Source Initiative.

The license (Mozilla version 1.0) can be read at the MMBase site.
See http://www.MMBase.org/license

*/

package org.mmbase.util;

import java.util.*;

14 /**
15  * StringTagger, Creates a object with tags and fields from a String.
16  * Its ideal for name-value pairs and name-value pairs with multivalues.
17  * It also provides support for quoted values, and recognizes values that are 'function' calls with
18  * their own parameter list (allowing to ignore any tokens within these lists when parsing).
19  *
20  * @application SCAN
21  * @code-conventions Some methods (Values, Value etc) have wrong names (and are duplicating Map methods btw)
22  * @author Daniel Ockeloen
23  * @author Pierre van Rooden
24  * @version $Id: StringTagger.java,v 1.17 2005/01/30 16:46:35 nico Exp $
25  */

26 public class StringTagger implements Map {
27
28     /**
29      * The name-value pairs where the value is a single string
30      */

31     private Hashtable tokens;
32     /**
33      * The name-value pairs where the value is a list of strings
34      */

35     private Map multitokens;
36     /**
37      * Token used to separate tags (default a space).
38      */

39     private char tagStart;
40     /**
41      * Token used to separate the tag name from its value (default '=').
42      */

43     private char tagSeparator;
44     /**
45      * Token used to separate multiple values within a tag (default ',').
46      */

47     private char fieldSeparator;
48     /**
49      * Token used to indicate quoted values (default '\"').
50      */

51     private char quote;
52     /**
53      * Token used to indicate the start of a function parameter list (default '(').
54      */

55     private char functionOpen;
56     /**
57      * Token used to indicate the end of a function parameter list (default ')').
58      */

59     private char functionClose;
60
61     /**
62      * The line that was parsed.
63      */

64     private String JavaDoc startline = "";
65
66     /**
67      * Creates a StringTag for the given line.
68      * Example : StringTagger("cmd=lookup names='Daniel Ockeloen, Rico Jansen'",' ','=',','\'','('.')')
69      * @param line : to be tagged line
70      * @param tagStart : Seperator for the Tags
71      * @param tagSeparator : Seperator inside the Tag (between name and value)
72      * @param fieldSeparator : Seperator inside the value
73      * @param quote : Char used if a quoted value
74      * @param functionOpen char used to open a function parameter list
75      * @param functionClose char used to close a function parameter list
76      */

77     public StringTagger(String JavaDoc line, char tagStart, char tagSeparator,char fieldSeparator, char quote,
78                                      char functionOpen, char functionClose) {
79         this.tagStart = tagStart;
80         this.startline = line;
81         this.tagSeparator = tagSeparator;
82         this.fieldSeparator = fieldSeparator;
83         this.quote = quote;
84         this.functionOpen = functionOpen;
85         this.functionClose = functionClose;
86         tokens = new Hashtable(); //needing elements(), keys()
87
multitokens = new HashMap();
88         createTagger(line);
89     }
90
91     /**
92      * Creates a StringTag for the given line.
93      * Uses default characters for the function parameter list tokens.
94      * Example : StringTagger("cmd=lookup names='Daniel Ockeloen, Rico Jansen'",' ','=',','\'')
95      * @param line : to be tagged line
96      * @param tagStart : Seperator for the Tags
97      * @param tagSeparator : Seperator inside the Tag (between name and value)
98      * @param fieldSeparator : Seperator inside the value
99      * @param quote : Char used if a quoted value
100      */

101     public StringTagger(String JavaDoc line, char tagStart, char tagSeparator,char fieldSeparator, char quote) {
102         this(line, tagStart, tagSeparator,fieldSeparator, quote,'(',')');
103     }
104
105     /**
106      * Creates a StringTag for the given line.
107      * Uses default characters for all tokens.
108      * @param line : to be tagged line
109      */

110     public StringTagger(String JavaDoc line) {
111         this(line,' ','=',',','"','(',')');
112     }
113
114     /**
115      * Parses the given line, and stores all value-pairs found in the
116      * tokens and multitokens fields.
117      * @param line : to be tagged line (why is this a parameter when it can eb retrieved from startline?)
118      * @since MMBase-1.7
119      */

120     protected void createTagger(String JavaDoc line) {
121         StringTokenizer tok2 = new StringTokenizer(line+tagStart,""+tagSeparator+tagStart,true);
122         String JavaDoc part,tag,prevtok,tok;
123         boolean isTag,isPart,isQuoted;
124
125         isTag = true;
126         isPart = false;
127         isQuoted = false;
128         prevtok = "";
129         tag = part = ""; // should be StringBuffer
130
// log.debug("Tagger -> |"+tagStart+"|"+tagSeparator+"|"+quote+"|");
131
while(tok2.hasMoreTokens()) {
132             tok = tok2.nextToken();
133 // log.debug("tagger tok ("+isTag+","+isPart+","+isQuoted+") |"+tok+"|"+prevtok+"|");
134
if (tok.equals(""+tagSeparator)) {
135                 if (isTag) {
136                     tag = prevtok;
137                     isTag = false;
138                 } else {
139                     if (!isQuoted) {
140                         splitTag(tag+tagSeparator+part);
141                         isTag = true;
142                         isPart = false;
143                         part = "";
144                     } else {
145                         part += tok;
146                     }
147                 }
148             } else if (tok.equals(""+tagStart)) {
149                 if (isPart) {
150                     if (isQuoted) {
151                         part += tok;
152                     } else {
153                         if (!prevtok.equals("" + tagStart)) {
154                             splitTag(tag + tagSeparator + part);
155                             isTag = true;
156                             isPart = false;
157                             part = "";
158                         }
159                     }
160                     prevtok = tok;
161                 }
162             } else {
163                 if (!isTag) isPart = true;
164 // log.debug("isTag "+isTag+" "+isPart);
165
if (isPart) {
166                     if (isQuoted) {
167                         // Check end quote
168
if (tok.charAt(tok.length() - 1) == quote) {
169                             isQuoted = false;
170                         }
171                         part += tok;
172                     } else {
173                         if (tok.charAt(0) == quote && !(tok.charAt(tok.length() - 1) == quote)) {
174                             isQuoted = true;
175                         }
176                         part += tok;
177                     }
178                 }
179 // log.debug("isTag "+isTag+" "+isPart+" "+isQuoted);
180
prevtok = tok;
181             }
182         }
183     }
184
185     /**
186      * Handles and splits a tag in its component parts, and store the elemements in
187      * the tokens and multitokens fields.
188      * @param tag the string containing the tag
189      * @since MMBase-1.7
190      */

191     protected void splitTag(String JavaDoc tag) {
192         int tagPos = tag.indexOf(tagSeparator);
193         String JavaDoc name = tag.substring(0,tagPos);
194         String JavaDoc result = tag.substring(tagPos+1);
195 // log.debug("SplitTag |"+name+"|"+result+"|");
196

197         if (result.length()>1 && result.charAt(0) == quote && result.charAt(result.length() - 1) == quote) {
198             result = result.substring(1, result.length() - 1);
199         }
200         tokens.put(name, result);
201
202         StringTokenizer toks = new StringTokenizer(result, "" + fieldSeparator + functionOpen + functionClose, true);
203         // If quoted, strip the " " from beginning and end ?
204
Vector multi = new Vector();
205         if(toks.hasMoreTokens()) {
206             String JavaDoc tokvalue="";
207             int nesting = 0;
208             while (toks.hasMoreTokens()) {
209                 String JavaDoc tok = toks.nextToken();
210                 if (tok.equals("" + fieldSeparator)) {
211                     if (nesting == 0) {
212                         multi.add(tokvalue);
213                         tokvalue = "";
214                     } else {
215                         tokvalue += tok;
216                     }
217                 } else if (tok.equals("" + functionOpen)) {
218                     nesting++;
219                     tokvalue += tok;
220                 } else if (tok.equals("" + functionClose)) {
221                     nesting--;
222                     tokvalue += tok;
223                 } else {
224                     tokvalue += tok;
225                 }
226             }
227             multi.add(tokvalue);
228         }
229         multitokens.put(name, multi);
230     }
231
232
233     // Map interface methods
234

235     /**
236      * Clears all data
237      */

238     public void clear() {
239         tokens.clear();
240         multitokens.clear();
241         startline="";
242     }
243
244     /**
245      * Checks whether a key exits.
246      */

247     public boolean containsKey (Object JavaDoc ob) {
248         return tokens.containsKey(ob);
249     }
250
251     /**
252      * Checks whether a value exits.
253      */

254     public boolean containsValue (Object JavaDoc ob) {
255         return tokens.containsValue(ob);
256     }
257
258     /**
259      * returns all values
260      */

261     public Set entrySet() {
262         return tokens.entrySet();
263     }
264
265     /**
266      * Returns whether two objects are the same
267      * @param ob the key of the value to retrieve
268      */

269     public boolean equals(Object JavaDoc ob) {
270         return (ob instanceof Map) && (ob.hashCode() == this.hashCode());
271     }
272
273     /**
274      * Returns the value of a key as an Object.
275      * The value returned is a single, unseparated, string.<br />
276      * Use {@link #Values} to get a list of multi-values as a <code>Vector</code>.<br />
277      * Use {@link #Value} to get the first value as a String
278      * @param ob the key of the value to retrieve
279      */

280     public Object JavaDoc get(Object JavaDoc ob) {
281         return tokens.get(ob);
282     }
283
284     /**
285      * Hashcode for sorting and comparing
286      */

287     public int hashCode() {
288         return multitokens.hashCode();
289     }
290
291     /**
292      * Checks whether the tagger is empty
293      */

294     public boolean isEmpty() {
295         return tokens.isEmpty();
296     }
297
298     /**
299      * Returns a Set of the name keys.
300      */

301     public Set keySet() {
302         return tokens.keySet();
303     }
304
305     /**
306      * sets a value (for the Map interface).
307      */

308     public Object JavaDoc put(Object JavaDoc key, Object JavaDoc value) {
309         Object JavaDoc res = tokens.get(key);
310         setValue((String JavaDoc)key, (String JavaDoc)value);
311         return res;
312     }
313
314     /**
315      * Manually sets a set of values (for the Map interface).
316      */

317     public void putAll(Map map) {
318         throw new UnsupportedOperationException JavaDoc();
319     }
320
321     /**
322      * remove a value (for the Map interface).
323      */

324     public Object JavaDoc remove(Object JavaDoc key) {
325         Object JavaDoc res = tokens.get(key);
326         tokens.remove(key);
327         multitokens.remove(key);
328         return res;
329     }
330
331     /**
332      * sets a value (for the Map interface).
333      */

334     public int size() {
335         return tokens.size();
336     }
337
338     /**
339      * returns all values
340      */

341     public Collection values() {
342         return tokens.values();
343     }
344
345     // Custom methods
346

347     /**
348      * Returns a Enumeration of the name keys.
349      */

350     public Enumeration keys() {
351         return tokens.keys();
352     }
353
354     /**
355      * toString
356      */

357     public String JavaDoc toString() {
358         StringBuffer JavaDoc content = new StringBuffer JavaDoc("[");
359         for (Enumeration e = keys(); e.hasMoreElements();) {
360             String JavaDoc key = (String JavaDoc) e.nextElement();
361             content.append('<').append(key);
362             content.append('=').append(Values(key));
363             content.append('>');
364         }
365         content.append(']');
366         return content.toString();
367     }
368
369     /**
370      * Returns a Enumeration of the values as String.
371      * The values returned are all single, unsepartated, strings.
372      * Use {@link #multiElements} to get a list of multi-values.
373      */

374     public Enumeration elements() {
375         return tokens.elements();
376     }
377
378     /**
379      * Returns a Enumeration of the values as Vectors that contain
380      * the seperated values.
381      * Use {@link #elements} to get a list of single, unseparated, values.
382      */

383     public Enumeration multiElements(String JavaDoc token) {
384         Vector tmp = (Vector) multitokens.get(token);
385         if (tmp != null) {
386             return tmp.elements();
387         } else {
388             return null;
389         }
390     }
391
392     /**
393      * Returns the values as a Vector that contains the separated values.
394      * <br />
395      * Use {@link #get} to get the list of values as a <code>String</code><br />
396      * Use {@link #Value} to get the first value as a String
397      * @param token the key of the value to retrieve
398      */

399     public Vector Values(String JavaDoc token) {
400         Vector tmp = (Vector) multitokens.get(token);
401         return tmp;
402     }
403
404     /**
405      * Returns the original parsed line
406      * @param token unused
407      */

408     public String JavaDoc ValuesString(String JavaDoc token) {
409         return startline;
410     }
411
412     /**
413      * Returns the first value as a <code>String</code>.
414      * In case of a single value, it returns that value. In case of multiple values,
415      * it returns the
416      * Use {@link #get} to get the list of values as a <code>String</code><br />
417      * Use {@link #Values} to get a list of multi-values as a <code>Vector</code>.<br />
418      * @param token the key of the value to retrieve
419      */

420     public String JavaDoc Value(String JavaDoc token) {
421         String JavaDoc val;
422         Vector tmp=(Vector) multitokens.get(token);
423         if (tmp!=null && tmp.size()>0) {
424             val=(String JavaDoc) tmp.elementAt(0);
425             if (val != null) {
426                 val = Strip.DoubleQuote(val,Strip.BOTH); // added stripping daniel
427
return val;
428             } else {
429                 return null;
430             }
431         } else {
432             return null;
433         }
434     }
435
436     /**
437      * Manually sets a single value.
438      */

439     public void setValue(String JavaDoc token,String JavaDoc val) {
440         Vector newval = new Vector();
441         newval.addElement(val);
442         tokens.put(token,newval);
443         multitokens.put(token,newval);
444     }
445
446     /**
447      * Manually sets a multi-value value.
448      */

449     public void setValues(String JavaDoc token,Vector values) {
450         tokens.put(token,values.toString());
451         multitokens.put(token,values);
452     }
453
454     /**
455      * For testing
456      */

457     public static void main(String JavaDoc args[]) {
458         StringTagger tag = new StringTagger(args[0]);
459     }
460
461 }
462
Popular Tags