KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jacorb > trading > client > typemgr > Lex


// Copyright (C) 1998-1999
// Object Oriented Concepts, Inc.

// **********************************************************************
//
// Copyright (c) 1997
// Mark Spruiell (mark@intellisoft.com)
//
// See the COPYING file for more information
//
// **********************************************************************

14 package org.jacorb.trading.client.typemgr;
15
16 import java.io.*;
17 import java.util.*;
18
19
20 /**
21  * Lex is the lexical analyzer used to produce tokens from an
22  * input source
23  */

24 public class Lex
25 {
26   private StringBuffer JavaDoc m_input;
27   private int m_token = ERROR;
28   private StringBuffer JavaDoc m_buffer = new StringBuffer JavaDoc();
29   private String JavaDoc m_lexeme = null;
30   private int m_pos = 0;
31   private int m_line = 1;
32   private Hashtable m_literals = new Hashtable();
33   private boolean m_eof = false;
34
35
36   public static final int ERROR = 0;
37   public static final int END = 1; // EOF
38
public static final int LBRACE = 2;
39   public static final int RBRACE = 3;
40   public static final int LANGLE = 4;
41   public static final int RANGLE = 5;
42   public static final int COLON = 6;
43   public static final int DOUBLECOLON = 7;
44   public static final int SEMICOLON = 8;
45   public static final int COMMA = 9;
46   public static final int SERVICE = 10;
47   public static final int INTERFACE = 11;
48   public static final int IDENT = 12;
49   public static final int MANDATORY = 13;
50   public static final int READONLY = 14;
51   public static final int PROPERTY = 15;
52   public static final int UNSIGNED = 16;
53   public static final int BOOLEAN = 17;
54   public static final int CHAR = 18;
55   public static final int SHORT = 19;
56   public static final int LONG = 20;
57   public static final int FLOAT = 21;
58   public static final int DOUBLE = 22;
59   public static final int STRING = 23;
60   public static final int SEQUENCE = 24;
61   public static final int OTHER = 25;
62
63
64
65
66   private Lex()
67   {
68   }
69
70
71   /**
72    * Constructs a new lexical analyzer
73    */

74   public Lex(Reader reader)
75   {
76     BufferedReader br = new BufferedReader(reader);
77     m_input = new StringBuffer JavaDoc();
78
79       // read all of the characters into our string buffer
80
boolean eof = false;
81     char[] chars = new char[512];
82     while (! eof) {
83       try {
84         int len = br.read(chars);
85         if (len < 0)
86           eof = true;
87         else
88           m_input.append(chars, 0, len);
89       }
90       catch (IOException e) {
91       }
92     }
93
94       // load literals - maps the token to its numeric value
95
m_literals.put("service", new Integer JavaDoc(SERVICE));
96     m_literals.put("interface", new Integer JavaDoc(INTERFACE));
97     m_literals.put("mandatory", new Integer JavaDoc(MANDATORY));
98     m_literals.put("readonly", new Integer JavaDoc(READONLY));
99     m_literals.put("property", new Integer JavaDoc(PROPERTY));
100     m_literals.put("unsigned", new Integer JavaDoc(UNSIGNED));
101     m_literals.put("boolean", new Integer JavaDoc(BOOLEAN));
102     m_literals.put("char", new Integer JavaDoc(CHAR));
103     m_literals.put("short", new Integer JavaDoc(SHORT));
104     m_literals.put("long", new Integer JavaDoc(LONG));
105     m_literals.put("float", new Integer JavaDoc(FLOAT));
106     m_literals.put("double", new Integer JavaDoc(DOUBLE));
107     m_literals.put("string", new Integer JavaDoc(STRING));
108     m_literals.put("sequence", new Integer JavaDoc(SEQUENCE));
109     m_literals.put("other", new Integer JavaDoc(OTHER));
110   }
111
112
113   /**
114    * Advances to the next token
115    */

116   public void nextToken()
117     throws LexException
118   {
119     int result = ERROR;
120
121     boolean done = false;
122
123     clearLexeme();
124
125     while (! done) {
126
127       char c = nextChar();
128
129       // check for end-of-file
130
if (eof()) {
131         result = END;
132         break;
133       }
134
135       switch (c) {
136         case '{':
137           result = LBRACE;
138           done = true;
139           addLexeme(c);
140           break;
141
142         case '}':
143           result = RBRACE;
144           done = true;
145           addLexeme(c);
146           break;
147
148         case '<':
149           result = LANGLE;
150           done = true;
151           addLexeme(c);
152           break;
153
154         case '>':
155           result = RANGLE;
156           done = true;
157           addLexeme(c);
158           break;
159
160 /***
161         case ':':
162           result = COLON;
163           done = true;
164           addLexeme(c);
165           break;
166 ***/

167
168         case ':': {
169             addLexeme(c);
170             c = nextChar();
171             if (c != ':') {
172               pushBack(c);
173               result = COLON;
174               done = true;
175             }
176             else {
177               result = DOUBLECOLON;
178               addLexeme(c);
179               done = true;
180             }
181           }
182           break;
183
184         case ';':
185           result = SEMICOLON;
186           done = true;
187           addLexeme(c);
188           break;
189
190         case ',':
191           result = COMMA;
192           done = true;
193           addLexeme(c);
194           break;
195
196         case '/': {
197             c = nextChar();
198             if (c != '/')
199               throw new LexException("expected comment", m_line);
200
201               // consume the rest of the line
202
while ((c = nextChar()) != '\n')
203               ;
204
205             continue;
206           }
207
208         case ' ':
209         case '\t':
210         case '\n':
211         case '\r':
212           continue; // skip whitespace
213

214       } // switch (c)
215

216
217         // if we're still not done, then we must have a string, either
218
// a literal or an identifier
219
if (! done) {
220           // if the character isn't compatible with the beginning of
221
// a literal or identifier, it's an error
222
if (c != '_' && ! Character.isLetter(c))
223           throw new LexException("unexpected input", m_line);
224
225           // deal with string literal or identifier
226

227         while (isIdent(c) && ! eof()) {
228           addLexeme(c);
229           c = nextChar();
230         }
231
232         if (! eof())
233           pushBack(c);
234
235           // see if the lexeme is a literal
236
String JavaDoc lexeme = getLexeme();
237         Integer JavaDoc val = (Integer JavaDoc)m_literals.get(lexeme);
238
239           // if we didn't find a literal, then it must be an identifier
240
if (val == null)
241           result = IDENT;
242         else
243           result = val.intValue();
244
245         done = true;
246       }
247
248     } // while (! done)
249

250     m_token = result;
251   }
252
253
254   /**
255    * Returns the current token
256    */

257   public int getToken()
258   {
259     return m_token;
260   }
261
262
263   /**
264    * Returns the current lexeme
265    */

266   public String JavaDoc getLexeme()
267   {
268     if (m_lexeme == null)
269       m_lexeme = m_buffer.toString();
270
271     return m_lexeme;
272   }
273
274
275   /**
276    * Returns the current line of the analyzer
277    */

278   public int getLine()
279   {
280     return m_line;
281   }
282
283
284   protected boolean eof()
285   {
286     return m_eof;
287   }
288
289
290   protected char nextChar()
291   {
292     char result = 0;
293
294     if (m_pos < m_input.length()) {
295       result = m_input.charAt(m_pos);
296       m_pos++;
297
298       if (result == '\n')
299         m_line++;
300     }
301     else
302       m_eof = true;
303
304     return result;
305   }
306
307
308   protected void pushBack(char c)
309   {
310     if (c != 0)
311       m_pos--;
312   }
313
314
315   protected boolean isIdent(char c)
316   {
317     return (Character.isLetter(c) || Character.isDigit(c) || (c == '_'));
318   }
319
320
321   protected void clearLexeme()
322   {
323     m_lexeme = null;
324     m_buffer.setLength(0);
325   }
326
327
328   protected void addLexeme(char c)
329   {
330     m_buffer.append(c);
331   }
332
333   /**************** comment out this line to enable main()
334
335   public static void main(String[] args)
336   {
337     try {
338       Lex lex = new Lex(new InputStreamReader(System.in));
339       lex.nextToken();
340       int token = lex.getToken();
341       while (token != Lex.END && token != Lex.ERROR) {
342         System.out.println("Token = '" + lex.getLexeme() + "' (" +
343           tokenName(token) + ")");
344         lex.nextToken();
345         token = lex.getToken();
346       }
347
348       System.out.println("Token = " + tokenName(token));
349     }
350     catch (LexException e) {
351       System.out.println("Error (" + e.getLine() + ") : " + e.getMessage());
352     }
353   }
354
355
356   protected static String tokenName(int token)
357   {
358     String result;
359
360     switch (token) {
361       case ERROR:
362         result = "ERROR";
363         break;
364       case END:
365         result = "END";
366         break;
367       case LBRACE:
368         result = "LBRACE";
369         break;
370       case RBRACE:
371         result = "RBRACE";
372         break;
373       case LANGLE:
374         result = "LANGLE";
375         break;
376       case RANGLE:
377         result = "RANGLE";
378         break;
379       case COLON:
380         result = "COLON";
381         break;
382       case DOUBLECOLON:
383         result = "DOUBLECOLON";
384         break;
385       case SEMICOLON:
386         result = "SEMICOLON";
387         break;
388       case COMMA:
389         result = "COMMA";
390         break;
391       case SERVICE:
392         result = "SERVICE";
393         break;
394       case INTERFACE:
395         result = "INTERFACE";
396         break;
397       case IDENT:
398         result = "IDENT";
399         break;
400       case MANDATORY:
401         result = "MANDATORY";
402         break;
403       case READONLY:
404         result = "READONLY";
405         break;
406       case PROPERTY:
407         result = "PROPERTY";
408         break;
409       case UNSIGNED:
410         result = "UNSIGNED";
411         break;
412       case BOOLEAN:
413         result = "BOOLEAN";
414         break;
415       case CHAR:
416         result = "CHAR";
417         break;
418       case SHORT:
419         result = "SHORT";
420         break;
421       case LONG:
422         result = "LONG";
423         break;
424       case FLOAT:
425         result = "FLOAT";
426         break;
427       case DOUBLE:
428         result = "DOUBLE";
429         break;
430       case STRING:
431         result = "STRING";
432         break;
433       case SEQUENCE:
434         result = "SEQUENCE";
435         break;
436       case OTHER:
437         result = "OTHER";
438         break;
439       default:
440         result = "<unknown>";
441     }
442
443     return result;
444   }
445
446   /*********** comment out this line to enable main() */

447 }
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
Popular Tags