Java > Open Source Codes > org > jacorb > trading > constraint > Lex


1   
2   // Copyright (C) 1998-1999
3   // Object Oriented Concepts, Inc.
4   
5   // **********************************************************************
6   //
7   // Copyright (c) 1997
8   // Mark Spruiell (mark@intellisoft.com)
9   //
10  // See the COPYING file for more information
11  //
12  // **********************************************************************
13  
14  package org.jacorb.trading.constraint;
15  
16  import java.io.*;
17  import java.util.*;
18  
19  
/**
 * Lex is the lexical analyzer used to produce tokens from an
 * input source.
 *
 * The whole input is read into memory by the constructor, which also
 * advances to the first token.  Callers then alternate between
 * {@link #getToken()} / {@link #getLexeme()} and {@link #nextToken()}
 * until {@link #END} or {@link #ERROR} is reported.
 */
public class Lex
{
  /** The complete input text, read up front by the constructor. */
  private StringBuffer m_input;
  /** The current token (one of the constants below). */
  private int m_token = ERROR;
  /** Accumulates the characters of the current lexeme. */
  private StringBuffer m_buffer = new StringBuffer();
  /** Cached string form of m_buffer; null until first requested. */
  private String m_lexeme = null;
  /** Index into m_input of the next character to be read. */
  private int m_pos = 0;
  /** Maps reserved words (String) to their token value (Integer). */
  private Hashtable m_literals = new Hashtable();
  /** Set once nextChar() has been asked to read past the input. */
  private boolean m_eof = false;


  public static final int ERROR         = 0;
  public static final int END           = 1;  // EOF
  public static final int LPAREN        = 2;
  public static final int RPAREN        = 3;
  public static final int EXIST         = 4;
  public static final int MINUS         = 5;
  public static final int NOT           = 6;
  public static final int MULT          = 7;
  public static final int DIV           = 8;
  public static final int PLUS          = 9;
  public static final int TILDE         = 10;
  public static final int IN            = 11;
  public static final int EQUAL         = 12;
  public static final int NOT_EQUAL     = 13;
  public static final int LESS          = 14;
  public static final int LESS_EQUAL    = 15;
  public static final int GREATER       = 16;
  public static final int GREATER_EQUAL = 17;
  public static final int AND           = 18;
  public static final int OR            = 19;
  public static final int IDENT         = 20;
  public static final int TRUE_LIT      = 21;  // literal
  public static final int FALSE_LIT     = 22;  // literal
  public static final int STRING_LIT    = 23;  // literal
  public static final int NUMBER_LIT    = 24;  // literal
  public static final int PREF_MIN      = 25;
  public static final int PREF_MAX      = 26;
  public static final int PREF_WITH     = 27;
  public static final int PREF_RANDOM   = 28;
  public static final int PREF_FIRST    = 29;


  private Lex()
  {
  }


  /**
   * Constructs a new lexical analyzer and positions it on the first
   * token of the input.
   *
   * If reading fails with an IOException, reading stops and whatever
   * was read up to that point is tokenized.
   *
   * @param reader the source of the text to tokenize; it is read to
   *               the end but not closed
   */
  public Lex(Reader reader)
  {
    BufferedReader br = new BufferedReader(reader);
    m_input = new StringBuffer();

      // read all of the characters into our string buffer
    boolean eof = false;
    char[] chars = new char[512];
    while (! eof) {
      try {
        int len = br.read(chars);
        if (len < 0)
          eof = true;
        else
          m_input.append(chars, 0, len);
      }
      catch (IOException e) {
          // a failing reader must end the loop; retrying would spin
          // forever if the reader keeps failing
        eof = true;
      }
    }

      // load literals - maps the token to its numeric value
    m_literals.put("TRUE", new Integer(TRUE_LIT));
    m_literals.put("FALSE", new Integer(FALSE_LIT));
    m_literals.put("and", new Integer(AND));
    m_literals.put("not", new Integer(NOT));
    m_literals.put("or", new Integer(OR));
    m_literals.put("exist", new Integer(EXIST));
    m_literals.put("in", new Integer(IN));
    m_literals.put("min", new Integer(PREF_MIN));
    m_literals.put("max", new Integer(PREF_MAX));
    m_literals.put("with", new Integer(PREF_WITH));
    m_literals.put("random", new Integer(PREF_RANDOM));
    m_literals.put("first", new Integer(PREF_FIRST));

      // advance to first token
    nextToken();
  }


  /**
   * Advances to the next token.
   *
   * Leading whitespace is skipped.  Afterwards getToken() reports the
   * kind of token found (END at end of input, ERROR for malformed
   * input) and getLexeme() reports its text.  For a STRING_LIT the
   * lexeme is the string's content without the enclosing quotes and
   * with escape backslashes removed.
   */
  public void nextToken()
  {
    int result = ERROR;

    boolean done = false;

    clearLexeme();

    while (! done) {

      char c = nextChar();

      // check for end-of-file
      if (eof()) {
        result = END;
        break;
      }

      switch (c) {
        case '(':
          result = LPAREN;
          done = true;
          addLexeme(c);
          break;

        case ')':
          result = RPAREN;
          done = true;
          addLexeme(c);
          break;

        case '-':
          result = MINUS;
          done = true;
          addLexeme(c);
          break;

        case '*':
          result = MULT;
          done = true;
          addLexeme(c);
          break;

        case '/':
          result = DIV;
          done = true;
          addLexeme(c);
          break;

        case '+':
          result = PLUS;
          done = true;
          addLexeme(c);
          break;

        case '~':
          result = TILDE;
          done = true;
          addLexeme(c);
          break;

        case '=': {
            // make sure the next character is '=' (to form token '==')
          char peek = nextChar();
          if (peek == '=') {
            addLexeme(c);
            addLexeme(peek);
            result = EQUAL;
          }
          else {
            pushBack(peek);
            result = ERROR;
          }
          done = true;
          break;
        }

        case '!': {
            // make sure the next character is '=' (to form token '!=')
          char peek = nextChar();
          if (peek == '=') {
            addLexeme(c);
            addLexeme(peek);
            result = NOT_EQUAL;
          }
          else {
              // un-read the peeked character, as the '=' case does, so
              // the reported position stays just after the '!'
            pushBack(peek);
            result = ERROR;
          }
          done = true;
          break;
        }

        case '<': {
          char peek = nextChar();
          addLexeme(c);
            // check for trailing '=' (to form token '<=')
          if (peek == '=') {
            addLexeme(peek);
            result = LESS_EQUAL;
          }
          else {
              // trailing '=' not found - push back the character
            pushBack(peek);
            result = LESS;
          }
          done = true;
          break;
        }

        case '>': {
          char peek = nextChar();
          addLexeme(c);
            // check for trailing '=' (to form token '>=')
          if (peek == '=') {
            addLexeme(peek);
            result = GREATER_EQUAL;
          }
          else {
              // trailing '=' not found - push back the character
            pushBack(peek);
            result = GREATER;
          }
          done = true;
          break;
        }

        case '\'': {
            // quoted string

          result = STRING_LIT;

            // search until we find the matching quote
          while ((c = nextChar()) != '\'') {
              // a string may not span lines
            if (c == '\n') {
              result = ERROR;
              break;
            }

              // allow characters to be escaped (e.g. the quote)
            if (c == '\\')
              c = nextChar(); // skip the escape char

              // if we encounter EOF before the matching quote, it's an error
            if (eof()) {
              result = ERROR;
              break;
            }

            addLexeme(c);
          } // while

          done = true;
          break;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9': {
            // deal with a number

          boolean seenExp = false;
          boolean seenPeriod = false;
          boolean formatError = false;
          char lastChar = 0;

          while (! done) {

            switch (c) {
              case '.':
                  // it's an error if we've already seen a '.'
                if (seenPeriod) {
                  formatError = true;
                  done = true;
                }
                else {
                  addLexeme(c);
                  seenPeriod = true;
                }
                break;

              case 'E':
              case 'e':
                  // it's an error if we've already seen a 'E' or if
                  // the previous character was not a digit
                if (seenExp || ! Character.isDigit(lastChar)) {
                  formatError = true;
                  done = true;
                }
                else {
                  seenExp = true;
                  addLexeme(c);
                  c = nextChar();
                    // check for +/- on exponent
                  if (c == '+' || c == '-')
                    addLexeme(c);
                  else
                    pushBack(c);
                }
                break;

              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
              case '8':
              case '9':
                addLexeme(c);
                break;

              default:
                  // anything else terminates the number
                pushBack(c);
                done = true;
                break;
            } // switch (c)

            if (! done) {
              lastChar = c; // remember the last character
              c = nextChar();
              if (eof())
                done = true;
            }
          } // while (! done)

            // if there were no errors, then decide what kind of
            // number we've found (on a format error, result keeps its
            // initial value of ERROR)
          if (! formatError) {
              // if the last character seen is not a digit, it's an error
            if (! Character.isDigit(lastChar))
              result = ERROR;
            else
              result = NUMBER_LIT;
          }
          break;
        }

        case ' ':
        case '\t':
        case '\r':  // so that CRLF line endings are accepted
        case '\n':
          continue;  // skip whitespace

      } // switch (c)

        // if we're still not done, then we must have a string, either
        // a literal or an identifier
      if (! done) {
          // if the character isn't compatible with the beginning of
          // a literal or identifier, it's an error
        if (c != '_' && ! Character.isLetter(c)) {
          result = ERROR;
          done = true;
        }
        else {

            // deal with string literal or identifier

          while (isIdent(c) && ! eof()) {
            addLexeme(c);
            c = nextChar();
          }

            // the character that ended the word belongs to the next token
          if (! eof())
            pushBack(c);

            // see if the lexeme is a literal
          String lexeme = getLexeme();
          Integer val = (Integer)m_literals.get(lexeme);

            // if we didn't find a literal, then it must be an identifier
          if (val == null)
            result = IDENT;
          else
            result = val.intValue();

          done = true;
        }
      }

    } // while (! done)

    m_token = result;
  }


  /**
   * Returns the current token
   *
   * @return one of the token constants defined by this class
   */
  public int getToken()
  {
    return m_token;
  }


  /**
   * Returns the current lexeme
   *
   * @return the text accumulated for the current token (possibly empty)
   */
  public String getLexeme()
  {
      // build the string lazily and cache it until the next clearLexeme()
    if (m_lexeme == null)
      m_lexeme = m_buffer.toString();

    return m_lexeme;
  }


  /**
   * Returns the current position of the analyzer
   *
   * @return the index in the input of the next character to be read
   */
  public int getPosition()
  {
    return m_pos;
  }


  /**
   * Returns true once an attempt has been made to read past the end
   * of the input.
   */
  protected boolean eof()
  {
    return m_eof;
  }


  /**
   * Reads the next input character and advances the position.
   *
   * @return the character read, or 0 (with the end-of-file flag set)
   *         if the input is exhausted
   */
  protected char nextChar()
  {
    char result = 0;

    if (m_pos < m_input.length()) {
      result = m_input.charAt(m_pos);
      m_pos++;
    }
    else
      m_eof = true;

    return result;
  }


  /**
   * Un-reads the most recently read character by stepping the
   * position back by one.
   *
   * @param c the character that was read; 0 (the value nextChar()
   *          returns at end of input) is ignored
   */
  protected void pushBack(char c)
  {
    if (c != 0)
      m_pos--;
  }


  /**
   * Returns true if the character may appear in an identifier:
   * a letter, a digit or an underscore.
   */
  protected boolean isIdent(char c)
  {
    return (Character.isLetter(c) || Character.isDigit(c) || (c == '_'));
  }


  /**
   * Discards the current lexeme, including its cached string form.
   */
  protected void clearLexeme()
  {
    m_lexeme = null;
    m_buffer.setLength(0);
  }


  /**
   * Appends a character to the current lexeme.
   */
  protected void addLexeme(char c)
  {
    m_buffer.append(c);
  }


  /**
   * Command-line test driver: tokenizes the first argument and prints
   * each token with its lexeme, stopping at END or ERROR.
   */
  public static void main(String[] args)
  {
    if (args.length < 1) {
      System.err.println("Usage: Lex expr");
      System.exit(1);
    }

    Lex lex = new Lex(new StringReader(args[0]));
    int token = lex.getToken();
    while (token != Lex.END && token != Lex.ERROR) {
      System.out.println("Token = '" + lex.getLexeme() + "' (" +
        tokenName(token) + ")");
      lex.nextToken();
      token = lex.getToken();
    }
    System.out.println("Token = " + tokenName(token));
  }


  /**
   * Returns the symbolic name of a token value.
   *
   * @param token a token constant
   * @return the constant's name, or "<unknown>" for any other value
   */
  protected static String tokenName(int token)
  {
    switch (token) {
      case ERROR:         return "ERROR";
      case END:           return "END";
      case LPAREN:        return "LPAREN";
      case RPAREN:        return "RPAREN";
      case EXIST:         return "EXIST";
      case MINUS:         return "MINUS";
      case NOT:           return "NOT";
      case MULT:          return "MULT";
      case DIV:           return "DIV";
      case PLUS:          return "PLUS";
      case TILDE:         return "TILDE";
      case IN:            return "IN";
      case EQUAL:         return "EQUAL";
      case NOT_EQUAL:     return "NOT_EQUAL";
      case LESS:          return "LESS";
      case LESS_EQUAL:    return "LESS_EQUAL";
      case GREATER:       return "GREATER";
      case GREATER_EQUAL: return "GREATER_EQUAL";
      case AND:           return "AND";
      case OR:            return "OR";
      case IDENT:         return "IDENT";
      case TRUE_LIT:      return "TRUE_LIT";
      case FALSE_LIT:     return "FALSE_LIT";
      case STRING_LIT:    return "STRING_LIT";
      case NUMBER_LIT:    return "NUMBER_LIT";
      case PREF_MIN:      return "PREF_MIN";
      case PREF_MAX:      return "PREF_MAX";
      case PREF_WITH:     return "PREF_WITH";
      case PREF_RANDOM:   return "PREF_RANDOM";
      case PREF_FIRST:    return "PREF_FIRST";
      default:            return "<unknown>";
    }
  }
}
612 
613 
614 
615 
616 
617 
618 
619 
620 
621 
622 
623 
624 
625 
626 
627 
628
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags