LispReader


1   package gnu.kawa.lispexpr;
2   import gnu.text.*;
3   import gnu.mapping.*;
4   import gnu.lists.*;
5   import gnu.math.*;
6   import gnu.expr.*;
7   
8   /** A Lexer for reading S-expressions in generic Lisp-like syntax.
9    * This class may have outlived its usefulness: It's mostly just a
10   * wrapper around a LineBufferedReader plus a helper token-buffer.
11   * The functionality should be moved to ReadTable, though it is
12   * unclear what to do about the tokenBuffer.
13   */
14  
15  public class LispReader extends Lexer
16  {
  /** Create a LispReader for the given port.
   * @param port the input port; handed unchanged to the Lexer constructor
   */
  public LispReader(LineBufferedReader port)
  {
    super(port);
  }
21  
  /** Create a LispReader for the given port and message collection.
   * @param port the input port; handed unchanged to the Lexer constructor
   * @param messages handed unchanged to the Lexer constructor
   *   (presumably where diagnostics are collected -- confirm in Lexer)
   */
  public LispReader(LineBufferedReader port, SourceMessages messages)
  {
    super(port, messages);
  }
26  
27    /** Resolve a unit name, if possible.
28     * Returns null if the unit name is unknown. */
29    public static Object   lookupUnit (String   name)
30    {
31      name = (name + "$unit").intern();
32      try
33        {
34      return Environment.getCurrent().getChecked(name);
35        }
36      catch (UnboundLocationException ex)
37        {
38      return name;
39        }
40    }
41  
42    /** Read a #|...|#-style comment (which may contain other nested comments).
43      * Assumes the initial "#|" has already been read.
44      */
45    final public void readNestedComment (char c1, char c2)
46         throws java.io.IOException  , SyntaxException
47    {
48      int commentNesting = 1;
49      int startLine = port.getLineNumber();
50      int startColumn = port.getColumnNumber();
51      do
52        {
53      int c = read ();
54      if (c == '|')
55        {
56          c = read();
57          if (c == c1)
58            commentNesting--;
59        }
60      else if (c == c1)
61        {
62          c = read();
63          if (c == c2)
64            commentNesting++;
65        }
66      if (c < 0)
67        {
68              eofError("unexpected end-of-file in " + c1 + c2
69                       + " comment starting here",
70                       startLine + 1, startColumn - 1);
71          return;
72        }
73        } while (commentNesting > 0);
74    }
75  
76    /** Get specification of how symbols should be case-folded.
77      * @return Either 'P' (means preserve case), 'U' (upcase),
78      * 'D' (downcase, or 'I' (invert case).
79      */
80    static char getReadCase()
81    {
82      char read_case;
83      try
84        {
85      String   read_case_string
86        = Environment.getCurrent().get("symbol-read-case", "P").toString();
87      read_case = read_case_string.charAt(0);
88      if (read_case == 'P') ;
89      else if (read_case == 'u')
90        read_case = 'U';
91      else if (read_case == 'd' || read_case == 'l' || read_case == 'L')
92        read_case = 'D';
93      else if (read_case == 'i')
94        read_case = 'I';
95        }
96      catch (Exception   ex)
97        {
98      read_case = 'P';
99        }
100     return read_case;
101   }
102 
103   public Object   readValues (int ch,  ReadTable rtable)
104       throws java.io.IOException  , SyntaxException
105   {
106     return readValues(ch, rtable.lookup(ch), rtable);
107   }
108 
109   /** May return zero or multiple values. */
110   public Object   readValues (int ch, ReadTableEntry entry, ReadTable rtable)
111       throws java.io.IOException  , SyntaxException
112   {
113     // Step numbers refer to steps in section 2.2 of the HyperSpec.
114     // Step 1:
115     int startPos = tokenBufferLength;
116 
117     if (entry == null)
118       {
119     // Step 2:
120     String   err = ("invalid character #\\"+((char) ch));  // FIXME
121     if (interactive) fatal(err);
122     else error(err);
123     return Values.empty;
124       }
125     int kind = entry.getKind();
126     seenEscapes = false;
127     switch (kind)
128       {
129       case ReadTable.WHITESPACE:
130     // Step 3:
131     return Values.empty;
132       case ReadTable.TERMINATING_MACRO:
133       case ReadTable.NON_TERMINATING_MACRO:
134     Object   value = entry.read(this, ch, -1);
135     return value;
136       case ReadTable.CONSTITUENT:
137         if (ch == rtable.postfixLookupOperator)
138           { // Force an initial ':' to be treated as a CONSTITUENT.
139             tokenBufferAppend(ch);
140             ch = read();
141           }
142       case ReadTable.SINGLE_ESCAPE: // Step 5:
143       case ReadTable.MULTIPLE_ESCAPE: // Step 6:
144       default:  // 
145     break;
146       }
147 
148     readToken(ch, getReadCase(), rtable);
149     int endPos = tokenBufferLength;
150     if (seenEscapes)
151       return returnSymbol(startPos, endPos, rtable);
152     else
153       return handleToken(startPos, endPos, rtable);
154   }
155 
  /** Marker character stored in the token buffer immediately before
   * each escaped character (see readToken and returnSymbol).
   */
  public static final char TOKEN_ESCAPE_CHAR = '\uffff';

  /** If true, then the token buffer contains escaped characters.
   * These are prefixed (in the buffer) by TOKEN_ESCAPE_CHAR.
   * Reset by readValues before each token is read.
   */
  protected boolean seenEscapes;

  /** True if ":IDENTIFIER" should be treated as a keyword. */
  protected boolean initialColonIsKeyword = true;

  /** True if "IDENTIFIER:" should be treated as a keyword. */
  protected boolean finalColonIsKeyword = true;
168 
169   void readToken(int ch, char readCase, ReadTable rtable)
170       throws java.io.IOException  , SyntaxException
171   {
172     boolean inEscapes = false;
173     for (;; ch = read())
174       {
175     if (ch < 0)
176       {
177         if (inEscapes)
178           eofError("unexpected EOF between escapes");
179         else
180           break;
181       }
182     ReadTableEntry entry = rtable.lookup(ch);
183     if (entry == null)
184       {
185         if (inEscapes)
186           {
187         tokenBufferAppend(TOKEN_ESCAPE_CHAR);
188         tokenBufferAppend(ch);
189         continue;
190           }
191         unread(ch);
192         break;
193       }
194     int kind = entry.getKind();
195         if (ch == rtable.postfixLookupOperator && ! inEscapes
196             && validPostfixLookupStart(rtable))
197           kind = ReadTable.TERMINATING_MACRO;
198                   
199     if (kind == ReadTable.SINGLE_ESCAPE)
200       {
201         ch = read();
202         if (ch < 0)
203           eofError("unexpected EOF after single escape");
204         tokenBufferAppend(TOKEN_ESCAPE_CHAR);
205         tokenBufferAppend(ch);
206         seenEscapes = true;
207         continue;
208       }
209     if (kind == ReadTable.MULTIPLE_ESCAPE)
210       {
211         inEscapes = ! inEscapes;
212         continue;
213       }
214     if (inEscapes)
215       {
216         // Step 9:
217         tokenBufferAppend(TOKEN_ESCAPE_CHAR);
218         tokenBufferAppend(ch);
219       }
220     else
221       {
222         // Step 8:
223         switch (kind)
224           {
225           case ReadTable.CONSTITUENT:
226         // ... fall through ...
227           case ReadTable.NON_TERMINATING_MACRO:
228         if (readCase == 'U'
229             || (readCase == 'I' && Character.isLowerCase((char) ch)))
230           ch = Character.toUpperCase((char) ch);
231         else if (readCase == 'D'
232              || (readCase == 'I'
233                  && Character.isUpperCase((char) ch)))
234           ch = Character.toLowerCase ((char) ch);
235         tokenBufferAppend(ch);
236         continue;
237           case ReadTable.MULTIPLE_ESCAPE:
238         inEscapes = true;
239         seenEscapes = true;
240         continue;
241           case ReadTable.TERMINATING_MACRO:
242         unread(ch);
243         return;
244           case ReadTable.WHITESPACE:
245         // if (readPreservingWhitespace) FIXME
246         unread(ch);
247         return;
248           }
249       }
250       }
251   }
252 
253   public Object   readObject ()
254       throws java.io.IOException  , SyntaxException
255   {
256     char saveReadState = ((InPort) port).readState;
257     int startPos = tokenBufferLength;
258     ((InPort) port).readState = ' ';
259     try
260       {
261         ReadTable rtable = ReadTable.getCurrent();
262     for (;;)
263       {
264         int line = port.getLineNumber();
265         int column = port.getColumnNumber();
266         int ch = port.read();
267         if (ch < 0)
268           return Sequence.eofValue; // FIXME
269             Object   value = readValues(ch, rtable);
270         if (value == Values.empty)
271           continue;
272         return handlePostfix(value, rtable, line, column);
273       }
274       }
275     finally
276       {
277     tokenBufferLength = startPos;
278     ((InPort) port).readState = saveReadState;
279       }
280   }
281 
282   protected boolean validPostfixLookupStart (ReadTable rtable)
283       throws java.io.IOException  
284   {
285     int ch = port.peek();
286     ReadTableEntry entry;
287     if (ch < 0 || ch == ':' || (entry = rtable.lookup(ch)) == null
288         || ch == rtable.postfixLookupOperator)
289       return false;
290     int kind = entry.getKind();
291     return kind == ReadTable.CONSTITUENT
292       || kind == ReadTable.NON_TERMINATING_MACRO
293       || kind == ReadTable.MULTIPLE_ESCAPE
294       || kind == ReadTable.SINGLE_ESCAPE;
295   }
296 
297   Object   handlePostfix (Object   value, ReadTable rtable, int line, int column)
298       throws java.io.IOException  , SyntaxException
299   {
300     if (value == QuoteExp.voidExp)
301       value = Values.empty;
302     for (;;)
303       {
304         int ch = port.peek();
305         if (ch < 0 || ch != rtable.postfixLookupOperator)
306           break;
307         // A kludge to map PreOpWord to ($lookup$ Pre 'Word).
308         port.read();
309         if (! validPostfixLookupStart(rtable))
310           {
311             unread();
312             break;
313           }
314         ch = port.read();
315         Object   rightOperand = readValues(ch, rtable.lookup(ch), rtable);
316         value = LList.list2(value,
317                             LList.list2(LispLanguage.quote_sym, rightOperand));
318         value = PairWithPosition.make(LispLanguage.lookup_sym, value,
319                                       port.getName(), line+1, column+1);
320       }
321     return value;
322   }
323 
324   private boolean isPotentialNumber (char[] buffer, int start, int end)
325   {
326     int sawDigits = 0;
327     for (int i = start;  i < end;  i++)
328       {
329     char ch = buffer[i];
330     if (Character.isDigit(ch))
331       sawDigits++;
332     else if (ch == '-' || ch == '+')
333       {
334         if (i + 1 == end)
335           return false;
336       }
337     else if (ch == '#')
338       return true;
339     else if (Character.isLetter(ch) || ch == '/'
340          || ch == '_' || ch == '^')
341       {
342         // CommonLisp defines _123 (and ^123) as a "potential number";
343         // most implementations seem to define it as a symbol.
344         // Scheme does defines it as a symbol.
345         if (i == start)
346           return false;
347       }
348     else if (ch != '.')
349       return false;
350       }
351     return sawDigits > 0;
352   }
353 
  /** Flag bit for parseNumber: enables the special-case recognition
   * of the bare imaginary units "+i" and "-i". */
  static final int SCM_COMPLEX = 1;
  /** The parseNumber flags used by handleToken; currently just SCM_COMPLEX. */
  public static final int SCM_NUMBERS = SCM_COMPLEX;
356 
357   /** Parse a number.
358    * @param buffer contains the characters of the number
359    * @param start startinging index of the number in the buffer
360    * @param count number of characters in buffer to use
361    * @param exactness either 'i' or 'I' force an inexact result,
362    *   either 'e' or 'E' force an exact result,
363    *   '\0' yields an inact or inexact depending on the form of the literal,
364    *   while ' ' is like '\0' but does not allow more exactness specifiers.
365    * @param radix the number base to use or 0 if unspecified
366    * @return the number if a valid number; null or a String-valued error
367    *   message if if there was some error parsing the number.
368    */
369   public static Object   parseNumber(char[] buffer, int start, int count,
370                    char exactness, int radix, int flags)
371   {
372     int end = start + count;
373     int pos = start;
374     if (pos >= end)
375       return "no digits";
376     char ch = buffer[pos++];
377     while (ch == '#')
378       {
379     if (pos >= end)
380       return "no digits";
381     ch = buffer[pos++];
382     switch (ch)
383       {
384       case 'b':  case 'B':
385         if (radix != 0)
386           return "duplicate radix specifier";
387         radix = 2;
388         break;
389       case 'o':  case 'O':
390         if (radix != 0)
391           return "duplicate radix specifier";
392         radix = 8;
393         break;
394       case 'd':  case 'D':
395         if (radix != 0)
396           return "duplicate radix specifier";
397         radix = 10;
398         break;
399       case 'x':  case 'X':
400         if (radix != 0)
401           return "duplicate radix specifier";
402         radix = 16;
403         break;
404       case 'e':  case 'E':
405       case 'i':  case 'I':
406         if (exactness != '\0')
407           {
408         if (exactness == ' ')
409           return "non-prefix exactness specifier";
410         else
411           return "duplicate exactness specifier";
412           }
413         exactness = ch;
414         break;
415       default:
416         int value = 0;
417         for (;;)
418           {
419         int dig = Character.digit(ch, 10);
420         if (dig < 0)
421           break;
422         value = 10 * value + dig;
423         if (pos >= end)
424           return "missing letter after '#'";
425         ch = buffer[pos++];
426           }
427         if (ch == 'R' || ch == 'r')
428           {
429         if (radix != 0)
430           return "duplicate radix specifier";
431         if (value < 2 || value > 35)
432           return "invalid radix specifier";
433         radix = value;
434         break;
435           }
436         return "unknown modifier '#" + ch + '\'';
437       }
438     if (pos >= end)
439       return "no digits";
440     ch = buffer[pos++];
441       }
442     if (exactness == '\0')
443       exactness = ' ';
444     if (radix == 0)
445       {
446     for (int i = count;  ; )
447       {
448         if (--i < 0)
449           {
450         // FIXME - should get *read-base* in CommonLisp:
451         // radix = *read_base*;
452         radix = 10;
453         break;
454           }
455         if (buffer[start+i] == '.')
456           {
457         radix = 10;
458         break;
459           }
460       }
461       }
462 
463     boolean negative = ch == '-';
464     boolean numeratorNegative = negative;
465     if (ch == '-' || ch == '+')
466       {
467     if (pos >= end)
468       return "no digits following sign";
469     ch = buffer[pos++];
470       }
471 
472     // Special case for '+i' and '-i'.
473     if ((ch == 'i' || ch == 'I') && pos == end && start == pos - 2
474     && (flags & SCM_COMPLEX) != 0)
475       {
476     char sign = buffer[start];
477     if (sign != '+' && sign != '-')
478       return "no digits";
479     if (exactness == 'i' || exactness == 'I')
480       return new DComplex(0, negative ? -1 : 1);
481     return negative ? Complex.imMinusOne() : Complex.imOne();
482       }
483 
484     int realStart = pos - 1;
485     boolean hash_seen = false;
486     char exp_seen = '\000';
487     int digits_start = -1;
488     int decimal_point = -1;
489     boolean copy_needed = false;
490     boolean underscore_seen = false;
491     IntNum numerator = null;
492     long lvalue = 0;
493   loop:
494     for (;;)
495       {
496     int digit = Character.digit(ch, radix);
497     if (digit >= 0)
498       {
499         if (hash_seen && decimal_point < 0)
500           return "digit after '#' in number";
501         if (digits_start < 0)
502           digits_start = pos - 1;
503         lvalue = radix * lvalue + digit;
504       }
505     else
506       {
507         switch (ch)
508           {
509         /*
510           case '_':
511         underscore_seen = true;
512         break;
513         */
514         /*
515           case '#':
516         if (radix != 10)
517           return "'#' in non-decimal number";
518         if (digits_start < 0)
519           return "'#' with no preceeding digits in number";
520         hash_seen = true;
521         break;
522         */
523           case '.':
524         if (decimal_point >= 0)
525           return "duplicate '.' in number";
526         if (radix != 10)
527           return "'.' in non-decimal number";
528         decimal_point = pos - 1;
529         break;
530           case 'e': case 's': case 'f': case 'd': case 'l':
531           case 'E': case 'S': case 'F': case 'D': case 'L':
532         if (pos == end || radix != 10)
533           {
534             pos--;
535             break loop;
536           }
537         char next = buffer[pos];
538         if (next == '+' || next == '-')
539           {
540             if (++ pos >= end
541             || Character.digit(buffer[pos], 10) < 0)
542               return "missing exponent digits";
543           }
544         else if (Character.digit(next, 10) < 0)
545           {
546             pos--;
547             break loop;
548           }
549         if (exp_seen != '\000')
550           return "duplicate exponent";
551         if (radix != 10)
552           return "exponent in non-decimal number";
553         if (digits_start < 0)
554           return "mantissa with no digits";
555         exp_seen = ch;
556         for (;;)
557           {
558             pos++;
559             if (pos >= end || Character.digit(buffer[pos], 10) < 0)
560               break loop;
561           }
562           case '/':
563         if (numerator != null)
564           return "multiple fraction symbol '/'";
565         if (digits_start < 0)
566           return "no digits before fraction symbol '/'";
567         if (exp_seen != '\000' || decimal_point >= 0)
568           return "fraction symbol '/' following exponent or '.'";
569         numerator = valueOf(buffer, digits_start, pos - digits_start,
570                     radix, negative, lvalue);
571         digits_start = -1;
572         lvalue = 0;
573         negative = false;
574         hash_seen = false;
575         underscore_seen = false;
576         break;
577           default:
578         pos--;
579         break loop;
580           }
581       }
582     if (pos == end)
583       break;
584     ch = buffer[pos++];
585       }
586 
587     if (digits_start < 0)
588       return "no digits";
589 
590     if (hash_seen || underscore_seen)
591       {
592     // FIXME make copy, removing '_' and replacing '#' by '0'.
593       }
594 
595     boolean inexact = (exactness == 'i' || exactness == 'I'
596                || (exactness == ' ' && hash_seen));
597     RealNum number = null;
598     if (exp_seen != '\000' || decimal_point >= 0)
599       {
600     if (digits_start > decimal_point && decimal_point >= 0)
601       digits_start = decimal_point;
602     if (numerator != null)
603       return "floating-point number after fraction symbol '/'";
604     String   str = new String  (buffer, digits_start, pos - digits_start);
605     double d = Convert.parseDouble(str);
606     number = new DFloNum(negative ? - d : d);
607       }
608     else
609       {
610     IntNum iresult = valueOf(buffer, digits_start, pos - digits_start,
611                  radix, negative, lvalue);
612     if (numerator == null)
613       number = iresult;
614     else
615       {
616         // Check for zero denominator values: 0/0, n/0, and -n/0
617         // (i.e. NaN, Infinity, and -Infinity).
618         if (iresult.isZero ())
619           {
620         boolean numeratorZero = numerator.isZero();
621         if (inexact)
622           number =  new DFloNum ((numeratorZero ? Double.NaN
623                       : numeratorNegative ? Double.NEGATIVE_INFINITY
624                       : Double.POSITIVE_INFINITY));
625         else if (numeratorZero)
626           return "0/0 is undefined";
627         else
628           number = RatNum.make(numerator, iresult);
629           }
630         else
631           {
632         number = RatNum.make(numerator, iresult);
633           }
634       }
635     if (inexact && number.isExact())
636       // We want #i-0 or #i-0/1 to be -0.0, not 0.0.
637       number = new DFloNum(numeratorNegative && number.isZero() ? -0.0
638                    : number.doubleValue());
639       }
640 
641     if (exactness == 'e' || exactness == 'E')
642       number = number.toExact();
643 
644     if (pos < end)
645       {
646     ch = buffer[pos++];
647 
648     if (ch == '@')
649       { /* polar notation */
650         Object   angle = parseNumber(buffer, pos, end - pos,
651                        exactness, 10, flags);
652         if (angle instanceof String  )
653           return angle;
654         if (! (angle instanceof RealNum))
655           return "invalid complex polar constant";
656         RealNum rangle = (RealNum) angle;
657         /* r4rs requires 0@1.0 to be inexact zero, even if (make-polar
658          * 0 1.0) is exact zero, so check for this case.  */
659         if (number.isZero () && !rangle.isExact ())
660           return new DFloNum (0.0);
661 
662         return Complex.polar (number, rangle);
663       }
664 
665     if (ch == '-' || ch == '+')
666       {
667         pos--;
668         Object   imag = parseNumber(buffer, pos, end - pos,
669                       exactness, 10, flags);
670         if (imag instanceof String  )
671           return imag;
672         if (! (imag instanceof Complex))
673           return "invalid numeric constant ("+imag+")";
674         Complex cimag = (Complex) imag;
675         RealNum re = cimag.re();
676         if (! re.isZero())
677           return "invalid numeric constant";
678         return Complex.make(number, cimag.im());
679       }
680 
681     int lcount = 0;
682     for (;;)
683       {
684         if (! Character.isLetter(ch))
685           {
686         pos--;
687         break;
688           }
689         lcount++;
690         if (pos == end)
691           break;
692         ch = buffer[pos++];
693       }
694 
695     if (lcount == 1)
696       {
697         char prev = buffer[pos-1];
698         if (prev == 'i' || prev == 'I')
699           {
700         if (pos < end)
701           return "junk after imaginary suffix 'i'";
702         return Complex.make(IntNum.zero (), number);
703           }
704       }
705     if (lcount > 0)
706       {
707         Object   unit = null;
708         for (;;)
709           {
710         String   word = new String  (buffer, pos - lcount, lcount);
711         Object   u = lookupUnit(word);
712 
713         int power = 1;
714         if (pos < end)
715           {
716             ch = buffer[pos];
717             if (ch == '^' && ++pos < end)
718               ch = buffer[pos];
719             boolean neg = ch == '-';
720             if ((ch == '-' || ch == '+') && ++pos < end)
721               ch = buffer[pos];
722             power = -1;
723             for (;;)
724               {
725             int d = Character.digit(ch, 10);
726             if (d < 0)
727               {
728                 if (power < 0)
729                   return "junk after unit name";
730                 break;
731               }
732             power = power < 0 ? d  : 10 * power + d;
733             if (++pos == end)
734               break;
735             if (power > 1000000)
736               return "unit power too large";
737             ch = buffer[pos];
738               }
739             if (neg) power = -power;
740           }
741 
742         // "expt" and "*" are too open to name clashes. FIXME.
743         if (power != 1)
744           {
745             if (u instanceof Unit)
746               u = Unit.pow((Unit) u, power);
747             else
748               u = LList.list3("expt", u, IntNum.make(power));
749           }
750         if (unit == null)
751           unit = u;
752         else if (u instanceof Unit && unit instanceof Unit)
753           unit = Unit.times((Unit) unit, (Unit) u);
754         else
755           unit = LList.list3("*", unit, u);
756         if (pos >= end)
757           break;
758         ch = buffer[pos++];
759         if (ch == '*')
760           {
761             if (pos == end)
762               return "end of token after '*'";
763             ch = buffer[pos++];
764           }
765         lcount = 0;
766         for (;;)
767           {
768             if (! Character.isLetter(ch))
769               {
770             pos--;
771             break;
772               }
773             lcount++;
774             if (pos == end)
775               break;
776             ch = buffer[pos++];
777           }
778         if (lcount == 0)
779           return "excess junk after unit";
780           }
781 
782         if (unit == null)
783           return "expected unit";
784         else if (unit instanceof Unit)
785           return Quantity.make(number, (Unit) unit);
786         else
787           return LList.list3("*", number, unit);
788       }
789     else
790       return "excess junk after number";
791     
792       }
793     return number;
794   }
795 
796   private static IntNum valueOf (char[] buffer, int digits_start,
797                  int number_of_digits,
798                  int radix, boolean negative,
799                  long lvalue)
800   {
801     // It turns out that if number_of_digits + radix <= 28
802     // then the value will fit in a long without overflow,
803     // so we can use the value calculated in lvalue.
804     if (number_of_digits + radix <= 28)
805       return IntNum.make(negative ? - lvalue : lvalue);
806     else
807       return IntNum.valueOf(buffer, digits_start, number_of_digits,
808                 radix, negative);
809   }
810 
811   protected Object   returnSymbol(int startPos, int endPos, ReadTable rtable)
812   {
813     char readCase = getReadCase();
814     if (readCase == 'I')
815       {
816     int upperCount = 0;
817     int lowerCount = 0;
818     for (int i = startPos;  i < endPos;  i++)
819       {
820         char ch = tokenBuffer[i];
821         if (ch == TOKEN_ESCAPE_CHAR)
822           i++;
823         else if (Character.isLowerCase(ch))
824           lowerCount++;
825         else if (Character.isUpperCase(ch))
826           upperCount++;
827       }
828     if (lowerCount == 0)
829       readCase = 'D';
830     else if (upperCount == 0)
831       readCase = 'U';
832     else
833       readCase = 'P';
834       }
835 
836     int packageMarker = -1;
837     int j = startPos;
838     for (int i = startPos;  i < endPos;  i++)
839       {
840     char ch = tokenBuffer[i];
841     if (ch == TOKEN_ESCAPE_CHAR)
842       {
843         if (++ i < endPos)
844           tokenBuffer[j++] = tokenBuffer[i];
845         continue;
846       }
847     if (ch == ':')
848       packageMarker = packageMarker >= 0 ? -1 : j;
849     else if (readCase == 'U')
850       ch = Character.toUpperCase(ch);
851     else if (readCase == 'D')
852       ch = Character.toLowerCase(ch);
853     tokenBuffer[j++] = ch;
854       }
855     endPos = j;
856 
857     int len = endPos - startPos;
858 
859     if (initialColonIsKeyword && packageMarker == startPos && len > 1)
860       {
861     startPos++;
862     String   str = new String  (tokenBuffer, startPos, endPos-startPos);
863     return Keyword.make(str.intern());
864     }
865     if (finalColonIsKeyword && packageMarker == endPos - 1 && len > 1)
866       {
867     String   str = new String  (tokenBuffer, startPos, len - 1);
868     return Keyword.make(str.intern());
869       }
870     return rtable.makeSymbol(new String  (tokenBuffer, startPos, len));
871   }
872 
873   /** Classify and return a token in tokenBuffer from startPos to endPos. */
874   public Object   handleToken(int startPos, int endPos,  ReadTable rtable)
875   {
876     Object   value = parseNumber(tokenBuffer, startPos, endPos - startPos,
877                    '\0', 0, SCM_NUMBERS);
878     if (value != null && ! (value instanceof String  ))
879       return value;
880     if (isPotentialNumber(tokenBuffer, startPos, endPos))
881       {
882     error(value == null ? "not a valid number"
883           : "not a valid number: " + value);
884     return IntNum.zero();
885       }
886     return returnSymbol(startPos, endPos, rtable);
887   }
888 
889   /** Reads a C-style String escape sequence.
890    * Assume '\\' has already been read.
891    * Return the converted character, or -1 on EOF, or -2 to ignore. */
892   public int readEscape()
893     throws java.io.IOException  , SyntaxException 
894   {
895     int c = read();
896     if (c < 0)
897       {
898     eofError("unexpected EOF in character literal");
899     return -1;
900       }
901     return readEscape(c);
902   }
903 
904   public final int readEscape(int c)
905     throws java.io.IOException  , SyntaxException 
906   {
907     switch ((char) c)
908       {
909       case 'a':  c =  7;  break;  // alarm/bell
910       case 'b':  c =  8;  break;  // backspace
911       case 't':  c =  9;  break;  // tab
912       case 'n':  c = 10;  break;  // newline
913       case 'v':  c = 11;  break;  // vertical tab
914       case 'f':  c = 12;  break;  // formfeed
915       case 'r':  c = 13;  break;  // carriage return
916       case 'e':  c = 27;  break;  // escape
917       case '\"': c = 34;  break;  // quote
918       case '\\': c = 92;  break;  // backslash
919       case ' ': // Skip to end of line, inclusive.
920     for (;;)
921       {
922         c = read();
923         if (c < 0)
924           {
925         eofError("unexpected EOF in character literal");
926         return -1;
927           }
928         if (c == '\n')
929           return -2;
930         if (c == '\r')
931           {
932         if (peek() == '\n')
933           skip();
934         return -2;
935           }
936         if (c != ' ' && c != '\t')
937           {
938         unread(c);
939         break;
940           }
941       }
942       case '\r':
943     if (peek() == '\n')
944       skip();
945     return -2;
946       case '\n':
947     return -2;
948       case 'M':
949     c = read();
950     if (c != '-')
951       {
952         error("Invalid escape character syntax");
953         return '?';
954       }
955     c = read();
956     if (c == '\\')
957       c = readEscape();
958     return c | 0200;
959       case 'C':
960     c = read();
961     if (c != '-')
962       {
963         error("Invalid escape character syntax");
964         return '?';
965       }
966     /* ... fall through ... */
967       case '^':
968     c = read();
969     if (c == '\\')
970       c = readEscape();
971     if (c == '?')
972       return 0177;
973     return c & (0200 | 037);
974       case '0':
975       case '1':
976       case '2':
977       case '3':
978       case '4':
979       case '5':
980       case '6':
981       case '7':
982     /* An octal escape, as in ANSI C.  */
983     c = c - '0';
984     for (int count = 0;  ++count < 3; )
985       {
986         int d = read();
987         int v = Character.digit((char) d, 8);
988         if (v >= 0)
989           c = (c << 3) + v;
990         else
991           {
992         if (d >= 0)
993           unread(d);
994         break;
995           }
996       }
997     break;
998       case 'u':
999     c = 0;
1000    for (int i = 4;  --i >= 0; )
1001      {
1002        int d = read ();
1003        if (d < 0)
1004          eofError("premature EOF in \\u escape");
1005        int v = Character.digit ((char) d, 16);
1006        if (v < 0)
1007          error("non-hex character following \\u");
1008        c = 16 * c + v;
1009      }
1010    break;
1011      case 'x':
1012    c = 0;
1013    /* A hex escape, as in ANSI C.  */
1014    for (;;)
1015      {
1016        int d = read();
1017        int v = Character.digit((char) d, 16);
1018        if (v >= 0)
1019          c = (c << 4) + v;
1020        else
1021          {
1022        if (d >= 0)
1023          unread(d);
1024        break;
1025          }
1026      }
1027    break;
1028      default:  break;
1029      }
1030    return c;
1031  }
1032
1033  public final Object   readObject (int c)
1034      throws java.io.IOException  , SyntaxException
1035  {
1036    unread(c);
1037    return readObject();
1038  }
1039
1040  /** Read a "command" - a top-level expression or declaration.
1041   * Return Sequence.eofValue of end of file. */
1042  public Object   readCommand ()
1043      throws java.io.IOException  , SyntaxException
1044  {
1045    return readObject();
1046  }
1047
1048  protected Object   makeNil ()
1049  {
1050    return LList.Empty;
1051  }
1052
1053  protected Object   makePair (Object   car, int line, int column)
1054  {
1055    return PairWithPosition.make(car, LList.Empty,
1056                                 port.getName(), line + 1, column + 1);
1057  }
1058
1059  public Object   makePair (Object   car, Object   cdr)
1060  {
1061    Object   pair = makePair(car, -1, -1);
1062    setCdr(pair, cdr);
1063    return pair;
1064  }
1065
1066  protected void setCdr (Object   pair, Object   cdr)
1067  {
1068    ((Pair) pair).cdr = cdr;
1069  }
1070
1071  /** Read a number from a LispReader
1072   * @param previous number of characters already pushed on tokenBuffer
1073   * @param reader LispReader to read from
1074   * @param radix base to use or -1 if unspecified
1075   */
1076  public static Object   readNumberWithRadix(int previous, LispReader reader, int radix)
1077    throws java.io.IOException  , SyntaxException
1078  {
1079    int startPos = reader.tokenBufferLength - previous;
1080    reader.readToken(reader.read(), 'P', ReadTable.getCurrent());
1081    int endPos = reader.tokenBufferLength;
1082    if (startPos == endPos)
1083      {
1084    reader.error("missing numeric token");
1085    return IntNum.zero();
1086      }
1087    Object   result = LispReader.parseNumber(reader.tokenBuffer, startPos,
1088                       endPos - startPos, '\0', radix, 0);
1089    if (result instanceof String  )
1090      {
1091    reader.error((String  ) result);
1092    return IntNum.zero();
1093      }
1094    else if (result == null)
1095      {
1096    reader.error("invalid numeric constant");
1097    return IntNum.zero();
1098      }
1099    else
1100      return result;
1101  }
1102
1103  public static Object   readCharacter (LispReader reader)
1104    throws java.io.IOException  , SyntaxException
1105  {
1106    int ch = reader.read();
1107    if (ch < 0)
1108      reader.eofError("unexpected EOF in character literal");
1109    int startPos = reader.tokenBufferLength;
1110    reader.tokenBufferAppend(ch);
1111    reader.readToken(reader.read(), 'D', ReadTable.getCurrent());
1112    int length = reader.tokenBufferLength - startPos;
1113    if (length == 1)
1114      return Char.make(reader.tokenBuffer[startPos]);
1115    String   name = new String  (reader.tokenBuffer, startPos, length);
1116    ch = Char.nameToChar(name);
1117    if (ch >= 0)
1118      return Char.make(ch);
1119    ch = Character.digit(reader.tokenBuffer[startPos], 8);
1120    if (ch >= 0)
1121      {
1122    int value = ch;
1123    for (int i = 1;  ;  i++)
1124      {
1125        if (i == length)
1126          return Char.make(value);
1127        ch = Character.digit(reader.tokenBuffer[startPos + i], 8);
1128        if (ch < 0)
1129          break;
1130        value = 8 * value + ch;
1131      }
1132      }
1133    reader.error("unknown character name: " + name);
1134    return Char.make('?');
1135  }
1136
1137  public static Object   readSpecial (LispReader reader)
1138    throws java.io.IOException  , SyntaxException
1139  {
1140    int ch = reader.read();
1141    if (ch < 0)
1142      reader.eofError("unexpected EOF in #! special form");
1143
1144    /* Handle Unix #!PROGRAM line at start of file. */
1145    if (ch == '/'
1146    && reader.getLineNumber() == 0
1147    && reader.getColumnNumber() == 3)
1148      {
1149    ReaderIgnoreRestOfLine.getInstance().read(reader, '#', 1);
1150    return Values.empty;
1151      }
1152
1153    int startPos = reader.tokenBufferLength;
1154    reader.tokenBufferAppend(ch);
1155    reader.readToken(reader.read(), 'D', ReadTable.getCurrent());
1156    int length = reader.tokenBufferLength - startPos;
1157    String   name = new String  (reader.tokenBuffer, startPos, length);
1158    if (name.equals("optional"))
1159      return Special.optional;
1160    if (name.equals("rest"))
1161      return Special.rest;
1162    if (name.equals("key"))
1163      return Special.key;
1164    if (name.equals("eof"))
1165      return Special.eof;
1166    if (name.equals("void"))
1167      //return Values.empty;
1168      return QuoteExp.voidExp;
1169    if (name.equals("default"))
1170      return Special.dfault;
1171    if (name.equals("undefined"))
1172      return Special.undefined;
1173    if (name.equals("null"))
1174      return null;
1175    reader.error("unknown named constant #!"+name);
1176    return null;
1177  }
1178
1179  public static SimpleVector
1180  readSimpleVector(LispReader reader, char kind)
1181    throws java.io.IOException  , SyntaxException
1182  {
1183    int size = 0;
1184    int ch;
1185    for (;;)
1186      {
1187    ch = reader.read();
1188    if (ch < 0)
1189      reader.eofError("unexpected EOF reading uniform vector");
1190    int digit = Character.digit((char) ch, 10);
1191    if (digit < 0)
1192      break;
1193    size = size * 10 + digit;
1194      }
1195    if (! (size == 8 || size == 16 || size == 32 || size == 64)
1196        || (kind == 'F' && size < 32)
1197        || ch != '(')
1198      {
1199        reader.error("invalid uniform vector syntax");
1200        return null;
1201      }
1202    Object   list = ReaderParens.readList(reader, '(', -1, ')');
1203    int len = LList.listLength(list, false);
1204    if (len < 0)
1205      {
1206        reader.error("invalid elements in uniform vector syntax");
1207        return null;
1208      }
1209    Sequence q = (Sequence) list;
1210    switch (kind)
1211      {
1212      case 'F':
1213        switch (size)
1214          {
1215          case 32:  return new F32Vector(q);
1216          case 64:  return new F64Vector(q);
1217          }
1218      case 'S':
1219        switch (size)
1220          {
1221          case  8:  return new S8Vector(q);
1222          case 16:  return new S16Vector(q);
1223          case 32:  return new S32Vector(q);
1224          case 64:  return new S64Vector(q);
1225          }
1226      case 'U':
1227        switch (size)
1228          {
1229          case  8:  return new U8Vector(q);
1230          case 16:  return new U16Vector(q);
1231          case 32:  return new U32Vector(q);
1232          case 64:  return new U64Vector(q);
1233          }
1234      }
1235    return null;
1236  }
1237}
1238
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags