altos/lisp: Add floats
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_lisp.h"
16 #include "ao_lisp_read.h"
17 #include <math.h>
18
19 static const uint16_t   lex_classes[128] = {
20         IGNORE,         /* ^@ */
21         IGNORE,         /* ^A */
22         IGNORE,         /* ^B */
23         IGNORE,         /* ^C */
24         IGNORE,         /* ^D */
25         IGNORE,         /* ^E */
26         IGNORE,         /* ^F */
27         IGNORE,         /* ^G */
28         IGNORE,         /* ^H */
29         WHITE,          /* ^I */
30         WHITE,          /* ^J */
31         WHITE,          /* ^K */
32         WHITE,          /* ^L */
33         WHITE,          /* ^M */
34         IGNORE,         /* ^N */
35         IGNORE,         /* ^O */
36         IGNORE,         /* ^P */
37         IGNORE,         /* ^Q */
38         IGNORE,         /* ^R */
39         IGNORE,         /* ^S */
40         IGNORE,         /* ^T */
41         IGNORE,         /* ^U */
42         IGNORE,         /* ^V */
43         IGNORE,         /* ^W */
44         IGNORE,         /* ^X */
45         IGNORE,         /* ^Y */
46         IGNORE,         /* ^Z */
47         IGNORE,         /* ^[ */
48         IGNORE,         /* ^\ */
49         IGNORE,         /* ^] */
50         IGNORE,         /* ^^ */
51         IGNORE,         /* ^_ */
52         PRINTABLE|WHITE,        /*    */
53         PRINTABLE,              /* ! */
54         PRINTABLE|STRINGC,      /* " */
55         PRINTABLE|POUND,        /* # */
56         PRINTABLE,              /* $ */
57         PRINTABLE,              /* % */
58         PRINTABLE,              /* & */
59         PRINTABLE|SPECIAL,      /* ' */
60         PRINTABLE|SPECIAL,      /* ( */
61         PRINTABLE|SPECIAL,      /* ) */
62         PRINTABLE,              /* * */
63         PRINTABLE|SIGN,         /* + */
64         PRINTABLE,              /* , */
65         PRINTABLE|SIGN,         /* - */
66         PRINTABLE|DOTC|FLOATC,  /* . */
67         PRINTABLE,              /* / */
68         PRINTABLE|DIGIT,        /* 0 */
69         PRINTABLE|DIGIT,        /* 1 */
70         PRINTABLE|DIGIT,        /* 2 */
71         PRINTABLE|DIGIT,        /* 3 */
72         PRINTABLE|DIGIT,        /* 4 */
73         PRINTABLE|DIGIT,        /* 5 */
74         PRINTABLE|DIGIT,        /* 6 */
75         PRINTABLE|DIGIT,        /* 7 */
76         PRINTABLE|DIGIT,        /* 8 */
77         PRINTABLE|DIGIT,        /* 9 */
78         PRINTABLE,              /* : */
79         PRINTABLE|COMMENT,      /* ; */
80         PRINTABLE,              /* < */
81         PRINTABLE,              /* = */
82         PRINTABLE,              /* > */
83         PRINTABLE,              /* ? */
84         PRINTABLE,              /*  @ */
85         PRINTABLE,              /*  A */
86         PRINTABLE,              /*  B */
87         PRINTABLE,              /*  C */
88         PRINTABLE,              /*  D */
89         PRINTABLE|FLOATC,       /*  E */
90         PRINTABLE,              /*  F */
91         PRINTABLE,              /*  G */
92         PRINTABLE,              /*  H */
93         PRINTABLE,              /*  I */
94         PRINTABLE,              /*  J */
95         PRINTABLE,              /*  K */
96         PRINTABLE,              /*  L */
97         PRINTABLE,              /*  M */
98         PRINTABLE,              /*  N */
99         PRINTABLE,              /*  O */
100         PRINTABLE,              /*  P */
101         PRINTABLE,              /*  Q */
102         PRINTABLE,              /*  R */
103         PRINTABLE,              /*  S */
104         PRINTABLE,              /*  T */
105         PRINTABLE,              /*  U */
106         PRINTABLE,              /*  V */
107         PRINTABLE,              /*  W */
108         PRINTABLE,              /*  X */
109         PRINTABLE,              /*  Y */
110         PRINTABLE,              /*  Z */
111         PRINTABLE,              /*  [ */
112         PRINTABLE|BACKSLASH,    /*  \ */
113         PRINTABLE,              /*  ] */
114         PRINTABLE,              /*  ^ */
115         PRINTABLE,              /*  _ */
116         PRINTABLE,              /*  ` */
117         PRINTABLE,              /*  a */
118         PRINTABLE,              /*  b */
119         PRINTABLE,              /*  c */
120         PRINTABLE,              /*  d */
121         PRINTABLE|FLOATC,       /*  e */
122         PRINTABLE,              /*  f */
123         PRINTABLE,              /*  g */
124         PRINTABLE,              /*  h */
125         PRINTABLE,              /*  i */
126         PRINTABLE,              /*  j */
127         PRINTABLE,              /*  k */
128         PRINTABLE,              /*  l */
129         PRINTABLE,              /*  m */
130         PRINTABLE,              /*  n */
131         PRINTABLE,              /*  o */
132         PRINTABLE,              /*  p */
133         PRINTABLE,              /*  q */
134         PRINTABLE,              /*  r */
135         PRINTABLE,              /*  s */
136         PRINTABLE,              /*  t */
137         PRINTABLE,              /*  u */
138         PRINTABLE,              /*  v */
139         PRINTABLE,              /*  w */
140         PRINTABLE,              /*  x */
141         PRINTABLE,              /*  y */
142         PRINTABLE,              /*  z */
143         PRINTABLE,              /*  { */
144         PRINTABLE,              /*  | */
145         PRINTABLE,              /*  } */
146         PRINTABLE,              /*  ~ */
147         IGNORE,                 /*  ^? */
148 };
149
150 static int lex_unget_c;
151
152 static inline int
153 lex_get()
154 {
155         int     c;
156         if (lex_unget_c) {
157                 c = lex_unget_c;
158                 lex_unget_c = 0;
159         } else {
160                 c = ao_lisp_getc();
161         }
162         return c;
163 }
164
165 static inline void
166 lex_unget(int c)
167 {
168         if (c != EOF)
169                 lex_unget_c = c;
170 }
171
172 static uint16_t lex_class;
173
174 static int
175 lexc(void)
176 {
177         int     c;
178         do {
179                 c = lex_get();
180                 if (c == EOF) {
181                         c = 0;
182                         lex_class = ENDOFFILE;
183                 } else {
184                         c &= 0x7f;
185                         lex_class = lex_classes[c];
186                 }
187         } while (lex_class & IGNORE);
188         return c;
189 }
190
191 static int
192 lex_quoted(void)
193 {
194         int     c;
195         int     v;
196         int     count;
197
198         c = lex_get();
199         if (c == EOF) {
200                 lex_class = ENDOFFILE;
201                 return 0;
202         }
203         lex_class = 0;
204         c &= 0x7f;
205         switch (c) {
206         case 'n':
207                 return '\n';
208         case 'f':
209                 return '\f';
210         case 'b':
211                 return '\b';
212         case 'r':
213                 return '\r';
214         case 'v':
215                 return '\v';
216         case 't':
217                 return '\t';
218         case '0':
219         case '1':
220         case '2':
221         case '3':
222         case '4':
223         case '5':
224         case '6':
225         case '7':
226                 v = c - '0';
227                 count = 1;
228                 while (count <= 3) {
229                         c = lex_get();
230                         if (c == EOF)
231                                 return EOF;
232                         c &= 0x7f;
233                         if (c < '0' || '7' < c) {
234                                 lex_unget(c);
235                                 break;
236                         }
237                         v = (v << 3) + c - '0';
238                         ++count;
239                 }
240                 return v;
241         default:
242                 return c;
243         }
244 }
245
246 #define AO_LISP_TOKEN_MAX       32
247
248 static char     token_string[AO_LISP_TOKEN_MAX];
249 static int32_t  token_int;
250 static int      token_len;
251 static float    token_float;
252
253 static inline void add_token(int c) {
254         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
255                 token_string[token_len++] = c;
256 }
257
258 static inline void del_token(void) {
259         if (token_len > 0)
260                 token_len--;
261 }
262
263 static inline void end_token(void) {
264         token_string[token_len] = '\0';
265 }
266
267 struct namedfloat {
268         const char      *name;
269         float           value;
270 };
271
272 static const struct namedfloat namedfloats[] = {
273         { .name = "+inf.0", .value = INFINITY },
274         { .name = "-inf.0", .value = -INFINITY },
275         { .name = "+nan.0", .value = NAN },
276         { .name = "-nan.0", .value = NAN },
277 };
278
279 #define NUM_NAMED_FLOATS        (sizeof namedfloats / sizeof namedfloats[0])
280
281 static int
282 _lex(void)
283 {
284         int     c;
285
286         token_len = 0;
287         for (;;) {
288                 c = lexc();
289                 if (lex_class & ENDOFFILE)
290                         return END;
291
292                 if (lex_class & WHITE)
293                         continue;
294
295                 if (lex_class & COMMENT) {
296                         while ((c = lexc()) != '\n') {
297                                 if (lex_class & ENDOFFILE)
298                                         return END;
299                         }
300                         continue;
301                 }
302
303                 if (lex_class & (SPECIAL|DOTC)) {
304                         add_token(c);
305                         end_token();
306                         switch (c) {
307                         case '(':
308                         case '[':
309                                 return OPEN;
310                         case ')':
311                         case ']':
312                                 return CLOSE;
313                         case '\'':
314                                 return QUOTE;
315                         case '.':
316                                 return DOT;
317                         }
318                 }
319                 if (lex_class & POUND) {
320                         c = lexc();
321                         switch (c) {
322                         case 't':
323                                 add_token(c);
324                                 end_token();
325                                 return BOOL;
326                         case 'f':
327                                 add_token(c);
328                                 end_token();
329                                 return BOOL;
330                         case '\\':
331                                 for (;;) {
332                                         int alphabetic;
333                                         c = lexc();
334                                         alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
335                                         if (token_len == 0) {
336                                                 add_token(c);
337                                                 if (!alphabetic)
338                                                         break;
339                                         } else {
340                                                 if (alphabetic)
341                                                         add_token(c);
342                                                 else {
343                                                         lex_unget(c);
344                                                         break;
345                                                 }
346                                         }
347                                 }
348                                 end_token();
349                                 if (token_len == 1)
350                                         token_int = token_string[0];
351                                 else if (!strcmp(token_string, "space"))
352                                         token_int = ' ';
353                                 else if (!strcmp(token_string, "newline"))
354                                         token_int = '\n';
355                                 else if (!strcmp(token_string, "tab"))
356                                         token_int = '\t';
357                                 else if (!strcmp(token_string, "return"))
358                                         token_int = '\r';
359                                 else if (!strcmp(token_string, "formfeed"))
360                                         token_int = '\f';
361                                 else {
362                                         ao_lisp_error(AO_LISP_INVALID, "invalid character token #\\%s", token_string);
363                                         continue;
364                                 }
365                                 return NUM;
366                         }
367                 }
368                 if (lex_class & STRINGC) {
369                         for (;;) {
370                                 c = lexc();
371                                 if (lex_class & BACKSLASH)
372                                         c = lex_quoted();
373                                 if (lex_class & (STRINGC|ENDOFFILE)) {
374                                         end_token();
375                                         return STRING;
376                                 }
377                                 add_token(c);
378                         }
379                 }
380                 if (lex_class & PRINTABLE) {
381                         int     isfloat;
382                         int     hasdigit;
383                         int     isneg;
384                         int     isint;
385                         int     epos;
386
387                         isfloat = 1;
388                         isint = 1;
389                         hasdigit = 0;
390                         token_int = 0;
391                         isneg = 0;
392                         epos = 0;
393                         for (;;) {
394                                 if (!(lex_class & NUMBER)) {
395                                         isint = 0;
396                                         isfloat = 0;
397                                 } else {
398                                         if (!(lex_class & INTEGER))
399                                                 isint = 0;
400                                         if (token_len != epos &&
401                                             (lex_class & SIGN))
402                                         {
403                                                 isint = 0;
404                                                 isfloat = 0;
405                                         }
406                                         if (c == '-')
407                                                 isneg = 1;
408                                         if (c == '.' && epos != 0)
409                                                 isfloat = 0;
410                                         if (c == 'e' || c == 'E') {
411                                                 if (token_len == 0)
412                                                         isfloat = 0;
413                                                 else
414                                                         epos = token_len + 1;
415                                         }
416                                         if (lex_class & DIGIT) {
417                                                 hasdigit = 1;
418                                                 if (isint)
419                                                         token_int = token_int * 10 + c - '0';
420                                         }
421                                 }
422                                 add_token (c);
423                                 c = lexc ();
424                                 if ((lex_class & (NOTNAME)) && (c != '.' || !isfloat)) {
425                                         unsigned int u;
426 //                                      if (lex_class & ENDOFFILE)
427 //                                              clearerr (f);
428                                         lex_unget(c);
429                                         end_token ();
430                                         if (isint && hasdigit) {
431                                                 if (isneg)
432                                                         token_int = -token_int;
433                                                 return NUM;
434                                         }
435                                         if (isfloat && hasdigit) {
436                                                 token_float = atof(token_string);
437                                                 return FLOAT;
438                                         }
439                                         for (u = 0; u < NUM_NAMED_FLOATS; u++)
440                                                 if (!strcmp(namedfloats[u].name, token_string)) {
441                                                         token_float = namedfloats[u].value;
442                                                         return FLOAT;
443                                                 }
444                                         return NAME;
445                                 }
446                         }
447                 }
448         }
449 }
450
451 static inline int lex(void)
452 {
453         int     parse_token = _lex();
454         DBGI("token %d (%s)\n", parse_token, token_string);
455         return parse_token;
456 }
457
458 static int parse_token;
459
460 struct ao_lisp_cons     *ao_lisp_read_cons;
461 struct ao_lisp_cons     *ao_lisp_read_cons_tail;
462 struct ao_lisp_cons     *ao_lisp_read_stack;
463
464 #define READ_IN_QUOTE   0x01
465 #define READ_SAW_DOT    0x02
466 #define READ_DONE_DOT   0x04
467
468 static int
469 push_read_stack(int cons, int read_state)
470 {
471         DBGI("push read stack %p 0x%x\n", ao_lisp_read_cons, read_state);
472         DBG_IN();
473         if (cons) {
474                 ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons),
475                                                        ao_lisp__cons(ao_lisp_int_poly(read_state),
476                                                                      ao_lisp_cons_poly(ao_lisp_read_stack)));
477                 if (!ao_lisp_read_stack)
478                         return 0;
479         }
480         ao_lisp_read_cons = NULL;
481         ao_lisp_read_cons_tail = NULL;
482         return 1;
483 }
484
485 static int
486 pop_read_stack(int cons)
487 {
488         int     read_state = 0;
489         if (cons) {
490                 ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car);
491                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
492                 read_state = ao_lisp_poly_int(ao_lisp_read_stack->car);
493                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
494                 for (ao_lisp_read_cons_tail = ao_lisp_read_cons;
495                      ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr;
496                      ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr))
497                         ;
498         } else {
499                 ao_lisp_read_cons = 0;
500                 ao_lisp_read_cons_tail = 0;
501                 ao_lisp_read_stack = 0;
502         }
503         DBG_OUT();
504         DBGI("pop read stack %p %d\n", ao_lisp_read_cons, read_state);
505         return read_state;
506 }
507
508 ao_poly
509 ao_lisp_read(void)
510 {
511         struct ao_lisp_atom     *atom;
512         char                    *string;
513         int                     cons;
514         int                     read_state;
515         ao_poly                 v;
516
517
518         cons = 0;
519         read_state = 0;
520         ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0;
521         for (;;) {
522                 parse_token = lex();
523                 while (parse_token == OPEN) {
524                         if (!push_read_stack(cons, read_state))
525                                 return AO_LISP_NIL;
526                         cons++;
527                         read_state = 0;
528                         parse_token = lex();
529                 }
530
531                 switch (parse_token) {
532                 case END:
533                 default:
534                         if (cons)
535                                 ao_lisp_error(AO_LISP_EOF, "unexpected end of file");
536                         return _ao_lisp_atom_eof;
537                         break;
538                 case NAME:
539                         atom = ao_lisp_atom_intern(token_string);
540                         if (atom)
541                                 v = ao_lisp_atom_poly(atom);
542                         else
543                                 v = AO_LISP_NIL;
544                         break;
545                 case NUM:
546                         v = ao_lisp_integer_poly(token_int);
547                         break;
548                 case FLOAT:
549                         v = ao_lisp_float_get(token_float);
550                         break;
551                 case BOOL:
552                         if (token_string[0] == 't')
553                                 v = _ao_lisp_bool_true;
554                         else
555                                 v = _ao_lisp_bool_false;
556                         break;
557                 case STRING:
558                         string = ao_lisp_string_copy(token_string);
559                         if (string)
560                                 v = ao_lisp_string_poly(string);
561                         else
562                                 v = AO_LISP_NIL;
563                         break;
564                 case QUOTE:
565                         if (!push_read_stack(cons, read_state))
566                                 return AO_LISP_NIL;
567                         cons++;
568                         read_state = READ_IN_QUOTE;
569                         v = _ao_lisp_atom_quote;
570                         break;
571                 case CLOSE:
572                         if (!cons) {
573                                 v = AO_LISP_NIL;
574                                 break;
575                         }
576                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
577                         --cons;
578                         read_state = pop_read_stack(cons);
579                         break;
580                 case DOT:
581                         if (!cons) {
582                                 ao_lisp_error(AO_LISP_INVALID, ". outside of cons");
583                                 return AO_LISP_NIL;
584                         }
585                         if (!ao_lisp_read_cons) {
586                                 ao_lisp_error(AO_LISP_INVALID, ". first in cons");
587                                 return AO_LISP_NIL;
588                         }
589                         read_state |= READ_SAW_DOT;
590                         continue;
591                 }
592
593                 /* loop over QUOTE ends */
594                 for (;;) {
595                         if (!cons)
596                                 return v;
597
598                         if (read_state & READ_DONE_DOT) {
599                                 ao_lisp_error(AO_LISP_INVALID, ". not last in cons");
600                                 return AO_LISP_NIL;
601                         }
602
603                         if (read_state & READ_SAW_DOT) {
604                                 read_state |= READ_DONE_DOT;
605                                 ao_lisp_read_cons_tail->cdr = v;
606                         } else {
607                                 struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, AO_LISP_NIL);
608                                 if (!read)
609                                         return AO_LISP_NIL;
610
611                                 if (ao_lisp_read_cons_tail)
612                                         ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read);
613                                 else
614                                         ao_lisp_read_cons = read;
615                                 ao_lisp_read_cons_tail = read;
616                         }
617
618                         if (!(read_state & READ_IN_QUOTE) || !ao_lisp_read_cons->cdr)
619                                 break;
620
621                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
622                         --cons;
623                         read_state = pop_read_stack(cons);
624                 }
625         }
626         return v;
627 }