altos/lisp: Add quasiquote
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_lisp.h"
16 #include "ao_lisp_read.h"
17 #include <math.h>
18
19 static const uint16_t   lex_classes[128] = {
20         IGNORE,         /* ^@ */
21         IGNORE,         /* ^A */
22         IGNORE,         /* ^B */
23         IGNORE,         /* ^C */
24         IGNORE,         /* ^D */
25         IGNORE,         /* ^E */
26         IGNORE,         /* ^F */
27         IGNORE,         /* ^G */
28         IGNORE,         /* ^H */
29         WHITE,          /* ^I */
30         WHITE,          /* ^J */
31         WHITE,          /* ^K */
32         WHITE,          /* ^L */
33         WHITE,          /* ^M */
34         IGNORE,         /* ^N */
35         IGNORE,         /* ^O */
36         IGNORE,         /* ^P */
37         IGNORE,         /* ^Q */
38         IGNORE,         /* ^R */
39         IGNORE,         /* ^S */
40         IGNORE,         /* ^T */
41         IGNORE,         /* ^U */
42         IGNORE,         /* ^V */
43         IGNORE,         /* ^W */
44         IGNORE,         /* ^X */
45         IGNORE,         /* ^Y */
46         IGNORE,         /* ^Z */
47         IGNORE,         /* ^[ */
48         IGNORE,         /* ^\ */
49         IGNORE,         /* ^] */
50         IGNORE,         /* ^^ */
51         IGNORE,         /* ^_ */
52         PRINTABLE|WHITE,        /*    */
53         PRINTABLE,              /* ! */
54         PRINTABLE|STRINGC,      /* " */
55         PRINTABLE|POUND,        /* # */
56         PRINTABLE,              /* $ */
57         PRINTABLE,              /* % */
58         PRINTABLE,              /* & */
59         PRINTABLE|SPECIAL,      /* ' */
60         PRINTABLE|SPECIAL,      /* ( */
61         PRINTABLE|SPECIAL,      /* ) */
62         PRINTABLE,              /* * */
63         PRINTABLE|SIGN,         /* + */
64         PRINTABLE|SPECIAL,      /* , */
65         PRINTABLE|SIGN,         /* - */
66         PRINTABLE|DOTC|FLOATC,  /* . */
67         PRINTABLE,              /* / */
68         PRINTABLE|DIGIT,        /* 0 */
69         PRINTABLE|DIGIT,        /* 1 */
70         PRINTABLE|DIGIT,        /* 2 */
71         PRINTABLE|DIGIT,        /* 3 */
72         PRINTABLE|DIGIT,        /* 4 */
73         PRINTABLE|DIGIT,        /* 5 */
74         PRINTABLE|DIGIT,        /* 6 */
75         PRINTABLE|DIGIT,        /* 7 */
76         PRINTABLE|DIGIT,        /* 8 */
77         PRINTABLE|DIGIT,        /* 9 */
78         PRINTABLE,              /* : */
79         PRINTABLE|COMMENT,      /* ; */
80         PRINTABLE,              /* < */
81         PRINTABLE,              /* = */
82         PRINTABLE,              /* > */
83         PRINTABLE,              /* ? */
84         PRINTABLE,              /*  @ */
85         PRINTABLE,              /*  A */
86         PRINTABLE,              /*  B */
87         PRINTABLE,              /*  C */
88         PRINTABLE,              /*  D */
89         PRINTABLE|FLOATC,       /*  E */
90         PRINTABLE,              /*  F */
91         PRINTABLE,              /*  G */
92         PRINTABLE,              /*  H */
93         PRINTABLE,              /*  I */
94         PRINTABLE,              /*  J */
95         PRINTABLE,              /*  K */
96         PRINTABLE,              /*  L */
97         PRINTABLE,              /*  M */
98         PRINTABLE,              /*  N */
99         PRINTABLE,              /*  O */
100         PRINTABLE,              /*  P */
101         PRINTABLE,              /*  Q */
102         PRINTABLE,              /*  R */
103         PRINTABLE,              /*  S */
104         PRINTABLE,              /*  T */
105         PRINTABLE,              /*  U */
106         PRINTABLE,              /*  V */
107         PRINTABLE,              /*  W */
108         PRINTABLE,              /*  X */
109         PRINTABLE,              /*  Y */
110         PRINTABLE,              /*  Z */
111         PRINTABLE,              /*  [ */
112         PRINTABLE|BACKSLASH,    /*  \ */
113         PRINTABLE,              /*  ] */
114         PRINTABLE,              /*  ^ */
115         PRINTABLE,              /*  _ */
116         PRINTABLE|SPECIAL,      /*  ` */
117         PRINTABLE,              /*  a */
118         PRINTABLE,              /*  b */
119         PRINTABLE,              /*  c */
120         PRINTABLE,              /*  d */
121         PRINTABLE|FLOATC,       /*  e */
122         PRINTABLE,              /*  f */
123         PRINTABLE,              /*  g */
124         PRINTABLE,              /*  h */
125         PRINTABLE,              /*  i */
126         PRINTABLE,              /*  j */
127         PRINTABLE,              /*  k */
128         PRINTABLE,              /*  l */
129         PRINTABLE,              /*  m */
130         PRINTABLE,              /*  n */
131         PRINTABLE,              /*  o */
132         PRINTABLE,              /*  p */
133         PRINTABLE,              /*  q */
134         PRINTABLE,              /*  r */
135         PRINTABLE,              /*  s */
136         PRINTABLE,              /*  t */
137         PRINTABLE,              /*  u */
138         PRINTABLE,              /*  v */
139         PRINTABLE,              /*  w */
140         PRINTABLE,              /*  x */
141         PRINTABLE,              /*  y */
142         PRINTABLE,              /*  z */
143         PRINTABLE,              /*  { */
144         PRINTABLE,              /*  | */
145         PRINTABLE,              /*  } */
146         PRINTABLE,              /*  ~ */
147         IGNORE,                 /*  ^? */
148 };
149
150 static int lex_unget_c;
151
152 static inline int
153 lex_get()
154 {
155         int     c;
156         if (lex_unget_c) {
157                 c = lex_unget_c;
158                 lex_unget_c = 0;
159         } else {
160                 c = ao_lisp_getc();
161         }
162         return c;
163 }
164
165 static inline void
166 lex_unget(int c)
167 {
168         if (c != EOF)
169                 lex_unget_c = c;
170 }
171
172 static uint16_t lex_class;
173
174 static int
175 lexc(void)
176 {
177         int     c;
178         do {
179                 c = lex_get();
180                 if (c == EOF) {
181                         c = 0;
182                         lex_class = ENDOFFILE;
183                 } else {
184                         c &= 0x7f;
185                         lex_class = lex_classes[c];
186                 }
187         } while (lex_class & IGNORE);
188         return c;
189 }
190
191 static int
192 lex_quoted(void)
193 {
194         int     c;
195         int     v;
196         int     count;
197
198         c = lex_get();
199         if (c == EOF) {
200                 lex_class = ENDOFFILE;
201                 return 0;
202         }
203         lex_class = 0;
204         c &= 0x7f;
205         switch (c) {
206         case 'n':
207                 return '\n';
208         case 'f':
209                 return '\f';
210         case 'b':
211                 return '\b';
212         case 'r':
213                 return '\r';
214         case 'v':
215                 return '\v';
216         case 't':
217                 return '\t';
218         case '0':
219         case '1':
220         case '2':
221         case '3':
222         case '4':
223         case '5':
224         case '6':
225         case '7':
226                 v = c - '0';
227                 count = 1;
228                 while (count <= 3) {
229                         c = lex_get();
230                         if (c == EOF)
231                                 return EOF;
232                         c &= 0x7f;
233                         if (c < '0' || '7' < c) {
234                                 lex_unget(c);
235                                 break;
236                         }
237                         v = (v << 3) + c - '0';
238                         ++count;
239                 }
240                 return v;
241         default:
242                 return c;
243         }
244 }
245
246 #define AO_LISP_TOKEN_MAX       32
247
248 static char     token_string[AO_LISP_TOKEN_MAX];
249 static int32_t  token_int;
250 static int      token_len;
251 static float    token_float;
252
253 static inline void add_token(int c) {
254         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
255                 token_string[token_len++] = c;
256 }
257
258 static inline void del_token(void) {
259         if (token_len > 0)
260                 token_len--;
261 }
262
263 static inline void end_token(void) {
264         token_string[token_len] = '\0';
265 }
266
267 struct namedfloat {
268         const char      *name;
269         float           value;
270 };
271
272 static const struct namedfloat namedfloats[] = {
273         { .name = "+inf.0", .value = INFINITY },
274         { .name = "-inf.0", .value = -INFINITY },
275         { .name = "+nan.0", .value = NAN },
276         { .name = "-nan.0", .value = NAN },
277 };
278
279 #define NUM_NAMED_FLOATS        (sizeof namedfloats / sizeof namedfloats[0])
280
281 static int
282 _lex(void)
283 {
284         int     c;
285
286         token_len = 0;
287         for (;;) {
288                 c = lexc();
289                 if (lex_class & ENDOFFILE)
290                         return END;
291
292                 if (lex_class & WHITE)
293                         continue;
294
295                 if (lex_class & COMMENT) {
296                         while ((c = lexc()) != '\n') {
297                                 if (lex_class & ENDOFFILE)
298                                         return END;
299                         }
300                         continue;
301                 }
302
303                 if (lex_class & (SPECIAL|DOTC)) {
304                         add_token(c);
305                         end_token();
306                         switch (c) {
307                         case '(':
308                         case '[':
309                                 return OPEN;
310                         case ')':
311                         case ']':
312                                 return CLOSE;
313                         case '\'':
314                                 return QUOTE;
315                         case '.':
316                                 return DOT;
317                         case '`':
318                                 return QUASIQUOTE;
319                         case ',':
320                                 c = lexc();
321                                 if (c == '@') {
322                                         add_token(c);
323                                         end_token();
324                                         return UNQUOTE_SPLICING;
325                                 } else {
326                                         lex_unget(c);
327                                         return UNQUOTE;
328                                 }
329                         }
330                 }
331                 if (lex_class & POUND) {
332                         c = lexc();
333                         switch (c) {
334                         case 't':
335                                 add_token(c);
336                                 end_token();
337                                 return BOOL;
338                         case 'f':
339                                 add_token(c);
340                                 end_token();
341                                 return BOOL;
342                         case '\\':
343                                 for (;;) {
344                                         int alphabetic;
345                                         c = lexc();
346                                         alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
347                                         if (token_len == 0) {
348                                                 add_token(c);
349                                                 if (!alphabetic)
350                                                         break;
351                                         } else {
352                                                 if (alphabetic)
353                                                         add_token(c);
354                                                 else {
355                                                         lex_unget(c);
356                                                         break;
357                                                 }
358                                         }
359                                 }
360                                 end_token();
361                                 if (token_len == 1)
362                                         token_int = token_string[0];
363                                 else if (!strcmp(token_string, "space"))
364                                         token_int = ' ';
365                                 else if (!strcmp(token_string, "newline"))
366                                         token_int = '\n';
367                                 else if (!strcmp(token_string, "tab"))
368                                         token_int = '\t';
369                                 else if (!strcmp(token_string, "return"))
370                                         token_int = '\r';
371                                 else if (!strcmp(token_string, "formfeed"))
372                                         token_int = '\f';
373                                 else {
374                                         ao_lisp_error(AO_LISP_INVALID, "invalid character token #\\%s", token_string);
375                                         continue;
376                                 }
377                                 return NUM;
378                         }
379                 }
380                 if (lex_class & STRINGC) {
381                         for (;;) {
382                                 c = lexc();
383                                 if (lex_class & BACKSLASH)
384                                         c = lex_quoted();
385                                 if (lex_class & (STRINGC|ENDOFFILE)) {
386                                         end_token();
387                                         return STRING;
388                                 }
389                                 add_token(c);
390                         }
391                 }
392                 if (lex_class & PRINTABLE) {
393                         int     isfloat;
394                         int     hasdigit;
395                         int     isneg;
396                         int     isint;
397                         int     epos;
398
399                         isfloat = 1;
400                         isint = 1;
401                         hasdigit = 0;
402                         token_int = 0;
403                         isneg = 0;
404                         epos = 0;
405                         for (;;) {
406                                 if (!(lex_class & NUMBER)) {
407                                         isint = 0;
408                                         isfloat = 0;
409                                 } else {
410                                         if (!(lex_class & INTEGER))
411                                                 isint = 0;
412                                         if (token_len != epos &&
413                                             (lex_class & SIGN))
414                                         {
415                                                 isint = 0;
416                                                 isfloat = 0;
417                                         }
418                                         if (c == '-')
419                                                 isneg = 1;
420                                         if (c == '.' && epos != 0)
421                                                 isfloat = 0;
422                                         if (c == 'e' || c == 'E') {
423                                                 if (token_len == 0)
424                                                         isfloat = 0;
425                                                 else
426                                                         epos = token_len + 1;
427                                         }
428                                         if (lex_class & DIGIT) {
429                                                 hasdigit = 1;
430                                                 if (isint)
431                                                         token_int = token_int * 10 + c - '0';
432                                         }
433                                 }
434                                 add_token (c);
435                                 c = lexc ();
436                                 if ((lex_class & (NOTNAME)) && (c != '.' || !isfloat)) {
437                                         unsigned int u;
438 //                                      if (lex_class & ENDOFFILE)
439 //                                              clearerr (f);
440                                         lex_unget(c);
441                                         end_token ();
442                                         if (isint && hasdigit) {
443                                                 if (isneg)
444                                                         token_int = -token_int;
445                                                 return NUM;
446                                         }
447                                         if (isfloat && hasdigit) {
448                                                 token_float = atof(token_string);
449                                                 return FLOAT;
450                                         }
451                                         for (u = 0; u < NUM_NAMED_FLOATS; u++)
452                                                 if (!strcmp(namedfloats[u].name, token_string)) {
453                                                         token_float = namedfloats[u].value;
454                                                         return FLOAT;
455                                                 }
456                                         return NAME;
457                                 }
458                         }
459                 }
460         }
461 }
462
463 static inline int lex(void)
464 {
465         int     parse_token = _lex();
466         DBGI("token %d (%s)\n", parse_token, token_string);
467         return parse_token;
468 }
469
470 static int parse_token;
471
472 struct ao_lisp_cons     *ao_lisp_read_cons;
473 struct ao_lisp_cons     *ao_lisp_read_cons_tail;
474 struct ao_lisp_cons     *ao_lisp_read_stack;
475
476 #define READ_IN_QUOTE   0x01
477 #define READ_SAW_DOT    0x02
478 #define READ_DONE_DOT   0x04
479
480 static int
481 push_read_stack(int cons, int read_state)
482 {
483         DBGI("push read stack %p 0x%x\n", ao_lisp_read_cons, read_state);
484         DBG_IN();
485         if (cons) {
486                 ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons),
487                                                        ao_lisp__cons(ao_lisp_int_poly(read_state),
488                                                                      ao_lisp_cons_poly(ao_lisp_read_stack)));
489                 if (!ao_lisp_read_stack)
490                         return 0;
491         }
492         ao_lisp_read_cons = NULL;
493         ao_lisp_read_cons_tail = NULL;
494         return 1;
495 }
496
497 static int
498 pop_read_stack(int cons)
499 {
500         int     read_state = 0;
501         if (cons) {
502                 ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car);
503                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
504                 read_state = ao_lisp_poly_int(ao_lisp_read_stack->car);
505                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
506                 for (ao_lisp_read_cons_tail = ao_lisp_read_cons;
507                      ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr;
508                      ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr))
509                         ;
510         } else {
511                 ao_lisp_read_cons = 0;
512                 ao_lisp_read_cons_tail = 0;
513                 ao_lisp_read_stack = 0;
514         }
515         DBG_OUT();
516         DBGI("pop read stack %p %d\n", ao_lisp_read_cons, read_state);
517         return read_state;
518 }
519
520 ao_poly
521 ao_lisp_read(void)
522 {
523         struct ao_lisp_atom     *atom;
524         char                    *string;
525         int                     cons;
526         int                     read_state;
527         ao_poly                 v;
528
529
530         cons = 0;
531         read_state = 0;
532         ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0;
533         for (;;) {
534                 parse_token = lex();
535                 while (parse_token == OPEN) {
536                         if (!push_read_stack(cons, read_state))
537                                 return AO_LISP_NIL;
538                         cons++;
539                         read_state = 0;
540                         parse_token = lex();
541                 }
542
543                 switch (parse_token) {
544                 case END:
545                 default:
546                         if (cons)
547                                 ao_lisp_error(AO_LISP_EOF, "unexpected end of file");
548                         return _ao_lisp_atom_eof;
549                         break;
550                 case NAME:
551                         atom = ao_lisp_atom_intern(token_string);
552                         if (atom)
553                                 v = ao_lisp_atom_poly(atom);
554                         else
555                                 v = AO_LISP_NIL;
556                         break;
557                 case NUM:
558                         v = ao_lisp_integer_poly(token_int);
559                         break;
560                 case FLOAT:
561                         v = ao_lisp_float_get(token_float);
562                         break;
563                 case BOOL:
564                         if (token_string[0] == 't')
565                                 v = _ao_lisp_bool_true;
566                         else
567                                 v = _ao_lisp_bool_false;
568                         break;
569                 case STRING:
570                         string = ao_lisp_string_copy(token_string);
571                         if (string)
572                                 v = ao_lisp_string_poly(string);
573                         else
574                                 v = AO_LISP_NIL;
575                         break;
576                 case QUOTE:
577                 case QUASIQUOTE:
578                 case UNQUOTE:
579                 case UNQUOTE_SPLICING:
580                         if (!push_read_stack(cons, read_state))
581                                 return AO_LISP_NIL;
582                         cons++;
583                         read_state = READ_IN_QUOTE;
584                         switch (parse_token) {
585                         case QUOTE:
586                                 v = _ao_lisp_atom_quote;
587                                 break;
588                         case QUASIQUOTE:
589                                 v = _ao_lisp_atom_quasiquote;
590                                 break;
591                         case UNQUOTE:
592                                 v = _ao_lisp_atom_unquote;
593                                 break;
594                         case UNQUOTE_SPLICING:
595                                 v = _ao_lisp_atom_unquote2dsplicing;
596                                 break;
597                         }
598                         break;
599                 case CLOSE:
600                         if (!cons) {
601                                 v = AO_LISP_NIL;
602                                 break;
603                         }
604                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
605                         --cons;
606                         read_state = pop_read_stack(cons);
607                         break;
608                 case DOT:
609                         if (!cons) {
610                                 ao_lisp_error(AO_LISP_INVALID, ". outside of cons");
611                                 return AO_LISP_NIL;
612                         }
613                         if (!ao_lisp_read_cons) {
614                                 ao_lisp_error(AO_LISP_INVALID, ". first in cons");
615                                 return AO_LISP_NIL;
616                         }
617                         read_state |= READ_SAW_DOT;
618                         continue;
619                 }
620
621                 /* loop over QUOTE ends */
622                 for (;;) {
623                         if (!cons)
624                                 return v;
625
626                         if (read_state & READ_DONE_DOT) {
627                                 ao_lisp_error(AO_LISP_INVALID, ". not last in cons");
628                                 return AO_LISP_NIL;
629                         }
630
631                         if (read_state & READ_SAW_DOT) {
632                                 read_state |= READ_DONE_DOT;
633                                 ao_lisp_read_cons_tail->cdr = v;
634                         } else {
635                                 struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, AO_LISP_NIL);
636                                 if (!read)
637                                         return AO_LISP_NIL;
638
639                                 if (ao_lisp_read_cons_tail)
640                                         ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read);
641                                 else
642                                         ao_lisp_read_cons = read;
643                                 ao_lisp_read_cons_tail = read;
644                         }
645
646                         if (!(read_state & READ_IN_QUOTE) || !ao_lisp_read_cons->cdr)
647                                 break;
648
649                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
650                         --cons;
651                         read_state = pop_read_stack(cons);
652                 }
653         }
654         return v;
655 }