Merge branch 'master' of ssh://git.gag.com/scm/git/fw/altos
[fw/altos] / src / scheme / ao_scheme_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_scheme.h"
16 #include "ao_scheme_read.h"
17 #include <math.h>
18 #include <stdlib.h>
19
20 static const uint16_t   lex_classes[128] = {
21         IGNORE,         /* ^@ */
22         IGNORE,         /* ^A */
23         IGNORE,         /* ^B */
24         IGNORE,         /* ^C */
25         IGNORE,         /* ^D */
26         IGNORE,         /* ^E */
27         IGNORE,         /* ^F */
28         IGNORE,         /* ^G */
29         IGNORE,         /* ^H */
30         WHITE,          /* ^I */
31         WHITE,          /* ^J */
32         WHITE,          /* ^K */
33         WHITE,          /* ^L */
34         WHITE,          /* ^M */
35         IGNORE,         /* ^N */
36         IGNORE,         /* ^O */
37         IGNORE,         /* ^P */
38         IGNORE,         /* ^Q */
39         IGNORE,         /* ^R */
40         IGNORE,         /* ^S */
41         IGNORE,         /* ^T */
42         IGNORE,         /* ^U */
43         IGNORE,         /* ^V */
44         IGNORE,         /* ^W */
45         IGNORE,         /* ^X */
46         IGNORE,         /* ^Y */
47         IGNORE,         /* ^Z */
48         IGNORE,         /* ^[ */
49         IGNORE,         /* ^\ */
50         IGNORE,         /* ^] */
51         IGNORE,         /* ^^ */
52         IGNORE,         /* ^_ */
53         PRINTABLE|WHITE,        /*    */
54         PRINTABLE,              /* ! */
55         PRINTABLE|STRINGC,      /* " */
56         PRINTABLE|POUND,        /* # */
57         PRINTABLE,              /* $ */
58         PRINTABLE,              /* % */
59         PRINTABLE,              /* & */
60         PRINTABLE|SPECIAL,      /* ' */
61         PRINTABLE|SPECIAL,      /* ( */
62         PRINTABLE|SPECIAL,      /* ) */
63         PRINTABLE,              /* * */
64         PRINTABLE|SIGN,         /* + */
65         PRINTABLE|SPECIAL,      /* , */
66         PRINTABLE|SIGN,         /* - */
67         PRINTABLE|DOTC|FLOATC,  /* . */
68         PRINTABLE,              /* / */
69         PRINTABLE|DIGIT,        /* 0 */
70         PRINTABLE|DIGIT,        /* 1 */
71         PRINTABLE|DIGIT,        /* 2 */
72         PRINTABLE|DIGIT,        /* 3 */
73         PRINTABLE|DIGIT,        /* 4 */
74         PRINTABLE|DIGIT,        /* 5 */
75         PRINTABLE|DIGIT,        /* 6 */
76         PRINTABLE|DIGIT,        /* 7 */
77         PRINTABLE|DIGIT,        /* 8 */
78         PRINTABLE|DIGIT,        /* 9 */
79         PRINTABLE,              /* : */
80         PRINTABLE|COMMENT,      /* ; */
81         PRINTABLE,              /* < */
82         PRINTABLE,              /* = */
83         PRINTABLE,              /* > */
84         PRINTABLE,              /* ? */
85         PRINTABLE,              /*  @ */
86         PRINTABLE,              /*  A */
87         PRINTABLE,              /*  B */
88         PRINTABLE,              /*  C */
89         PRINTABLE,              /*  D */
90         PRINTABLE|FLOATC,       /*  E */
91         PRINTABLE,              /*  F */
92         PRINTABLE,              /*  G */
93         PRINTABLE,              /*  H */
94         PRINTABLE,              /*  I */
95         PRINTABLE,              /*  J */
96         PRINTABLE,              /*  K */
97         PRINTABLE,              /*  L */
98         PRINTABLE,              /*  M */
99         PRINTABLE,              /*  N */
100         PRINTABLE,              /*  O */
101         PRINTABLE,              /*  P */
102         PRINTABLE,              /*  Q */
103         PRINTABLE,              /*  R */
104         PRINTABLE,              /*  S */
105         PRINTABLE,              /*  T */
106         PRINTABLE,              /*  U */
107         PRINTABLE,              /*  V */
108         PRINTABLE,              /*  W */
109         PRINTABLE,              /*  X */
110         PRINTABLE,              /*  Y */
111         PRINTABLE,              /*  Z */
112         PRINTABLE,              /*  [ */
113         PRINTABLE|BACKSLASH,    /*  \ */
114         PRINTABLE,              /*  ] */
115         PRINTABLE,              /*  ^ */
116         PRINTABLE,              /*  _ */
117         PRINTABLE|SPECIAL,      /*  ` */
118         PRINTABLE,              /*  a */
119         PRINTABLE,              /*  b */
120         PRINTABLE,              /*  c */
121         PRINTABLE,              /*  d */
122         PRINTABLE|FLOATC,       /*  e */
123         PRINTABLE,              /*  f */
124         PRINTABLE,              /*  g */
125         PRINTABLE,              /*  h */
126         PRINTABLE,              /*  i */
127         PRINTABLE,              /*  j */
128         PRINTABLE,              /*  k */
129         PRINTABLE,              /*  l */
130         PRINTABLE,              /*  m */
131         PRINTABLE,              /*  n */
132         PRINTABLE,              /*  o */
133         PRINTABLE,              /*  p */
134         PRINTABLE,              /*  q */
135         PRINTABLE,              /*  r */
136         PRINTABLE,              /*  s */
137         PRINTABLE,              /*  t */
138         PRINTABLE,              /*  u */
139         PRINTABLE,              /*  v */
140         PRINTABLE,              /*  w */
141         PRINTABLE,              /*  x */
142         PRINTABLE,              /*  y */
143         PRINTABLE,              /*  z */
144         PRINTABLE,              /*  { */
145         PRINTABLE,              /*  | */
146         PRINTABLE,              /*  } */
147         PRINTABLE,              /*  ~ */
148         IGNORE,                 /*  ^? */
149 };
150
/* Single character of pushback for the lexer; 0 means nothing is pending */
static int lex_unget_c;

/*
 * Fetch the next raw input character.
 *
 * If lex_unget() has saved a character, that character is returned
 * and the pushback slot is cleared; otherwise the result of
 * ao_scheme_getc() is returned unchanged.
 */
static inline int
lex_get(void)
{
        int     c;

        if (lex_unget_c) {
                c = lex_unget_c;
                lex_unget_c = 0;
        } else {
                c = ao_scheme_getc();
        }
        return c;
}
165
166 static inline void
167 lex_unget(int c)
168 {
169         if (c != EOF)
170                 lex_unget_c = c;
171 }
172
173 static uint16_t lex_class;
174
175 static int
176 lexc(void)
177 {
178         int     c;
179         do {
180                 c = lex_get();
181                 if (c == EOF) {
182                         c = 0;
183                         lex_class = ENDOFFILE;
184                 } else {
185                         c &= 0x7f;
186                         lex_class = lex_classes[c];
187                 }
188         } while (lex_class & IGNORE);
189         return c;
190 }
191
192 static int
193 lex_quoted(void)
194 {
195         int     c;
196         int     v;
197         int     count;
198
199         c = lex_get();
200         if (c == EOF) {
201                 lex_class = ENDOFFILE;
202                 return 0;
203         }
204         lex_class = 0;
205         c &= 0x7f;
206         switch (c) {
207         case 'n':
208                 return '\n';
209         case 'f':
210                 return '\f';
211         case 'b':
212                 return '\b';
213         case 'r':
214                 return '\r';
215         case 'v':
216                 return '\v';
217         case 't':
218                 return '\t';
219         case '0':
220         case '1':
221         case '2':
222         case '3':
223         case '4':
224         case '5':
225         case '6':
226         case '7':
227                 v = c - '0';
228                 count = 1;
229                 while (count <= 3) {
230                         c = lex_get();
231                         if (c == EOF)
232                                 return EOF;
233                         c &= 0x7f;
234                         if (c < '0' || '7' < c) {
235                                 lex_unget(c);
236                                 break;
237                         }
238                         v = (v << 3) + c - '0';
239                         ++count;
240                 }
241                 return v;
242         default:
243                 return c;
244         }
245 }
246
/* Size of the token text buffer, including the terminating NUL */
#define AO_SCHEME_TOKEN_MAX     32

/* Text of the token being scanned and the number of characters in it */
static char     token_string[AO_SCHEME_TOKEN_MAX];
static int      token_len;

/* Value of the most recent integer (or character) token */
static int32_t  token_int;

/* Value of the most recent floating point token */
static float    token_float;

/*
 * Append 'c' to the token text.  NUL characters are not stored, and
 * characters that would not leave room for the terminator are
 * silently dropped.
 */
static inline void add_token(int c) {
        if (c == 0)
                return;
        if (token_len >= AO_SCHEME_TOKEN_MAX - 1)
                return;
        token_string[token_len] = c;
        token_len++;
}

/* Remove the last character from the token text, if there is one */
static inline void del_token(void) {
        if (token_len <= 0)
                return;
        --token_len;
}

/* NUL-terminate the token text at its current length */
static inline void end_token(void) {
        token_string[token_len] = '\0';
}
267
/* Pairs a token spelling with the float value it denotes */
struct namedfloat {
        const char      *name;
        float           value;
};

/* Spellings of the non-finite floats, matched exactly against the token text */
static const struct namedfloat namedfloats[] = {
        { "+inf.0", INFINITY },
        { "-inf.0", -INFINITY },
        { "+nan.0", NAN },
        { "-nan.0", NAN },
};

/* Number of entries in namedfloats */
#define NUM_NAMED_FLOATS        (sizeof (namedfloats) / sizeof (namedfloats[0]))
281
282 static int
283 _lex(void)
284 {
285         int     c;
286
287         token_len = 0;
288         for (;;) {
289                 c = lexc();
290                 if (lex_class & ENDOFFILE)
291                         return END;
292
293                 if (lex_class & WHITE)
294                         continue;
295
296                 if (lex_class & COMMENT) {
297                         while ((c = lexc()) != '\n') {
298                                 if (lex_class & ENDOFFILE)
299                                         return END;
300                         }
301                         continue;
302                 }
303
304                 if (lex_class & (SPECIAL|DOTC)) {
305                         add_token(c);
306                         end_token();
307                         switch (c) {
308                         case '(':
309                         case '[':
310                                 return OPEN;
311                         case ')':
312                         case ']':
313                                 return CLOSE;
314                         case '\'':
315                                 return QUOTE;
316                         case '.':
317                                 return DOT;
318                         case '`':
319                                 return QUASIQUOTE;
320                         case ',':
321                                 c = lexc();
322                                 if (c == '@') {
323                                         add_token(c);
324                                         end_token();
325                                         return UNQUOTE_SPLICING;
326                                 } else {
327                                         lex_unget(c);
328                                         return UNQUOTE;
329                                 }
330                         }
331                 }
332                 if (lex_class & POUND) {
333                         c = lexc();
334                         switch (c) {
335                         case 't':
336                                 add_token(c);
337                                 end_token();
338                                 return BOOL;
339                         case 'f':
340                                 add_token(c);
341                                 end_token();
342                                 return BOOL;
343                         case '\\':
344                                 for (;;) {
345                                         int alphabetic;
346                                         c = lexc();
347                                         alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
348                                         if (token_len == 0) {
349                                                 add_token(c);
350                                                 if (!alphabetic)
351                                                         break;
352                                         } else {
353                                                 if (alphabetic)
354                                                         add_token(c);
355                                                 else {
356                                                         lex_unget(c);
357                                                         break;
358                                                 }
359                                         }
360                                 }
361                                 end_token();
362                                 if (token_len == 1)
363                                         token_int = token_string[0];
364                                 else if (!strcmp(token_string, "space"))
365                                         token_int = ' ';
366                                 else if (!strcmp(token_string, "newline"))
367                                         token_int = '\n';
368                                 else if (!strcmp(token_string, "tab"))
369                                         token_int = '\t';
370                                 else if (!strcmp(token_string, "return"))
371                                         token_int = '\r';
372                                 else if (!strcmp(token_string, "formfeed"))
373                                         token_int = '\f';
374                                 else {
375                                         ao_scheme_error(AO_SCHEME_INVALID, "invalid character token #\\%s", token_string);
376                                         continue;
377                                 }
378                                 return NUM;
379                         }
380                 }
381                 if (lex_class & STRINGC) {
382                         for (;;) {
383                                 c = lexc();
384                                 if (lex_class & BACKSLASH)
385                                         c = lex_quoted();
386                                 if (lex_class & (STRINGC|ENDOFFILE)) {
387                                         end_token();
388                                         return STRING;
389                                 }
390                                 add_token(c);
391                         }
392                 }
393                 if (lex_class & PRINTABLE) {
394                         int     isfloat;
395                         int     hasdigit;
396                         int     isneg;
397                         int     isint;
398                         int     epos;
399
400                         isfloat = 1;
401                         isint = 1;
402                         hasdigit = 0;
403                         token_int = 0;
404                         isneg = 0;
405                         epos = 0;
406                         for (;;) {
407                                 if (!(lex_class & NUMBER)) {
408                                         isint = 0;
409                                         isfloat = 0;
410                                 } else {
411                                         if (!(lex_class & INTEGER))
412                                                 isint = 0;
413                                         if (token_len != epos &&
414                                             (lex_class & SIGN))
415                                         {
416                                                 isint = 0;
417                                                 isfloat = 0;
418                                         }
419                                         if (c == '-')
420                                                 isneg = 1;
421                                         if (c == '.' && epos != 0)
422                                                 isfloat = 0;
423                                         if (c == 'e' || c == 'E') {
424                                                 if (token_len == 0)
425                                                         isfloat = 0;
426                                                 else
427                                                         epos = token_len + 1;
428                                         }
429                                         if (lex_class & DIGIT) {
430                                                 hasdigit = 1;
431                                                 if (isint)
432                                                         token_int = token_int * 10 + c - '0';
433                                         }
434                                 }
435                                 add_token (c);
436                                 c = lexc ();
437                                 if ((lex_class & (NOTNAME)) && (c != '.' || !isfloat)) {
438                                         unsigned int u;
439 //                                      if (lex_class & ENDOFFILE)
440 //                                              clearerr (f);
441                                         lex_unget(c);
442                                         end_token ();
443                                         if (isint && hasdigit) {
444                                                 if (isneg)
445                                                         token_int = -token_int;
446                                                 return NUM;
447                                         }
448                                         if (isfloat && hasdigit) {
449                                                 token_float = strtof(token_string, NULL);
450                                                 return FLOAT;
451                                         }
452                                         for (u = 0; u < NUM_NAMED_FLOATS; u++)
453                                                 if (!strcmp(namedfloats[u].name, token_string)) {
454                                                         token_float = namedfloats[u].value;
455                                                         return FLOAT;
456                                                 }
457                                         return NAME;
458                                 }
459                         }
460                 }
461         }
462 }
463
464 static inline int lex(void)
465 {
466         int     parse_token = _lex();
467         RDBGI("token %d (%s)\n", parse_token, token_string);
468         return parse_token;
469 }
470
/* Token most recently returned by lex() to ao_scheme_read() */
static int parse_token;

/* Head and last cell of the list currently being read */
struct ao_scheme_cons   *ao_scheme_read_cons;
struct ao_scheme_cons   *ao_scheme_read_cons_tail;

/* Saved (list, read_state) pairs for the enclosing lists */
struct ao_scheme_cons   *ao_scheme_read_stack;

/* read_state bits */
#define READ_IN_QUOTE   (1 << 0)        /* list was opened by a quote-like token */
#define READ_SAW_DOT    (1 << 1)        /* a '.' has been read in this list */
#define READ_DONE_DOT   (1 << 2)        /* the value after the '.' has been stored */
480
481 static int
482 push_read_stack(int cons, int read_state)
483 {
484         RDBGI("push read stack %p 0x%x\n", ao_scheme_read_cons, read_state);
485         RDBG_IN();
486         if (cons) {
487                 ao_scheme_read_stack = ao_scheme_cons_cons(ao_scheme_cons_poly(ao_scheme_read_cons),
488                                                        ao_scheme__cons(ao_scheme_int_poly(read_state),
489                                                                      ao_scheme_cons_poly(ao_scheme_read_stack)));
490                 if (!ao_scheme_read_stack)
491                         return 0;
492         }
493         ao_scheme_read_cons = NULL;
494         ao_scheme_read_cons_tail = NULL;
495         return 1;
496 }
497
498 static int
499 pop_read_stack(int cons)
500 {
501         int     read_state = 0;
502         if (cons) {
503                 ao_scheme_read_cons = ao_scheme_poly_cons(ao_scheme_read_stack->car);
504                 ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
505                 read_state = ao_scheme_poly_int(ao_scheme_read_stack->car);
506                 ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
507                 for (ao_scheme_read_cons_tail = ao_scheme_read_cons;
508                      ao_scheme_read_cons_tail && ao_scheme_read_cons_tail->cdr;
509                      ao_scheme_read_cons_tail = ao_scheme_poly_cons(ao_scheme_read_cons_tail->cdr))
510                         ;
511         } else {
512                 ao_scheme_read_cons = 0;
513                 ao_scheme_read_cons_tail = 0;
514                 ao_scheme_read_stack = 0;
515         }
516         RDBG_OUT();
517         RDBGI("pop read stack %p %d\n", ao_scheme_read_cons, read_state);
518         return read_state;
519 }
520
521 ao_poly
522 ao_scheme_read(void)
523 {
524         struct ao_scheme_atom   *atom;
525         char                    *string;
526         int                     cons;
527         int                     read_state;
528         ao_poly                 v = AO_SCHEME_NIL;
529
530         cons = 0;
531         read_state = 0;
532         ao_scheme_read_cons = ao_scheme_read_cons_tail = ao_scheme_read_stack = 0;
533         for (;;) {
534                 parse_token = lex();
535                 while (parse_token == OPEN) {
536                         if (!push_read_stack(cons, read_state))
537                                 return AO_SCHEME_NIL;
538                         cons++;
539                         read_state = 0;
540                         parse_token = lex();
541                 }
542
543                 switch (parse_token) {
544                 case END:
545                 default:
546                         if (cons)
547                                 ao_scheme_error(AO_SCHEME_EOF, "unexpected end of file");
548                         return _ao_scheme_atom_eof;
549                         break;
550                 case NAME:
551                         atom = ao_scheme_atom_intern(token_string);
552                         if (atom)
553                                 v = ao_scheme_atom_poly(atom);
554                         else
555                                 v = AO_SCHEME_NIL;
556                         break;
557                 case NUM:
558                         v = ao_scheme_integer_poly(token_int);
559                         break;
560                 case FLOAT:
561                         v = ao_scheme_float_get(token_float);
562                         break;
563                 case BOOL:
564                         if (token_string[0] == 't')
565                                 v = _ao_scheme_bool_true;
566                         else
567                                 v = _ao_scheme_bool_false;
568                         break;
569                 case STRING:
570                         string = ao_scheme_string_copy(token_string);
571                         if (string)
572                                 v = ao_scheme_string_poly(string);
573                         else
574                                 v = AO_SCHEME_NIL;
575                         break;
576                 case QUOTE:
577                 case QUASIQUOTE:
578                 case UNQUOTE:
579                 case UNQUOTE_SPLICING:
580                         if (!push_read_stack(cons, read_state))
581                                 return AO_SCHEME_NIL;
582                         cons++;
583                         read_state = READ_IN_QUOTE;
584                         switch (parse_token) {
585                         case QUOTE:
586                                 v = _ao_scheme_atom_quote;
587                                 break;
588                         case QUASIQUOTE:
589                                 v = _ao_scheme_atom_quasiquote;
590                                 break;
591                         case UNQUOTE:
592                                 v = _ao_scheme_atom_unquote;
593                                 break;
594                         case UNQUOTE_SPLICING:
595                                 v = _ao_scheme_atom_unquote2dsplicing;
596                                 break;
597                         }
598                         break;
599                 case CLOSE:
600                         if (!cons) {
601                                 v = AO_SCHEME_NIL;
602                                 break;
603                         }
604                         v = ao_scheme_cons_poly(ao_scheme_read_cons);
605                         --cons;
606                         read_state = pop_read_stack(cons);
607                         break;
608                 case DOT:
609                         if (!cons) {
610                                 ao_scheme_error(AO_SCHEME_INVALID, ". outside of cons");
611                                 return AO_SCHEME_NIL;
612                         }
613                         if (!ao_scheme_read_cons) {
614                                 ao_scheme_error(AO_SCHEME_INVALID, ". first in cons");
615                                 return AO_SCHEME_NIL;
616                         }
617                         read_state |= READ_SAW_DOT;
618                         continue;
619                 }
620
621                 /* loop over QUOTE ends */
622                 for (;;) {
623                         if (!cons)
624                                 return v;
625
626                         if (read_state & READ_DONE_DOT) {
627                                 ao_scheme_error(AO_SCHEME_INVALID, ". not last in cons");
628                                 return AO_SCHEME_NIL;
629                         }
630
631                         if (read_state & READ_SAW_DOT) {
632                                 read_state |= READ_DONE_DOT;
633                                 ao_scheme_read_cons_tail->cdr = v;
634                         } else {
635                                 struct ao_scheme_cons   *read = ao_scheme_cons_cons(v, AO_SCHEME_NIL);
636                                 if (!read)
637                                         return AO_SCHEME_NIL;
638
639                                 if (ao_scheme_read_cons_tail)
640                                         ao_scheme_read_cons_tail->cdr = ao_scheme_cons_poly(read);
641                                 else
642                                         ao_scheme_read_cons = read;
643                                 ao_scheme_read_cons_tail = read;
644                         }
645
646                         if (!(read_state & READ_IN_QUOTE) || !ao_scheme_read_cons->cdr)
647                                 break;
648
649                         v = ao_scheme_cons_poly(ao_scheme_read_cons);
650                         --cons;
651                         read_state = pop_read_stack(cons);
652                 }
653         }
654         return v;
655 }