721211bc7a9b3ad1e57e23d6803bf516f215de8b
[fw/altos] / src / scheme / ao_scheme_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_scheme.h"
16 #include "ao_scheme_read.h"
17 #include <math.h>
18 #include <stdlib.h>
19
20 static const uint16_t   lex_classes[128] = {
21         IGNORE,         /* ^@ */
22         IGNORE,         /* ^A */
23         IGNORE,         /* ^B */
24         IGNORE,         /* ^C */
25         IGNORE,         /* ^D */
26         IGNORE,         /* ^E */
27         IGNORE,         /* ^F */
28         IGNORE,         /* ^G */
29         IGNORE,         /* ^H */
30         WHITE,          /* ^I */
31         WHITE,          /* ^J */
32         WHITE,          /* ^K */
33         WHITE,          /* ^L */
34         WHITE,          /* ^M */
35         IGNORE,         /* ^N */
36         IGNORE,         /* ^O */
37         IGNORE,         /* ^P */
38         IGNORE,         /* ^Q */
39         IGNORE,         /* ^R */
40         IGNORE,         /* ^S */
41         IGNORE,         /* ^T */
42         IGNORE,         /* ^U */
43         IGNORE,         /* ^V */
44         IGNORE,         /* ^W */
45         IGNORE,         /* ^X */
46         IGNORE,         /* ^Y */
47         IGNORE,         /* ^Z */
48         IGNORE,         /* ^[ */
49         IGNORE,         /* ^\ */
50         IGNORE,         /* ^] */
51         IGNORE,         /* ^^ */
52         IGNORE,         /* ^_ */
53         PRINTABLE|WHITE,        /*    */
54         PRINTABLE,              /* ! */
55         PRINTABLE|STRINGC,      /* " */
56         PRINTABLE|POUND,        /* # */
57         PRINTABLE,              /* $ */
58         PRINTABLE,              /* % */
59         PRINTABLE,              /* & */
60         PRINTABLE|SPECIAL,      /* ' */
61         PRINTABLE|SPECIAL,      /* ( */
62         PRINTABLE|SPECIAL,      /* ) */
63         PRINTABLE,              /* * */
64         PRINTABLE|SIGN,         /* + */
65         PRINTABLE|SPECIAL_QUASI,        /* , */
66         PRINTABLE|SIGN,         /* - */
67         PRINTABLE|DOTC|FLOATC,  /* . */
68         PRINTABLE,              /* / */
69         PRINTABLE|DIGIT,        /* 0 */
70         PRINTABLE|DIGIT,        /* 1 */
71         PRINTABLE|DIGIT,        /* 2 */
72         PRINTABLE|DIGIT,        /* 3 */
73         PRINTABLE|DIGIT,        /* 4 */
74         PRINTABLE|DIGIT,        /* 5 */
75         PRINTABLE|DIGIT,        /* 6 */
76         PRINTABLE|DIGIT,        /* 7 */
77         PRINTABLE|DIGIT,        /* 8 */
78         PRINTABLE|DIGIT,        /* 9 */
79         PRINTABLE,              /* : */
80         PRINTABLE|COMMENT,      /* ; */
81         PRINTABLE,              /* < */
82         PRINTABLE,              /* = */
83         PRINTABLE,              /* > */
84         PRINTABLE,              /* ? */
85         PRINTABLE,              /*  @ */
86         PRINTABLE,              /*  A */
87         PRINTABLE,              /*  B */
88         PRINTABLE,              /*  C */
89         PRINTABLE,              /*  D */
90         PRINTABLE|FLOATC,       /*  E */
91         PRINTABLE,              /*  F */
92         PRINTABLE,              /*  G */
93         PRINTABLE,              /*  H */
94         PRINTABLE,              /*  I */
95         PRINTABLE,              /*  J */
96         PRINTABLE,              /*  K */
97         PRINTABLE,              /*  L */
98         PRINTABLE,              /*  M */
99         PRINTABLE,              /*  N */
100         PRINTABLE,              /*  O */
101         PRINTABLE,              /*  P */
102         PRINTABLE,              /*  Q */
103         PRINTABLE,              /*  R */
104         PRINTABLE,              /*  S */
105         PRINTABLE,              /*  T */
106         PRINTABLE,              /*  U */
107         PRINTABLE,              /*  V */
108         PRINTABLE,              /*  W */
109         PRINTABLE,              /*  X */
110         PRINTABLE,              /*  Y */
111         PRINTABLE,              /*  Z */
112         PRINTABLE,              /*  [ */
113         PRINTABLE|BACKSLASH,    /*  \ */
114         PRINTABLE,              /*  ] */
115         PRINTABLE,              /*  ^ */
116         PRINTABLE,              /*  _ */
117         PRINTABLE|SPECIAL_QUASI,        /*  ` */
118         PRINTABLE,              /*  a */
119         PRINTABLE,              /*  b */
120         PRINTABLE,              /*  c */
121         PRINTABLE,              /*  d */
122         PRINTABLE|FLOATC,       /*  e */
123         PRINTABLE,              /*  f */
124         PRINTABLE,              /*  g */
125         PRINTABLE,              /*  h */
126         PRINTABLE,              /*  i */
127         PRINTABLE,              /*  j */
128         PRINTABLE,              /*  k */
129         PRINTABLE,              /*  l */
130         PRINTABLE,              /*  m */
131         PRINTABLE,              /*  n */
132         PRINTABLE,              /*  o */
133         PRINTABLE,              /*  p */
134         PRINTABLE,              /*  q */
135         PRINTABLE,              /*  r */
136         PRINTABLE,              /*  s */
137         PRINTABLE,              /*  t */
138         PRINTABLE,              /*  u */
139         PRINTABLE,              /*  v */
140         PRINTABLE,              /*  w */
141         PRINTABLE,              /*  x */
142         PRINTABLE,              /*  y */
143         PRINTABLE,              /*  z */
144         PRINTABLE,              /*  { */
145         PRINTABLE,              /*  | */
146         PRINTABLE,              /*  } */
147         PRINTABLE,              /*  ~ */
148         IGNORE,                 /*  ^? */
149 };
150
151 static int lex_unget_c;
152
153 static inline int
154 lex_get(void)
155 {
156         int     c;
157         if (lex_unget_c) {
158                 c = lex_unget_c;
159                 lex_unget_c = 0;
160         } else {
161                 c = ao_scheme_getc();
162         }
163         return c;
164 }
165
166 static inline void
167 lex_unget(int c)
168 {
169         if (c != EOF)
170                 lex_unget_c = c;
171 }
172
173 static uint16_t lex_class;
174
175 static int
176 lexc(void)
177 {
178         int     c;
179         do {
180                 c = lex_get();
181                 if (c == EOF) {
182                         c = 0;
183                         lex_class = ENDOFFILE;
184                 } else {
185                         c &= 0x7f;
186                         lex_class = lex_classes[c];
187                 }
188         } while (lex_class & IGNORE);
189         return c;
190 }
191
192 static int
193 lex_quoted(void)
194 {
195         int     c;
196         int     v;
197         int     count;
198
199         c = lex_get();
200         if (c == EOF) {
201                 lex_class = ENDOFFILE;
202                 return 0;
203         }
204         lex_class = 0;
205         c &= 0x7f;
206         switch (c) {
207         case 'n':
208                 return '\n';
209         case 'f':
210                 return '\f';
211         case 'b':
212                 return '\b';
213         case 'r':
214                 return '\r';
215         case 'v':
216                 return '\v';
217         case 't':
218                 return '\t';
219         case '0':
220         case '1':
221         case '2':
222         case '3':
223         case '4':
224         case '5':
225         case '6':
226         case '7':
227                 v = c - '0';
228                 count = 1;
229                 while (count <= 3) {
230                         c = lex_get();
231                         if (c == EOF)
232                                 return EOF;
233                         c &= 0x7f;
234                         if (c < '0' || '7' < c) {
235                                 lex_unget(c);
236                                 break;
237                         }
238                         v = (v << 3) + c - '0';
239                         ++count;
240                 }
241                 return v;
242         default:
243                 return c;
244         }
245 }
246
247 #ifndef AO_SCHEME_TOKEN_MAX
248 #define AO_SCHEME_TOKEN_MAX     128
249 #endif
250
251 static char     token_string[AO_SCHEME_TOKEN_MAX];
252 static int32_t  token_int;
253 static int      token_len;
254
255 static inline void add_token(int c) {
256         if (c && token_len < AO_SCHEME_TOKEN_MAX - 1)
257                 token_string[token_len++] = c;
258 }
259
260 static inline void del_token(void) {
261         if (token_len > 0)
262                 token_len--;
263 }
264
265 static inline void end_token(void) {
266         token_string[token_len] = '\0';
267 }
268
269 #ifdef AO_SCHEME_FEATURE_FLOAT
270 static float    token_float;
271
272 struct namedfloat {
273         const char      *name;
274         float           value;
275 };
276
277 static const struct namedfloat namedfloats[] = {
278         { .name = "+inf.0", .value = INFINITY },
279         { .name = "-inf.0", .value = -INFINITY },
280         { .name = "+nan.0", .value = NAN },
281         { .name = "-nan.0", .value = NAN },
282 };
283
284 #define NUM_NAMED_FLOATS        (sizeof namedfloats / sizeof namedfloats[0])
285 #endif
286
287 static int
288 _lex(void)
289 {
290         int     c;
291
292         token_len = 0;
293         for (;;) {
294                 c = lexc();
295                 if (lex_class & ENDOFFILE)
296                         return END;
297
298                 if (lex_class & WHITE)
299                         continue;
300
301                 if (lex_class & COMMENT) {
302                         while ((c = lexc()) != '\n') {
303                                 if (lex_class & ENDOFFILE)
304                                         return END;
305                         }
306                         continue;
307                 }
308
309                 if (lex_class & (SPECIAL|DOTC)) {
310                         add_token(c);
311                         end_token();
312                         switch (c) {
313                         case '(':
314                         case '[':
315                                 return OPEN;
316                         case ')':
317                         case ']':
318                                 return CLOSE;
319                         case '\'':
320                                 return QUOTE;
321                         case '.':
322                                 return DOT;
323 #ifdef AO_SCHEME_FEATURE_QUASI
324                         case '`':
325                                 return QUASIQUOTE;
326                         case ',':
327                                 c = lexc();
328                                 if (c == '@') {
329                                         add_token(c);
330                                         end_token();
331                                         return UNQUOTE_SPLICING;
332                                 } else {
333                                         lex_unget(c);
334                                         return UNQUOTE;
335                                 }
336 #endif
337                         }
338                 }
339                 if (lex_class & POUND) {
340                         c = lexc();
341                         switch (c) {
342                         case 't':
343                                 add_token(c);
344                                 end_token();
345                                 return BOOL;
346                         case 'f':
347                                 add_token(c);
348                                 end_token();
349                                 return BOOL;
350 #ifdef AO_SCHEME_FEATURE_VECTOR
351                         case '(':
352                                 return OPEN_VECTOR;
353 #endif
354                         case '\\':
355                                 for (;;) {
356                                         int alphabetic;
357                                         c = lexc();
358                                         alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
359                                         if (token_len == 0) {
360                                                 add_token(c);
361                                                 if (!alphabetic)
362                                                         break;
363                                         } else {
364                                                 if (alphabetic)
365                                                         add_token(c);
366                                                 else {
367                                                         lex_unget(c);
368                                                         break;
369                                                 }
370                                         }
371                                 }
372                                 end_token();
373                                 if (token_len == 1)
374                                         token_int = token_string[0];
375                                 else if (!strcmp(token_string, "space"))
376                                         token_int = ' ';
377                                 else if (!strcmp(token_string, "newline"))
378                                         token_int = '\n';
379                                 else if (!strcmp(token_string, "tab"))
380                                         token_int = '\t';
381                                 else if (!strcmp(token_string, "return"))
382                                         token_int = '\r';
383                                 else if (!strcmp(token_string, "formfeed"))
384                                         token_int = '\f';
385                                 else {
386                                         ao_scheme_error(AO_SCHEME_INVALID, "invalid character token #\\%s", token_string);
387                                         continue;
388                                 }
389                                 return NUM;
390                         }
391                 }
392                 if (lex_class & STRINGC) {
393                         for (;;) {
394                                 c = lexc();
395                                 if (lex_class & BACKSLASH)
396                                         c = lex_quoted();
397                                 if (lex_class & (STRINGC|ENDOFFILE)) {
398                                         end_token();
399                                         return STRING;
400                                 }
401                                 add_token(c);
402                         }
403                 }
404                 if (lex_class & PRINTABLE) {
405 #ifdef AO_SCHEME_FEATURE_FLOAT
406                         int     isfloat = 1;
407                         int     epos = 0;
408 #endif
409                         int     hasdigit = 0;
410                         int     isneg = 0;
411                         int     isint = 1;
412
413                         token_int = 0;
414                         for (;;) {
415                                 if (!(lex_class & NUMBER)) {
416                                         isint = 0;
417 #ifdef AO_SCHEME_FEATURE_FLOAT
418                                         isfloat = 0;
419 #endif
420                                 } else {
421 #ifdef AO_SCHEME_FEATURE_FLOAT
422                                         if (!(lex_class & INTEGER))
423                                                 isint = 0;
424                                         if (token_len != epos &&
425                                             (lex_class & SIGN))
426                                         {
427                                                 isint = 0;
428                                                 isfloat = 0;
429                                         }
430 #endif
431                                         if (c == '-')
432                                                 isneg = 1;
433 #ifdef AO_SCHEME_FEATURE_FLOAT
434                                         if (c == '.' && epos != 0)
435                                                 isfloat = 0;
436                                         if (c == 'e' || c == 'E') {
437                                                 if (token_len == 0)
438                                                         isfloat = 0;
439                                                 else
440                                                         epos = token_len + 1;
441                                         }
442 #endif
443                                         if (lex_class & DIGIT) {
444                                                 hasdigit = 1;
445                                                 if (isint)
446                                                         token_int = token_int * 10 + c - '0';
447                                         }
448                                 }
449                                 add_token (c);
450                                 c = lexc ();
451                                 if ((lex_class & (NOTNAME))
452 #ifdef AO_SCHEME_FEATURE_FLOAT
453                                     && (c != '.' || !isfloat)
454 #endif
455                                         ) {
456 #ifdef AO_SCHEME_FEATURE_FLOAT
457                                         unsigned int u;
458 #endif
459 //                                      if (lex_class & ENDOFFILE)
460 //                                              clearerr (f);
461                                         lex_unget(c);
462                                         end_token ();
463                                         if (isint && hasdigit) {
464                                                 if (isneg)
465                                                         token_int = -token_int;
466                                                 return NUM;
467                                         }
468 #ifdef AO_SCHEME_FEATURE_FLOAT
469                                         if (isfloat && hasdigit) {
470                                                 token_float = strtof(token_string, NULL);
471                                                 return FLOAT;
472                                         }
473                                         for (u = 0; u < NUM_NAMED_FLOATS; u++)
474                                                 if (!strcmp(namedfloats[u].name, token_string)) {
475                                                         token_float = namedfloats[u].value;
476                                                         return FLOAT;
477                                                 }
478 #endif
479                                         return NAME;
480                                 }
481                         }
482                 }
483         }
484 }
485
486 static inline int lex(void)
487 {
488         int     parse_token = _lex();
489         RDBGI("token %d (%s)\n", parse_token, token_string);
490         return parse_token;
491 }
492
493 static int parse_token;
494
495 int                     ao_scheme_read_list;
496 struct ao_scheme_cons   *ao_scheme_read_cons;
497 struct ao_scheme_cons   *ao_scheme_read_cons_tail;
498 struct ao_scheme_cons   *ao_scheme_read_stack;
499 static int              ao_scheme_read_state;
500
501 #define READ_IN_QUOTE   0x01
502 #define READ_SAW_DOT    0x02
503 #define READ_DONE_DOT   0x04
504 #define READ_SAW_VECTOR 0x08
505
506 static int
507 push_read_stack(int read_state)
508 {
509         RDBGI("push read stack %p 0x%x\n", ao_scheme_read_cons, read_state);
510         RDBG_IN();
511         if (ao_scheme_read_list) {
512                 ao_scheme_read_stack = ao_scheme_cons_cons(ao_scheme_cons_poly(ao_scheme_read_cons),
513                                                        ao_scheme__cons(ao_scheme_int_poly(read_state),
514                                                                      ao_scheme_cons_poly(ao_scheme_read_stack)));
515                 if (!ao_scheme_read_stack)
516                         return 0;
517         } else
518                 ao_scheme_read_state = read_state;
519         ao_scheme_read_cons = NULL;
520         ao_scheme_read_cons_tail = NULL;
521         return 1;
522 }
523
524 static int
525 pop_read_stack(void)
526 {
527         int     read_state = 0;
528         if (ao_scheme_read_list) {
529                 ao_scheme_read_cons = ao_scheme_poly_cons(ao_scheme_read_stack->car);
530                 ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
531                 read_state = ao_scheme_poly_int(ao_scheme_read_stack->car);
532                 ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
533                 for (ao_scheme_read_cons_tail = ao_scheme_read_cons;
534                      ao_scheme_read_cons_tail && ao_scheme_read_cons_tail->cdr;
535                      ao_scheme_read_cons_tail = ao_scheme_poly_cons(ao_scheme_read_cons_tail->cdr))
536                         ;
537         } else {
538                 ao_scheme_read_cons = 0;
539                 ao_scheme_read_cons_tail = 0;
540                 ao_scheme_read_stack = 0;
541                 read_state = ao_scheme_read_state;
542         }
543         RDBG_OUT();
544         RDBGI("pop read stack %p %d\n", ao_scheme_read_cons, read_state);
545         return read_state;
546 }
547
548 #ifdef AO_SCHEME_FEATURE_VECTOR
549 #define is_open(t) ((t) == OPEN || (t) == OPEN_VECTOR)
550 #else
551 #define is_open(t) ((t) == OPEN)
552 #endif
553
554 ao_poly
555 ao_scheme_read(void)
556 {
557         struct ao_scheme_atom   *atom;
558         struct ao_scheme_string *string;
559         int                     read_state;
560         ao_poly                 v = AO_SCHEME_NIL;
561
562         ao_scheme_read_list = 0;
563         read_state = 0;
564         ao_scheme_read_cons = ao_scheme_read_cons_tail = ao_scheme_read_stack = 0;
565         for (;;) {
566                 parse_token = lex();
567                 while (is_open(parse_token)) {
568 #ifdef AO_SCHEME_FEATURE_VECTOR
569                         if (parse_token == OPEN_VECTOR)
570                                 read_state |= READ_SAW_VECTOR;
571 #endif
572                         if (!push_read_stack(read_state))
573                                 return AO_SCHEME_NIL;
574                         ao_scheme_read_list++;
575                         read_state = 0;
576                         parse_token = lex();
577                 }
578
579                 switch (parse_token) {
580                 case END:
581                 default:
582                         if (ao_scheme_read_list)
583                                 ao_scheme_error(AO_SCHEME_EOF, "unexpected end of file");
584                         return _ao_scheme_atom_eof;
585                         break;
586                 case NAME:
587                         atom = ao_scheme_atom_intern(token_string);
588                         if (atom)
589                                 v = ao_scheme_atom_poly(atom);
590                         else
591                                 v = AO_SCHEME_NIL;
592                         break;
593                 case NUM:
594                         v = ao_scheme_integer_poly(token_int);
595                         break;
596 #ifdef AO_SCHEME_FEATURE_FLOAT
597                 case FLOAT:
598                         v = ao_scheme_float_get(token_float);
599                         break;
600 #endif
601                 case BOOL:
602                         if (token_string[0] == 't')
603                                 v = _ao_scheme_bool_true;
604                         else
605                                 v = _ao_scheme_bool_false;
606                         break;
607                 case STRING:
608                         string = ao_scheme_string_make(token_string);
609                         if (string)
610                                 v = ao_scheme_string_poly(string);
611                         else
612                                 v = AO_SCHEME_NIL;
613                         break;
614                 case QUOTE:
615 #ifdef AO_SCHEME_FEATURE_QUASI
616                 case QUASIQUOTE:
617                 case UNQUOTE:
618                 case UNQUOTE_SPLICING:
619 #endif
620                         if (!push_read_stack(read_state))
621                                 return AO_SCHEME_NIL;
622                         ao_scheme_read_list++;
623                         read_state = READ_IN_QUOTE;
624                         switch (parse_token) {
625                         case QUOTE:
626                                 v = _ao_scheme_atom_quote;
627                                 break;
628 #ifdef AO_SCHEME_FEATURE_QUASI
629                         case QUASIQUOTE:
630                                 v = _ao_scheme_atom_quasiquote;
631                                 break;
632                         case UNQUOTE:
633                                 v = _ao_scheme_atom_unquote;
634                                 break;
635                         case UNQUOTE_SPLICING:
636                                 v = _ao_scheme_atom_unquote2dsplicing;
637                                 break;
638 #endif
639                         }
640                         break;
641                 case CLOSE:
642                         if (!ao_scheme_read_list) {
643                                 v = AO_SCHEME_NIL;
644                                 break;
645                         }
646                         v = ao_scheme_cons_poly(ao_scheme_read_cons);
647                         --ao_scheme_read_list;
648                         read_state = pop_read_stack();
649 #ifdef AO_SCHEME_FEATURE_VECTOR
650                         if (read_state & READ_SAW_VECTOR)
651                                 v = ao_scheme_vector_poly(ao_scheme_list_to_vector(ao_scheme_poly_cons(v)));
652 #endif
653                         break;
654                 case DOT:
655                         if (!ao_scheme_read_list) {
656                                 ao_scheme_error(AO_SCHEME_INVALID, ". outside of cons");
657                                 return AO_SCHEME_NIL;
658                         }
659                         if (!ao_scheme_read_cons) {
660                                 ao_scheme_error(AO_SCHEME_INVALID, ". first in cons");
661                                 return AO_SCHEME_NIL;
662                         }
663                         read_state |= READ_SAW_DOT;
664                         continue;
665                 }
666
667                 /* loop over QUOTE ends */
668                 for (;;) {
669                         if (!ao_scheme_read_list)
670                                 return v;
671
672                         if (read_state & READ_DONE_DOT) {
673                                 ao_scheme_error(AO_SCHEME_INVALID, ". not last in cons");
674                                 return AO_SCHEME_NIL;
675                         }
676
677                         if (read_state & READ_SAW_DOT) {
678                                 read_state |= READ_DONE_DOT;
679                                 ao_scheme_read_cons_tail->cdr = v;
680                         } else {
681                                 struct ao_scheme_cons   *read = ao_scheme_cons_cons(v, AO_SCHEME_NIL);
682                                 if (!read)
683                                         return AO_SCHEME_NIL;
684
685                                 if (ao_scheme_read_cons_tail)
686                                         ao_scheme_read_cons_tail->cdr = ao_scheme_cons_poly(read);
687                                 else
688                                         ao_scheme_read_cons = read;
689                                 ao_scheme_read_cons_tail = read;
690                         }
691
692                         if (!(read_state & READ_IN_QUOTE) || !ao_scheme_read_cons->cdr)
693                                 break;
694
695                         v = ao_scheme_cons_poly(ao_scheme_read_cons);
696                         --ao_scheme_read_list;
697                         read_state = pop_read_stack();
698                 }
699         }
700         return v;
701 }