update Releasing with changes discovered in 1.8.3 release process
[fw/altos] / src / scheme / ao_scheme_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_scheme.h"
16 #include "ao_scheme_read.h"
17 #include <math.h>
18 #include <stdlib.h>
19
20 static const uint16_t   lex_classes[128] = {
21         IGNORE,         /* ^@ */
22         IGNORE,         /* ^A */
23         IGNORE,         /* ^B */
24         IGNORE,         /* ^C */
25         IGNORE,         /* ^D */
26         IGNORE,         /* ^E */
27         IGNORE,         /* ^F */
28         IGNORE,         /* ^G */
29         IGNORE,         /* ^H */
30         WHITE,          /* ^I */
31         WHITE,          /* ^J */
32         WHITE,          /* ^K */
33         WHITE,          /* ^L */
34         WHITE,          /* ^M */
35         IGNORE,         /* ^N */
36         IGNORE,         /* ^O */
37         IGNORE,         /* ^P */
38         IGNORE,         /* ^Q */
39         IGNORE,         /* ^R */
40         IGNORE,         /* ^S */
41         IGNORE,         /* ^T */
42         IGNORE,         /* ^U */
43         IGNORE,         /* ^V */
44         IGNORE,         /* ^W */
45         IGNORE,         /* ^X */
46         IGNORE,         /* ^Y */
47         IGNORE,         /* ^Z */
48         IGNORE,         /* ^[ */
49         IGNORE,         /* ^\ */
50         IGNORE,         /* ^] */
51         IGNORE,         /* ^^ */
52         IGNORE,         /* ^_ */
53         PRINTABLE|WHITE,        /*    */
54         PRINTABLE,              /* ! */
55         PRINTABLE|STRINGC,      /* " */
56         PRINTABLE|POUND,        /* # */
57         PRINTABLE,              /* $ */
58         PRINTABLE,              /* % */
59         PRINTABLE,              /* & */
60         PRINTABLE|SPECIAL,      /* ' */
61         PRINTABLE|SPECIAL,      /* ( */
62         PRINTABLE|SPECIAL,      /* ) */
63         PRINTABLE,              /* * */
64         PRINTABLE|SIGN,         /* + */
65         PRINTABLE|SPECIAL,      /* , */
66         PRINTABLE|SIGN,         /* - */
67         PRINTABLE|DOTC|FLOATC,  /* . */
68         PRINTABLE,              /* / */
69         PRINTABLE|DIGIT,        /* 0 */
70         PRINTABLE|DIGIT,        /* 1 */
71         PRINTABLE|DIGIT,        /* 2 */
72         PRINTABLE|DIGIT,        /* 3 */
73         PRINTABLE|DIGIT,        /* 4 */
74         PRINTABLE|DIGIT,        /* 5 */
75         PRINTABLE|DIGIT,        /* 6 */
76         PRINTABLE|DIGIT,        /* 7 */
77         PRINTABLE|DIGIT,        /* 8 */
78         PRINTABLE|DIGIT,        /* 9 */
79         PRINTABLE,              /* : */
80         PRINTABLE|COMMENT,      /* ; */
81         PRINTABLE,              /* < */
82         PRINTABLE,              /* = */
83         PRINTABLE,              /* > */
84         PRINTABLE,              /* ? */
85         PRINTABLE,              /*  @ */
86         PRINTABLE,              /*  A */
87         PRINTABLE,              /*  B */
88         PRINTABLE,              /*  C */
89         PRINTABLE,              /*  D */
90         PRINTABLE|FLOATC,       /*  E */
91         PRINTABLE,              /*  F */
92         PRINTABLE,              /*  G */
93         PRINTABLE,              /*  H */
94         PRINTABLE,              /*  I */
95         PRINTABLE,              /*  J */
96         PRINTABLE,              /*  K */
97         PRINTABLE,              /*  L */
98         PRINTABLE,              /*  M */
99         PRINTABLE,              /*  N */
100         PRINTABLE,              /*  O */
101         PRINTABLE,              /*  P */
102         PRINTABLE,              /*  Q */
103         PRINTABLE,              /*  R */
104         PRINTABLE,              /*  S */
105         PRINTABLE,              /*  T */
106         PRINTABLE,              /*  U */
107         PRINTABLE,              /*  V */
108         PRINTABLE,              /*  W */
109         PRINTABLE,              /*  X */
110         PRINTABLE,              /*  Y */
111         PRINTABLE,              /*  Z */
112         PRINTABLE,              /*  [ */
113         PRINTABLE|BACKSLASH,    /*  \ */
114         PRINTABLE,              /*  ] */
115         PRINTABLE,              /*  ^ */
116         PRINTABLE,              /*  _ */
117         PRINTABLE|SPECIAL,      /*  ` */
118         PRINTABLE,              /*  a */
119         PRINTABLE,              /*  b */
120         PRINTABLE,              /*  c */
121         PRINTABLE,              /*  d */
122         PRINTABLE|FLOATC,       /*  e */
123         PRINTABLE,              /*  f */
124         PRINTABLE,              /*  g */
125         PRINTABLE,              /*  h */
126         PRINTABLE,              /*  i */
127         PRINTABLE,              /*  j */
128         PRINTABLE,              /*  k */
129         PRINTABLE,              /*  l */
130         PRINTABLE,              /*  m */
131         PRINTABLE,              /*  n */
132         PRINTABLE,              /*  o */
133         PRINTABLE,              /*  p */
134         PRINTABLE,              /*  q */
135         PRINTABLE,              /*  r */
136         PRINTABLE,              /*  s */
137         PRINTABLE,              /*  t */
138         PRINTABLE,              /*  u */
139         PRINTABLE,              /*  v */
140         PRINTABLE,              /*  w */
141         PRINTABLE,              /*  x */
142         PRINTABLE,              /*  y */
143         PRINTABLE,              /*  z */
144         PRINTABLE,              /*  { */
145         PRINTABLE,              /*  | */
146         PRINTABLE,              /*  } */
147         PRINTABLE,              /*  ~ */
148         IGNORE,                 /*  ^? */
149 };
150
/* One-character pushback slot; zero means the slot is empty. */
static int lex_unget_c;

/*
 * Fetch the next raw input character.
 *
 * A character previously stored by lex_unget() is handed back first (and
 * the slot is emptied); otherwise the character comes from
 * ao_scheme_getc().
 */
static inline int
lex_get(void)
{
        int     pending = lex_unget_c;

        if (!pending)
                return ao_scheme_getc();

        lex_unget_c = 0;
        return pending;
}
165
166 static inline void
167 lex_unget(int c)
168 {
169         if (c != EOF)
170                 lex_unget_c = c;
171 }
172
173 static uint16_t lex_class;
174
175 static int
176 lexc(void)
177 {
178         int     c;
179         do {
180                 c = lex_get();
181                 if (c == EOF) {
182                         c = 0;
183                         lex_class = ENDOFFILE;
184                 } else {
185                         c &= 0x7f;
186                         lex_class = lex_classes[c];
187                 }
188         } while (lex_class & IGNORE);
189         return c;
190 }
191
192 static int
193 lex_quoted(void)
194 {
195         int     c;
196         int     v;
197         int     count;
198
199         c = lex_get();
200         if (c == EOF) {
201                 lex_class = ENDOFFILE;
202                 return 0;
203         }
204         lex_class = 0;
205         c &= 0x7f;
206         switch (c) {
207         case 'n':
208                 return '\n';
209         case 'f':
210                 return '\f';
211         case 'b':
212                 return '\b';
213         case 'r':
214                 return '\r';
215         case 'v':
216                 return '\v';
217         case 't':
218                 return '\t';
219         case '0':
220         case '1':
221         case '2':
222         case '3':
223         case '4':
224         case '5':
225         case '6':
226         case '7':
227                 v = c - '0';
228                 count = 1;
229                 while (count <= 3) {
230                         c = lex_get();
231                         if (c == EOF)
232                                 return EOF;
233                         c &= 0x7f;
234                         if (c < '0' || '7' < c) {
235                                 lex_unget(c);
236                                 break;
237                         }
238                         v = (v << 3) + c - '0';
239                         ++count;
240                 }
241                 return v;
242         default:
243                 return c;
244         }
245 }
246
/* Size of the token text buffer, including the terminating NUL. */
#define AO_SCHEME_TOKEN_MAX     128

/* Text of the token being scanned and the number of characters in it. */
static char     token_string[AO_SCHEME_TOKEN_MAX];
static int      token_len;

/* Value of the most recent integer/character token and float token. */
static int32_t  token_int;
static float    token_float;

/*
 * Append one character to the token text.
 *
 * A zero character is ignored, and so is anything that would not leave
 * room for the terminating NUL; over-long tokens are silently truncated.
 */
static inline void add_token(int c) {
        if (!c || token_len >= AO_SCHEME_TOKEN_MAX - 1)
                return;
        token_string[token_len] = c;
        token_len++;
}
258
259 static inline void del_token(void) {
260         if (token_len > 0)
261                 token_len--;
262 }
263
264 static inline void end_token(void) {
265         token_string[token_len] = '\0';
266 }
267
/* Spelling of a special floating point literal and the value it denotes. */
struct namedfloat {
        const char      *name;
        float           value;
};

/*
 * Names that the lexer turns into FLOAT tokens even though they contain
 * no digits. Both NaN spellings map to the same NAN value.
 */
static const struct namedfloat namedfloats[] = {
        { "+inf.0",  INFINITY },
        { "-inf.0", -INFINITY },
        { "+nan.0",  NAN },
        { "-nan.0",  NAN },
};

/* Number of entries in namedfloats[] */
#define NUM_NAMED_FLOATS        (sizeof (namedfloats) / sizeof (namedfloats[0]))
281
282 static int
283 _lex(void)
284 {
285         int     c;
286
287         token_len = 0;
288         for (;;) {
289                 c = lexc();
290                 if (lex_class & ENDOFFILE)
291                         return END;
292
293                 if (lex_class & WHITE)
294                         continue;
295
296                 if (lex_class & COMMENT) {
297                         while ((c = lexc()) != '\n') {
298                                 if (lex_class & ENDOFFILE)
299                                         return END;
300                         }
301                         continue;
302                 }
303
304                 if (lex_class & (SPECIAL|DOTC)) {
305                         add_token(c);
306                         end_token();
307                         switch (c) {
308                         case '(':
309                         case '[':
310                                 return OPEN;
311                         case ')':
312                         case ']':
313                                 return CLOSE;
314                         case '\'':
315                                 return QUOTE;
316                         case '.':
317                                 return DOT;
318                         case '`':
319                                 return QUASIQUOTE;
320                         case ',':
321                                 c = lexc();
322                                 if (c == '@') {
323                                         add_token(c);
324                                         end_token();
325                                         return UNQUOTE_SPLICING;
326                                 } else {
327                                         lex_unget(c);
328                                         return UNQUOTE;
329                                 }
330                         }
331                 }
332                 if (lex_class & POUND) {
333                         c = lexc();
334                         switch (c) {
335                         case 't':
336                                 add_token(c);
337                                 end_token();
338                                 return BOOL;
339                         case 'f':
340                                 add_token(c);
341                                 end_token();
342                                 return BOOL;
343                         case '(':
344                                 return OPEN_VECTOR;
345                         case '\\':
346                                 for (;;) {
347                                         int alphabetic;
348                                         c = lexc();
349                                         alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
350                                         if (token_len == 0) {
351                                                 add_token(c);
352                                                 if (!alphabetic)
353                                                         break;
354                                         } else {
355                                                 if (alphabetic)
356                                                         add_token(c);
357                                                 else {
358                                                         lex_unget(c);
359                                                         break;
360                                                 }
361                                         }
362                                 }
363                                 end_token();
364                                 if (token_len == 1)
365                                         token_int = token_string[0];
366                                 else if (!strcmp(token_string, "space"))
367                                         token_int = ' ';
368                                 else if (!strcmp(token_string, "newline"))
369                                         token_int = '\n';
370                                 else if (!strcmp(token_string, "tab"))
371                                         token_int = '\t';
372                                 else if (!strcmp(token_string, "return"))
373                                         token_int = '\r';
374                                 else if (!strcmp(token_string, "formfeed"))
375                                         token_int = '\f';
376                                 else {
377                                         ao_scheme_error(AO_SCHEME_INVALID, "invalid character token #\\%s", token_string);
378                                         continue;
379                                 }
380                                 return NUM;
381                         }
382                 }
383                 if (lex_class & STRINGC) {
384                         for (;;) {
385                                 c = lexc();
386                                 if (lex_class & BACKSLASH)
387                                         c = lex_quoted();
388                                 if (lex_class & (STRINGC|ENDOFFILE)) {
389                                         end_token();
390                                         return STRING;
391                                 }
392                                 add_token(c);
393                         }
394                 }
395                 if (lex_class & PRINTABLE) {
396                         int     isfloat;
397                         int     hasdigit;
398                         int     isneg;
399                         int     isint;
400                         int     epos;
401
402                         isfloat = 1;
403                         isint = 1;
404                         hasdigit = 0;
405                         token_int = 0;
406                         isneg = 0;
407                         epos = 0;
408                         for (;;) {
409                                 if (!(lex_class & NUMBER)) {
410                                         isint = 0;
411                                         isfloat = 0;
412                                 } else {
413                                         if (!(lex_class & INTEGER))
414                                                 isint = 0;
415                                         if (token_len != epos &&
416                                             (lex_class & SIGN))
417                                         {
418                                                 isint = 0;
419                                                 isfloat = 0;
420                                         }
421                                         if (c == '-')
422                                                 isneg = 1;
423                                         if (c == '.' && epos != 0)
424                                                 isfloat = 0;
425                                         if (c == 'e' || c == 'E') {
426                                                 if (token_len == 0)
427                                                         isfloat = 0;
428                                                 else
429                                                         epos = token_len + 1;
430                                         }
431                                         if (lex_class & DIGIT) {
432                                                 hasdigit = 1;
433                                                 if (isint)
434                                                         token_int = token_int * 10 + c - '0';
435                                         }
436                                 }
437                                 add_token (c);
438                                 c = lexc ();
439                                 if ((lex_class & (NOTNAME)) && (c != '.' || !isfloat)) {
440                                         unsigned int u;
441 //                                      if (lex_class & ENDOFFILE)
442 //                                              clearerr (f);
443                                         lex_unget(c);
444                                         end_token ();
445                                         if (isint && hasdigit) {
446                                                 if (isneg)
447                                                         token_int = -token_int;
448                                                 return NUM;
449                                         }
450                                         if (isfloat && hasdigit) {
451                                                 token_float = strtof(token_string, NULL);
452                                                 return FLOAT;
453                                         }
454                                         for (u = 0; u < NUM_NAMED_FLOATS; u++)
455                                                 if (!strcmp(namedfloats[u].name, token_string)) {
456                                                         token_float = namedfloats[u].value;
457                                                         return FLOAT;
458                                                 }
459                                         return NAME;
460                                 }
461                         }
462                 }
463         }
464 }
465
466 static inline int lex(void)
467 {
468         int     parse_token = _lex();
469         RDBGI("token %d (%s)\n", parse_token, token_string);
470         return parse_token;
471 }
472
/* Token most recently returned by lex() to ao_scheme_read() */
static int parse_token;

/*
 * Reader state shared between ao_scheme_read(), push_read_stack() and
 * pop_read_stack().
 */

/* Number of lists (or quote forms) currently open */
int                     ao_scheme_read_list;

/* Head and last cell of the list being built at the current level */
struct ao_scheme_cons   *ao_scheme_read_cons;
struct ao_scheme_cons   *ao_scheme_read_cons_tail;

/* Saved (list, state) pairs for the enclosing levels */
struct ao_scheme_cons   *ao_scheme_read_stack;

/* State saved by push_read_stack() when no list was open yet */
static int              ao_scheme_read_state;

/* Per-level reader state flags */
#define READ_IN_QUOTE   (1 << 0)
#define READ_SAW_DOT    (1 << 1)
#define READ_DONE_DOT   (1 << 2)
#define READ_SAW_VECTOR (1 << 3)
485
486 static int
487 push_read_stack(int read_state)
488 {
489         RDBGI("push read stack %p 0x%x\n", ao_scheme_read_cons, read_state);
490         RDBG_IN();
491         if (ao_scheme_read_list) {
492                 ao_scheme_read_stack = ao_scheme_cons_cons(ao_scheme_cons_poly(ao_scheme_read_cons),
493                                                        ao_scheme__cons(ao_scheme_int_poly(read_state),
494                                                                      ao_scheme_cons_poly(ao_scheme_read_stack)));
495                 if (!ao_scheme_read_stack)
496                         return 0;
497         } else
498                 ao_scheme_read_state = read_state;
499         ao_scheme_read_cons = NULL;
500         ao_scheme_read_cons_tail = NULL;
501         return 1;
502 }
503
504 static int
505 pop_read_stack(void)
506 {
507         int     read_state = 0;
508         if (ao_scheme_read_list) {
509                 ao_scheme_read_cons = ao_scheme_poly_cons(ao_scheme_read_stack->car);
510                 ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
511                 read_state = ao_scheme_poly_int(ao_scheme_read_stack->car);
512                 ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
513                 for (ao_scheme_read_cons_tail = ao_scheme_read_cons;
514                      ao_scheme_read_cons_tail && ao_scheme_read_cons_tail->cdr;
515                      ao_scheme_read_cons_tail = ao_scheme_poly_cons(ao_scheme_read_cons_tail->cdr))
516                         ;
517         } else {
518                 ao_scheme_read_cons = 0;
519                 ao_scheme_read_cons_tail = 0;
520                 ao_scheme_read_stack = 0;
521                 read_state = ao_scheme_read_state;
522         }
523         RDBG_OUT();
524         RDBGI("pop read stack %p %d\n", ao_scheme_read_cons, read_state);
525         return read_state;
526 }
527
528 ao_poly
529 ao_scheme_read(void)
530 {
531         struct ao_scheme_atom   *atom;
532         char                    *string;
533         int                     read_state;
534         ao_poly                 v = AO_SCHEME_NIL;
535
536         ao_scheme_read_list = 0;
537         read_state = 0;
538         ao_scheme_read_cons = ao_scheme_read_cons_tail = ao_scheme_read_stack = 0;
539         for (;;) {
540                 parse_token = lex();
541                 while (parse_token == OPEN || parse_token == OPEN_VECTOR) {
542                         if (parse_token == OPEN_VECTOR)
543                                 read_state |= READ_SAW_VECTOR;
544                         if (!push_read_stack(read_state))
545                                 return AO_SCHEME_NIL;
546                         ao_scheme_read_list++;
547                         read_state = 0;
548                         parse_token = lex();
549                 }
550
551                 switch (parse_token) {
552                 case END:
553                 default:
554                         if (ao_scheme_read_list)
555                                 ao_scheme_error(AO_SCHEME_EOF, "unexpected end of file");
556                         return _ao_scheme_atom_eof;
557                         break;
558                 case NAME:
559                         atom = ao_scheme_atom_intern(token_string);
560                         if (atom)
561                                 v = ao_scheme_atom_poly(atom);
562                         else
563                                 v = AO_SCHEME_NIL;
564                         break;
565                 case NUM:
566                         v = ao_scheme_integer_poly(token_int);
567                         break;
568                 case FLOAT:
569                         v = ao_scheme_float_get(token_float);
570                         break;
571                 case BOOL:
572                         if (token_string[0] == 't')
573                                 v = _ao_scheme_bool_true;
574                         else
575                                 v = _ao_scheme_bool_false;
576                         break;
577                 case STRING:
578                         string = ao_scheme_string_copy(token_string);
579                         if (string)
580                                 v = ao_scheme_string_poly(string);
581                         else
582                                 v = AO_SCHEME_NIL;
583                         break;
584                 case QUOTE:
585                 case QUASIQUOTE:
586                 case UNQUOTE:
587                 case UNQUOTE_SPLICING:
588                         if (!push_read_stack(read_state))
589                                 return AO_SCHEME_NIL;
590                         ao_scheme_read_list++;
591                         read_state = READ_IN_QUOTE;
592                         switch (parse_token) {
593                         case QUOTE:
594                                 v = _ao_scheme_atom_quote;
595                                 break;
596                         case QUASIQUOTE:
597                                 v = _ao_scheme_atom_quasiquote;
598                                 break;
599                         case UNQUOTE:
600                                 v = _ao_scheme_atom_unquote;
601                                 break;
602                         case UNQUOTE_SPLICING:
603                                 v = _ao_scheme_atom_unquote2dsplicing;
604                                 break;
605                         }
606                         break;
607                 case CLOSE:
608                         if (!ao_scheme_read_list) {
609                                 v = AO_SCHEME_NIL;
610                                 break;
611                         }
612                         v = ao_scheme_cons_poly(ao_scheme_read_cons);
613                         --ao_scheme_read_list;
614                         read_state = pop_read_stack();
615                         if (read_state & READ_SAW_VECTOR)
616                                 v = ao_scheme_vector_poly(ao_scheme_list_to_vector(ao_scheme_poly_cons(v)));
617                         break;
618                 case DOT:
619                         if (!ao_scheme_read_list) {
620                                 ao_scheme_error(AO_SCHEME_INVALID, ". outside of cons");
621                                 return AO_SCHEME_NIL;
622                         }
623                         if (!ao_scheme_read_cons) {
624                                 ao_scheme_error(AO_SCHEME_INVALID, ". first in cons");
625                                 return AO_SCHEME_NIL;
626                         }
627                         read_state |= READ_SAW_DOT;
628                         continue;
629                 }
630
631                 /* loop over QUOTE ends */
632                 for (;;) {
633                         if (!ao_scheme_read_list)
634                                 return v;
635
636                         if (read_state & READ_DONE_DOT) {
637                                 ao_scheme_error(AO_SCHEME_INVALID, ". not last in cons");
638                                 return AO_SCHEME_NIL;
639                         }
640
641                         if (read_state & READ_SAW_DOT) {
642                                 read_state |= READ_DONE_DOT;
643                                 ao_scheme_read_cons_tail->cdr = v;
644                         } else {
645                                 struct ao_scheme_cons   *read = ao_scheme_cons_cons(v, AO_SCHEME_NIL);
646                                 if (!read)
647                                         return AO_SCHEME_NIL;
648
649                                 if (ao_scheme_read_cons_tail)
650                                         ao_scheme_read_cons_tail->cdr = ao_scheme_cons_poly(read);
651                                 else
652                                         ao_scheme_read_cons = read;
653                                 ao_scheme_read_cons_tail = read;
654                         }
655
656                         if (!(read_state & READ_IN_QUOTE) || !ao_scheme_read_cons->cdr)
657                                 break;
658
659                         v = ao_scheme_cons_poly(ao_scheme_read_cons);
660                         --ao_scheme_read_list;
661                         read_state = pop_read_stack();
662                 }
663         }
664         return v;
665 }