altos/lisp: Add scheme-style bools (#t and #f)
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_lisp.h"
16 #include "ao_lisp_read.h"
17
18 static const uint16_t   lex_classes[128] = {
19         IGNORE,         /* ^@ */
20         IGNORE,         /* ^A */
21         IGNORE,         /* ^B */
22         IGNORE,         /* ^C */
23         IGNORE,         /* ^D */
24         IGNORE,         /* ^E */
25         IGNORE,         /* ^F */
26         IGNORE,         /* ^G */
27         IGNORE,         /* ^H */
28         WHITE,          /* ^I */
29         WHITE,          /* ^J */
30         WHITE,          /* ^K */
31         WHITE,          /* ^L */
32         WHITE,          /* ^M */
33         IGNORE,         /* ^N */
34         IGNORE,         /* ^O */
35         IGNORE,         /* ^P */
36         IGNORE,         /* ^Q */
37         IGNORE,         /* ^R */
38         IGNORE,         /* ^S */
39         IGNORE,         /* ^T */
40         IGNORE,         /* ^U */
41         IGNORE,         /* ^V */
42         IGNORE,         /* ^W */
43         IGNORE,         /* ^X */
44         IGNORE,         /* ^Y */
45         IGNORE,         /* ^Z */
46         IGNORE,         /* ^[ */
47         IGNORE,         /* ^\ */
48         IGNORE,         /* ^] */
49         IGNORE,         /* ^^ */
50         IGNORE,         /* ^_ */
51         PRINTABLE|WHITE,        /*    */
52         PRINTABLE,              /* ! */
53         PRINTABLE|STRINGC,      /* " */
54         PRINTABLE|POUND,        /* # */
55         PRINTABLE,              /* $ */
56         PRINTABLE,              /* % */
57         PRINTABLE,              /* & */
58         PRINTABLE|SPECIAL,      /* ' */
59         PRINTABLE|SPECIAL,      /* ( */
60         PRINTABLE|SPECIAL,      /* ) */
61         PRINTABLE,              /* * */
62         PRINTABLE|SIGN,         /* + */
63         PRINTABLE,              /* , */
64         PRINTABLE|SIGN,         /* - */
65         PRINTABLE|SPECIAL,      /* . */
66         PRINTABLE,              /* / */
67         PRINTABLE|DIGIT,        /* 0 */
68         PRINTABLE|DIGIT,        /* 1 */
69         PRINTABLE|DIGIT,        /* 2 */
70         PRINTABLE|DIGIT,        /* 3 */
71         PRINTABLE|DIGIT,        /* 4 */
72         PRINTABLE|DIGIT,        /* 5 */
73         PRINTABLE|DIGIT,        /* 6 */
74         PRINTABLE|DIGIT,        /* 7 */
75         PRINTABLE|DIGIT,        /* 8 */
76         PRINTABLE|DIGIT,        /* 9 */
77         PRINTABLE,              /* : */
78         PRINTABLE|COMMENT,      /* ; */
79         PRINTABLE,              /* < */
80         PRINTABLE,              /* = */
81         PRINTABLE,              /* > */
82         PRINTABLE,              /* ? */
83         PRINTABLE,              /*  @ */
84         PRINTABLE,              /*  A */
85         PRINTABLE,              /*  B */
86         PRINTABLE,              /*  C */
87         PRINTABLE,              /*  D */
88         PRINTABLE,              /*  E */
89         PRINTABLE,              /*  F */
90         PRINTABLE,              /*  G */
91         PRINTABLE,              /*  H */
92         PRINTABLE,              /*  I */
93         PRINTABLE,              /*  J */
94         PRINTABLE,              /*  K */
95         PRINTABLE,              /*  L */
96         PRINTABLE,              /*  M */
97         PRINTABLE,              /*  N */
98         PRINTABLE,              /*  O */
99         PRINTABLE,              /*  P */
100         PRINTABLE,              /*  Q */
101         PRINTABLE,              /*  R */
102         PRINTABLE,              /*  S */
103         PRINTABLE,              /*  T */
104         PRINTABLE,              /*  U */
105         PRINTABLE,              /*  V */
106         PRINTABLE,              /*  W */
107         PRINTABLE,              /*  X */
108         PRINTABLE,              /*  Y */
109         PRINTABLE,              /*  Z */
110         PRINTABLE,              /*  [ */
111         PRINTABLE|BACKSLASH,    /*  \ */
112         PRINTABLE,              /*  ] */
113         PRINTABLE,              /*  ^ */
114         PRINTABLE,              /*  _ */
115         PRINTABLE,              /*  ` */
116         PRINTABLE,              /*  a */
117         PRINTABLE,              /*  b */
118         PRINTABLE,              /*  c */
119         PRINTABLE,              /*  d */
120         PRINTABLE,              /*  e */
121         PRINTABLE,              /*  f */
122         PRINTABLE,              /*  g */
123         PRINTABLE,              /*  h */
124         PRINTABLE,              /*  i */
125         PRINTABLE,              /*  j */
126         PRINTABLE,              /*  k */
127         PRINTABLE,              /*  l */
128         PRINTABLE,              /*  m */
129         PRINTABLE,              /*  n */
130         PRINTABLE,              /*  o */
131         PRINTABLE,              /*  p */
132         PRINTABLE,              /*  q */
133         PRINTABLE,              /*  r */
134         PRINTABLE,              /*  s */
135         PRINTABLE,              /*  t */
136         PRINTABLE,              /*  u */
137         PRINTABLE,              /*  v */
138         PRINTABLE,              /*  w */
139         PRINTABLE,              /*  x */
140         PRINTABLE,              /*  y */
141         PRINTABLE,              /*  z */
142         PRINTABLE,              /*  { */
143         PRINTABLE|VBAR,         /*  | */
144         PRINTABLE,              /*  } */
145         PRINTABLE|TWIDDLE,      /*  ~ */
146         IGNORE,                 /*  ^? */
147 };
148
149 static int lex_unget_c;
150
151 static inline int
152 lex_get()
153 {
154         int     c;
155         if (lex_unget_c) {
156                 c = lex_unget_c;
157                 lex_unget_c = 0;
158         } else {
159                 c = ao_lisp_getc();
160         }
161         return c;
162 }
163
164 static inline void
165 lex_unget(int c)
166 {
167         if (c != EOF)
168                 lex_unget_c = c;
169 }
170
171 static int
172 lex_quoted (void)
173 {
174         int     c;
175         int     v;
176         int     count;
177
178         c = lex_get();
179         if (c == EOF)
180                 return EOF;
181         c &= 0x7f;
182         switch (c) {
183         case 'n':
184                 return '\n';
185         case 'f':
186                 return '\f';
187         case 'b':
188                 return '\b';
189         case 'r':
190                 return '\r';
191         case 'v':
192                 return '\v';
193         case 't':
194                 return '\t';
195         case '0':
196         case '1':
197         case '2':
198         case '3':
199         case '4':
200         case '5':
201         case '6':
202         case '7':
203                 v = c - '0';
204                 count = 1;
205                 while (count <= 3) {
206                         c = lex_get();
207                         if (c == EOF)
208                                 return EOF;
209                         c &= 0x7f;
210                         if (c < '0' || '7' < c) {
211                                 lex_unget(c);
212                                 break;
213                         }
214                         v = (v << 3) + c - '0';
215                         ++count;
216                 }
217                 return v;
218         default:
219                 return c;
220         }
221 }
222
223 static uint16_t lex_class;
224
225 static int
226 lexc(void)
227 {
228         int     c;
229         do {
230                 c = lex_get();
231                 if (c == EOF) {
232                         lex_class = ENDOFFILE;
233                         c = 0;
234                 } else {
235                         c &= 0x7f;
236                         lex_class = lex_classes[c];
237                         if (lex_class & BACKSLASH) {
238                                 c = lex_quoted();
239                                 if (c == EOF)
240                                         lex_class = ENDOFFILE;
241                                 else
242                                         lex_class = PRINTABLE;
243                         }
244                 }
245         } while (lex_class & IGNORE);
246         return c;
247 }
248
249 #define AO_LISP_TOKEN_MAX       32
250
251 static char     token_string[AO_LISP_TOKEN_MAX];
252 static int      token_int;
253 static int      token_len;
254
255 static inline void add_token(int c) {
256         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
257                 token_string[token_len++] = c;
258 }
259
260 static inline void end_token(void) {
261         token_string[token_len] = '\0';
262 }
263
264 static int
265 _lex(void)
266 {
267         int     c;
268
269         token_len = 0;
270         for (;;) {
271                 c = lexc();
272                 if (lex_class & ENDOFFILE)
273                         return END;
274
275                 if (lex_class & WHITE)
276                         continue;
277
278                 if (lex_class & COMMENT) {
279                         while ((c = lexc()) != '\n') {
280                                 if (lex_class & ENDOFFILE)
281                                         return END;
282                         }
283                         continue;
284                 }
285
286                 if (lex_class & SPECIAL) {
287                         add_token(c);
288                         end_token();
289                         switch (c) {
290                         case '(':
291                         case '[':
292                                 return OPEN;
293                         case ')':
294                         case ']':
295                                 return CLOSE;
296                         case '\'':
297                                 return QUOTE;
298                         case '.':
299                                 return DOT;
300                         }
301                 }
302                 if (lex_class & TWIDDLE) {
303                         token_int = lexc();
304                         return NUM;
305                 }
306                 if (lex_class & POUND) {
307                         for (;;) {
308                                 c = lexc();
309                                 add_token(c);
310                                 switch (c) {
311                                 case 't':
312                                         return BOOL;
313                                 case 'f':
314                                         return BOOL;
315                                 }
316                         }
317                 }
318                 if (lex_class & STRINGC) {
319                         for (;;) {
320                                 c = lexc();
321                                 if (lex_class & (STRINGC|ENDOFFILE)) {
322                                         end_token();
323                                         return STRING;
324                                 }
325                                 add_token(c);
326                         }
327                 }
328                 if (lex_class & PRINTABLE) {
329                         int     isnum;
330                         int     hasdigit;
331                         int     isneg;
332
333                         isnum = 1;
334                         hasdigit = 0;
335                         token_int = 0;
336                         isneg = 0;
337                         for (;;) {
338                                 if (!(lex_class & NUMBER)) {
339                                         isnum = 0;
340                                 } else {
341                                         if (token_len != 0 &&
342                                             (lex_class & SIGN))
343                                         {
344                                                 isnum = 0;
345                                         }
346                                         if (c == '-')
347                                                 isneg = 1;
348                                         if (lex_class & DIGIT) {
349                                                 hasdigit = 1;
350                                                 if (isnum)
351                                                         token_int = token_int * 10 + c - '0';
352                                         }
353                                 }
354                                 add_token (c);
355                                 c = lexc ();
356                                 if (lex_class & (NOTNAME)) {
357 //                                      if (lex_class & ENDOFFILE)
358 //                                              clearerr (f);
359                                         lex_unget(c);
360                                         end_token ();
361                                         if (isnum && hasdigit) {
362                                                 if (isneg)
363                                                         token_int = -token_int;
364                                                 return NUM;
365                                         }
366                                         return NAME;
367                                 }
368                         }
369
370                 }
371         }
372 }
373
374 static inline int lex(void)
375 {
376         int     parse_token = _lex();
377         DBGI("token %d (%s)\n", parse_token, token_string);
378         return parse_token;
379 }
380
381 static int parse_token;
382
383 struct ao_lisp_cons     *ao_lisp_read_cons;
384 struct ao_lisp_cons     *ao_lisp_read_cons_tail;
385 struct ao_lisp_cons     *ao_lisp_read_stack;
386
387 #define READ_IN_QUOTE   0x01
388 #define READ_SAW_DOT    0x02
389 #define READ_DONE_DOT   0x04
390
391 static int
392 push_read_stack(int cons, int read_state)
393 {
394         DBGI("push read stack %p 0x%x\n", ao_lisp_read_cons, read_state);
395         DBG_IN();
396         if (cons) {
397                 ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons),
398                                                        ao_lisp__cons(ao_lisp_int_poly(read_state),
399                                                                      ao_lisp_cons_poly(ao_lisp_read_stack)));
400                 if (!ao_lisp_read_stack)
401                         return 0;
402         }
403         ao_lisp_read_cons = NULL;
404         ao_lisp_read_cons_tail = NULL;
405         return 1;
406 }
407
408 static int
409 pop_read_stack(int cons)
410 {
411         int     read_state = 0;
412         if (cons) {
413                 ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car);
414                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
415                 read_state = ao_lisp_poly_int(ao_lisp_read_stack->car);
416                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
417                 for (ao_lisp_read_cons_tail = ao_lisp_read_cons;
418                      ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr;
419                      ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr))
420                         ;
421         } else {
422                 ao_lisp_read_cons = 0;
423                 ao_lisp_read_cons_tail = 0;
424                 ao_lisp_read_stack = 0;
425         }
426         DBG_OUT();
427         DBGI("pop read stack %p %d\n", ao_lisp_read_cons, read_state);
428         return read_state;
429 }
430
431 ao_poly
432 ao_lisp_read(void)
433 {
434         struct ao_lisp_atom     *atom;
435         char                    *string;
436         int                     cons;
437         int                     read_state;
438         ao_poly                 v;
439
440
441         cons = 0;
442         read_state = 0;
443         ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0;
444         for (;;) {
445                 parse_token = lex();
446                 while (parse_token == OPEN) {
447                         if (!push_read_stack(cons, read_state))
448                                 return AO_LISP_NIL;
449                         cons++;
450                         read_state = 0;
451                         parse_token = lex();
452                 }
453
454                 switch (parse_token) {
455                 case END:
456                 default:
457                         if (cons)
458                                 ao_lisp_error(AO_LISP_EOF, "unexpected end of file");
459                         return _ao_lisp_atom_eof;
460                         break;
461                 case NAME:
462                         atom = ao_lisp_atom_intern(token_string);
463                         if (atom)
464                                 v = ao_lisp_atom_poly(atom);
465                         else
466                                 v = AO_LISP_NIL;
467                         break;
468                 case NUM:
469                         v = ao_lisp_int_poly(token_int);
470                         break;
471                 case BOOL:
472                         if (token_string[0] == 't')
473                                 v = _ao_lisp_bool_true;
474                         else
475                                 v = _ao_lisp_bool_false;
476                         break;
477                 case STRING:
478                         string = ao_lisp_string_copy(token_string);
479                         if (string)
480                                 v = ao_lisp_string_poly(string);
481                         else
482                                 v = AO_LISP_NIL;
483                         break;
484                 case QUOTE:
485                         if (!push_read_stack(cons, read_state))
486                                 return AO_LISP_NIL;
487                         cons++;
488                         read_state |= READ_IN_QUOTE;
489                         v = _ao_lisp_atom_quote;
490                         break;
491                 case CLOSE:
492                         if (!cons) {
493                                 v = AO_LISP_NIL;
494                                 break;
495                         }
496                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
497                         --cons;
498                         read_state = pop_read_stack(cons);
499                         break;
500                 case DOT:
501                         if (!cons) {
502                                 ao_lisp_error(AO_LISP_INVALID, ". outside of cons");
503                                 return AO_LISP_NIL;
504                         }
505                         if (!ao_lisp_read_cons) {
506                                 ao_lisp_error(AO_LISP_INVALID, ". first in cons");
507                                 return AO_LISP_NIL;
508                         }
509                         read_state |= READ_SAW_DOT;
510                         continue;
511                 }
512
513                 /* loop over QUOTE ends */
514                 for (;;) {
515                         if (!cons)
516                                 return v;
517
518                         if (read_state & READ_DONE_DOT) {
519                                 ao_lisp_error(AO_LISP_INVALID, ". not last in cons");
520                                 return AO_LISP_NIL;
521                         }
522
523                         if (read_state & READ_SAW_DOT) {
524                                 read_state |= READ_DONE_DOT;
525                                 ao_lisp_read_cons_tail->cdr = v;
526                         } else {
527                                 struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, AO_LISP_NIL);
528                                 if (!read)
529                                         return AO_LISP_NIL;
530
531                                 if (ao_lisp_read_cons_tail)
532                                         ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read);
533                                 else
534                                         ao_lisp_read_cons = read;
535                                 ao_lisp_read_cons_tail = read;
536                         }
537
538                         if (!(read_state & READ_IN_QUOTE) || !ao_lisp_read_cons->cdr)
539                                 break;
540
541                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
542                         --cons;
543                         read_state = pop_read_stack(cons);
544                 }
545         }
546         return v;
547 }