b792c2f19a123f55138df67da8557d635dc5de99
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #define DBG_EVAL 0
16 #include "ao_lisp.h"
17 #include "ao_lisp_read.h"
18
19 static const uint16_t   lex_classes[128] = {
20         IGNORE,         /* ^@ */
21         IGNORE,         /* ^A */
22         IGNORE,         /* ^B */
23         IGNORE,         /* ^C */
24         IGNORE,         /* ^D */
25         IGNORE,         /* ^E */
26         IGNORE,         /* ^F */
27         IGNORE,         /* ^G */
28         IGNORE,         /* ^H */
29         WHITE,          /* ^I */
30         WHITE,          /* ^J */
31         WHITE,          /* ^K */
32         WHITE,          /* ^L */
33         WHITE,          /* ^M */
34         IGNORE,         /* ^N */
35         IGNORE,         /* ^O */
36         IGNORE,         /* ^P */
37         IGNORE,         /* ^Q */
38         IGNORE,         /* ^R */
39         IGNORE,         /* ^S */
40         IGNORE,         /* ^T */
41         IGNORE,         /* ^U */
42         IGNORE,         /* ^V */
43         IGNORE,         /* ^W */
44         IGNORE,         /* ^X */
45         IGNORE,         /* ^Y */
46         IGNORE,         /* ^Z */
47         IGNORE,         /* ^[ */
48         IGNORE,         /* ^\ */
49         IGNORE,         /* ^] */
50         IGNORE,         /* ^^ */
51         IGNORE,         /* ^_ */
52         PRINTABLE|WHITE,        /*    */
53         PRINTABLE,              /* ! */
54         PRINTABLE|STRINGC,      /* " */
55         PRINTABLE|COMMENT,      /* # */
56         PRINTABLE,              /* $ */
57         PRINTABLE,              /* % */
58         PRINTABLE,              /* & */
59         PRINTABLE|QUOTEC,       /* ' */
60         PRINTABLE|BRA,          /* ( */
61         PRINTABLE|KET,          /* ) */
62         PRINTABLE,              /* * */
63         PRINTABLE|SIGN,         /* + */
64         PRINTABLE,              /* , */
65         PRINTABLE|SIGN,         /* - */
66         PRINTABLE,              /* . */
67         PRINTABLE,              /* / */
68         PRINTABLE|DIGIT,        /* 0 */
69         PRINTABLE|DIGIT,        /* 1 */
70         PRINTABLE|DIGIT,        /* 2 */
71         PRINTABLE|DIGIT,        /* 3 */
72         PRINTABLE|DIGIT,        /* 4 */
73         PRINTABLE|DIGIT,        /* 5 */
74         PRINTABLE|DIGIT,        /* 6 */
75         PRINTABLE|DIGIT,        /* 7 */
76         PRINTABLE|DIGIT,        /* 8 */
77         PRINTABLE|DIGIT,        /* 9 */
78         PRINTABLE,              /* : */
79         PRINTABLE|COMMENT,      /* ; */
80         PRINTABLE,              /* < */
81         PRINTABLE,              /* = */
82         PRINTABLE,              /* > */
83         PRINTABLE,              /* ? */
84         PRINTABLE,              /*  @ */
85         PRINTABLE,              /*  A */
86         PRINTABLE,              /*  B */
87         PRINTABLE,              /*  C */
88         PRINTABLE,              /*  D */
89         PRINTABLE,              /*  E */
90         PRINTABLE,              /*  F */
91         PRINTABLE,              /*  G */
92         PRINTABLE,              /*  H */
93         PRINTABLE,              /*  I */
94         PRINTABLE,              /*  J */
95         PRINTABLE,              /*  K */
96         PRINTABLE,              /*  L */
97         PRINTABLE,              /*  M */
98         PRINTABLE,              /*  N */
99         PRINTABLE,              /*  O */
100         PRINTABLE,              /*  P */
101         PRINTABLE,              /*  Q */
102         PRINTABLE,              /*  R */
103         PRINTABLE,              /*  S */
104         PRINTABLE,              /*  T */
105         PRINTABLE,              /*  U */
106         PRINTABLE,              /*  V */
107         PRINTABLE,              /*  W */
108         PRINTABLE,              /*  X */
109         PRINTABLE,              /*  Y */
110         PRINTABLE,              /*  Z */
111         PRINTABLE,              /*  [ */
112         PRINTABLE|BACKSLASH,    /*  \ */
113         PRINTABLE,              /*  ] */
114         PRINTABLE,              /*  ^ */
115         PRINTABLE,              /*  _ */
116         PRINTABLE,              /*  ` */
117         PRINTABLE,              /*  a */
118         PRINTABLE,              /*  b */
119         PRINTABLE,              /*  c */
120         PRINTABLE,              /*  d */
121         PRINTABLE,              /*  e */
122         PRINTABLE,              /*  f */
123         PRINTABLE,              /*  g */
124         PRINTABLE,              /*  h */
125         PRINTABLE,              /*  i */
126         PRINTABLE,              /*  j */
127         PRINTABLE,              /*  k */
128         PRINTABLE,              /*  l */
129         PRINTABLE,              /*  m */
130         PRINTABLE,              /*  n */
131         PRINTABLE,              /*  o */
132         PRINTABLE,              /*  p */
133         PRINTABLE,              /*  q */
134         PRINTABLE,              /*  r */
135         PRINTABLE,              /*  s */
136         PRINTABLE,              /*  t */
137         PRINTABLE,              /*  u */
138         PRINTABLE,              /*  v */
139         PRINTABLE,              /*  w */
140         PRINTABLE,              /*  x */
141         PRINTABLE,              /*  y */
142         PRINTABLE,              /*  z */
143         PRINTABLE,              /*  { */
144         PRINTABLE|VBAR,         /*  | */
145         PRINTABLE,              /*  } */
146         PRINTABLE|TWIDDLE,      /*  ~ */
147         IGNORE,                 /*  ^? */
148 };
149
150 static int lex_unget_c;
151
152 static inline int
153 lex_get()
154 {
155         int     c;
156         if (lex_unget_c) {
157                 c = lex_unget_c;
158                 lex_unget_c = 0;
159         } else {
160                 c = ao_lisp_getc();
161         }
162         return c;
163 }
164
165 static inline void
166 lex_unget(int c)
167 {
168         if (c != EOF)
169                 lex_unget_c = c;
170 }
171
172 static int
173 lex_quoted (void)
174 {
175         int     c;
176         int     v;
177         int     count;
178
179         c = lex_get();
180         if (c == EOF)
181                 return EOF;
182         c &= 0x7f;
183         switch (c) {
184         case 'n':
185                 return '\n';
186         case 'f':
187                 return '\f';
188         case 'b':
189                 return '\b';
190         case 'r':
191                 return '\r';
192         case 'v':
193                 return '\v';
194         case 't':
195                 return '\t';
196         case '0':
197         case '1':
198         case '2':
199         case '3':
200         case '4':
201         case '5':
202         case '6':
203         case '7':
204                 v = c - '0';
205                 count = 1;
206                 while (count <= 3) {
207                         c = lex_get();
208                         if (c == EOF)
209                                 return EOF;
210                         c &= 0x7f;
211                         if (c < '0' || '7' < c) {
212                                 lex_unget(c);
213                                 break;
214                         }
215                         v = (v << 3) + c - '0';
216                         ++count;
217                 }
218                 return v;
219         default:
220                 return c;
221         }
222 }
223
224 static uint16_t lex_class;
225
226 static int
227 lexc(void)
228 {
229         int     c;
230         do {
231                 c = lex_get();
232                 if (c == EOF) {
233                         lex_class = ENDOFFILE;
234                         c = 0;
235                 } else {
236                         c &= 0x7f;
237                         lex_class = lex_classes[c];
238                         if (lex_class & BACKSLASH) {
239                                 c = lex_quoted();
240                                 if (c == EOF)
241                                         lex_class = ENDOFFILE;
242                                 else
243                                         lex_class = PRINTABLE;
244                         }
245                 }
246         } while (lex_class & IGNORE);
247         return c;
248 }
249
250 #define AO_LISP_TOKEN_MAX       32
251
252 static char     token_string[AO_LISP_TOKEN_MAX];
253 static int      token_int;
254 static int      token_len;
255
256 static inline void add_token(int c) {
257         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
258                 token_string[token_len++] = c;
259 }
260
261 static inline void end_token(void) {
262         token_string[token_len] = '\0';
263 }
264
265 static int
266 lex(void)
267 {
268         int     c;
269
270         token_len = 0;
271         for (;;) {
272                 c = lexc();
273                 if (lex_class & ENDOFFILE)
274                         return END;
275
276                 if (lex_class & WHITE)
277                         continue;
278
279                 if (lex_class & COMMENT) {
280                         while ((c = lexc()) != '\n') {
281                                 if (lex_class & ENDOFFILE)
282                                         return END;
283                         }
284                         continue;
285                 }
286
287                 if (lex_class & (BRA|KET|QUOTEC)) {
288                         add_token(c);
289                         end_token();
290                         switch (c) {
291                         case '(':
292                                 return OPEN;
293                         case ')':
294                                 return CLOSE;
295                         case '\'':
296                                 return QUOTE;
297                         }
298                 }
299                 if (lex_class & TWIDDLE) {
300                         token_int = lexc();
301                         return NUM;
302                 }
303                 if (lex_class & STRINGC) {
304                         for (;;) {
305                                 c = lexc();
306                                 if (lex_class & (STRINGC|ENDOFFILE)) {
307                                         end_token();
308                                         return STRING;
309                                 }
310                                 add_token(c);
311                         }
312                 }
313                 if (lex_class & PRINTABLE) {
314                         int     isnum;
315                         int     hasdigit;
316                         int     isneg;
317
318                         isnum = 1;
319                         hasdigit = 0;
320                         token_int = 0;
321                         isneg = 0;
322                         for (;;) {
323                                 if (!(lex_class & NUMBER)) {
324                                         isnum = 0;
325                                 } else {
326                                         if (token_len != 0 &&
327                                             (lex_class & SIGN))
328                                         {
329                                                 isnum = 0;
330                                         }
331                                         if (c == '-')
332                                                 isneg = 1;
333                                         if (lex_class & DIGIT) {
334                                                 hasdigit = 1;
335                                                 if (isnum)
336                                                         token_int = token_int * 10 + c - '0';
337                                         }
338                                 }
339                                 add_token (c);
340                                 c = lexc ();
341                                 if (lex_class & (NOTNAME)) {
342 //                                      if (lex_class & ENDOFFILE)
343 //                                              clearerr (f);
344                                         lex_unget(c);
345                                         end_token ();
346                                         if (isnum && hasdigit) {
347                                                 if (isneg)
348                                                         token_int = -token_int;
349                                                 return NUM;
350                                         }
351                                         return NAME;
352                                 }
353                         }
354
355                 }
356         }
357 }
358
359 static int parse_token;
360
361 struct ao_lisp_cons     *ao_lisp_read_cons;
362 struct ao_lisp_cons     *ao_lisp_read_cons_tail;
363 struct ao_lisp_cons     *ao_lisp_read_stack;
364
365 static int
366 push_read_stack(int cons, int in_quote)
367 {
368         DBGI("push read stack %p %d\n", ao_lisp_read_cons, in_quote);
369         DBG_IN();
370         if (cons) {
371                 ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons),
372                                                ao_lisp_cons_cons(ao_lisp_int_poly(in_quote),
373                                                                  ao_lisp_read_stack));
374                 if (!ao_lisp_read_stack)
375                         return 0;
376         }
377         ao_lisp_read_cons = NULL;
378         ao_lisp_read_cons_tail = NULL;
379         return 1;
380 }
381
382 static int
383 pop_read_stack(int cons)
384 {
385         int     in_quote = 0;
386         if (cons) {
387                 ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car);
388                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
389                 in_quote = ao_lisp_poly_int(ao_lisp_read_stack->car);
390                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
391                 for (ao_lisp_read_cons_tail = ao_lisp_read_cons;
392                      ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr;
393                      ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr))
394                         ;
395         } else {
396                 ao_lisp_read_cons = 0;
397                 ao_lisp_read_cons_tail = 0;
398                 ao_lisp_read_stack = 0;
399         }
400         DBG_OUT();
401         DBGI("pop read stack %p %d\n", ao_lisp_read_cons, in_quote);
402         return in_quote;
403 }
404
405 ao_poly
406 ao_lisp_read(void)
407 {
408         struct ao_lisp_atom     *atom;
409         char                    *string;
410         int                     cons;
411         int                     in_quote;
412         ao_poly                 v;
413
414         parse_token = lex();
415         DBGI("token %d (%s)\n", parse_token, token_string);
416
417         cons = 0;
418         in_quote = 0;
419         ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0;
420         for (;;) {
421                 while (parse_token == OPEN) {
422                         if (!push_read_stack(cons, in_quote))
423                                 return AO_LISP_NIL;
424                         cons++;
425                         in_quote = 0;
426                         parse_token = lex();
427                         DBGI("token %d (%s)\n", parse_token, token_string);
428                 }
429
430                 switch (parse_token) {
431                 case END:
432                 default:
433                         if (cons)
434                                 ao_lisp_error(AO_LISP_EOF, "unexpected end of file");
435                         return _ao_lisp_atom_eof;
436                         break;
437                 case NAME:
438                         atom = ao_lisp_atom_intern(token_string);
439                         if (atom)
440                                 v = ao_lisp_atom_poly(atom);
441                         else
442                                 v = AO_LISP_NIL;
443                         break;
444                 case NUM:
445                         v = ao_lisp_int_poly(token_int);
446                         break;
447                 case STRING:
448                         string = ao_lisp_string_copy(token_string);
449                         if (string)
450                                 v = ao_lisp_string_poly(string);
451                         else
452                                 v = AO_LISP_NIL;
453                         break;
454                 case QUOTE:
455                         if (!push_read_stack(cons, in_quote))
456                                 return AO_LISP_NIL;
457                         cons++;
458                         in_quote = 1;
459                         v = _ao_lisp_atom_quote;
460                         break;
461                 case CLOSE:
462                         if (!cons) {
463                                 v = AO_LISP_NIL;
464                                 break;
465                         }
466                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
467                         --cons;
468                         in_quote = pop_read_stack(cons);
469                         break;
470                 }
471
472                 /* loop over QUOTE ends */
473                 for (;;) {
474                         if (!cons)
475                                 return v;
476
477                         struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, NULL);
478                         if (!read)
479                                 return AO_LISP_NIL;
480
481                         if (ao_lisp_read_cons_tail)
482                                 ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read);
483                         else
484                                 ao_lisp_read_cons = read;
485                         ao_lisp_read_cons_tail = read;
486
487                         if (!in_quote || !ao_lisp_read_cons->cdr)
488                                 break;
489
490                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
491                         --cons;
492                         in_quote = pop_read_stack(cons);
493                 }
494
495                 parse_token = lex();
496                 DBGI("token %d (%s)\n", parse_token, token_string);
497         }
498         return v;
499 }