altos/lisp: Clean up OS integration bits, add defun
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_lisp.h"
16 #include "ao_lisp_read.h"
17
18 static const uint16_t   lex_classes[128] = {
19         IGNORE,         /* ^@ */
20         IGNORE,         /* ^A */
21         IGNORE,         /* ^B */
22         IGNORE,         /* ^C */
23         IGNORE,         /* ^D */
24         IGNORE,         /* ^E */
25         IGNORE,         /* ^F */
26         IGNORE,         /* ^G */
27         IGNORE,         /* ^H */
28         WHITE,          /* ^I */
29         WHITE,          /* ^J */
30         WHITE,          /* ^K */
31         WHITE,          /* ^L */
32         WHITE,          /* ^M */
33         IGNORE,         /* ^N */
34         IGNORE,         /* ^O */
35         IGNORE,         /* ^P */
36         IGNORE,         /* ^Q */
37         IGNORE,         /* ^R */
38         IGNORE,         /* ^S */
39         IGNORE,         /* ^T */
40         IGNORE,         /* ^U */
41         IGNORE,         /* ^V */
42         IGNORE,         /* ^W */
43         IGNORE,         /* ^X */
44         IGNORE,         /* ^Y */
45         IGNORE,         /* ^Z */
46         IGNORE,         /* ^[ */
47         IGNORE,         /* ^\ */
48         IGNORE,         /* ^] */
49         IGNORE,         /* ^^ */
50         IGNORE,         /* ^_ */
51         PRINTABLE|WHITE,        /*    */
52         PRINTABLE,              /* ! */
53         PRINTABLE|STRINGC,      /* " */
54         PRINTABLE|COMMENT,      /* # */
55         PRINTABLE,              /* $ */
56         PRINTABLE,              /* % */
57         PRINTABLE,              /* & */
58         PRINTABLE|QUOTEC,       /* ' */
59         PRINTABLE|BRA,          /* ( */
60         PRINTABLE|KET,          /* ) */
61         PRINTABLE,              /* * */
62         PRINTABLE|SIGN,         /* + */
63         PRINTABLE,              /* , */
64         PRINTABLE|SIGN,         /* - */
65         PRINTABLE,              /* . */
66         PRINTABLE,              /* / */
67         PRINTABLE|DIGIT,        /* 0 */
68         PRINTABLE|DIGIT,        /* 1 */
69         PRINTABLE|DIGIT,        /* 2 */
70         PRINTABLE|DIGIT,        /* 3 */
71         PRINTABLE|DIGIT,        /* 4 */
72         PRINTABLE|DIGIT,        /* 5 */
73         PRINTABLE|DIGIT,        /* 6 */
74         PRINTABLE|DIGIT,        /* 7 */
75         PRINTABLE|DIGIT,        /* 8 */
76         PRINTABLE|DIGIT,        /* 9 */
77         PRINTABLE,              /* : */
78         PRINTABLE|COMMENT,      /* ; */
79         PRINTABLE,              /* < */
80         PRINTABLE,              /* = */
81         PRINTABLE,              /* > */
82         PRINTABLE,              /* ? */
83         PRINTABLE,              /*  @ */
84         PRINTABLE,              /*  A */
85         PRINTABLE,              /*  B */
86         PRINTABLE,              /*  C */
87         PRINTABLE,              /*  D */
88         PRINTABLE,              /*  E */
89         PRINTABLE,              /*  F */
90         PRINTABLE,              /*  G */
91         PRINTABLE,              /*  H */
92         PRINTABLE,              /*  I */
93         PRINTABLE,              /*  J */
94         PRINTABLE,              /*  K */
95         PRINTABLE,              /*  L */
96         PRINTABLE,              /*  M */
97         PRINTABLE,              /*  N */
98         PRINTABLE,              /*  O */
99         PRINTABLE,              /*  P */
100         PRINTABLE,              /*  Q */
101         PRINTABLE,              /*  R */
102         PRINTABLE,              /*  S */
103         PRINTABLE,              /*  T */
104         PRINTABLE,              /*  U */
105         PRINTABLE,              /*  V */
106         PRINTABLE,              /*  W */
107         PRINTABLE,              /*  X */
108         PRINTABLE,              /*  Y */
109         PRINTABLE,              /*  Z */
110         PRINTABLE,              /*  [ */
111         PRINTABLE|BACKSLASH,    /*  \ */
112         PRINTABLE,              /*  ] */
113         PRINTABLE,              /*  ^ */
114         PRINTABLE,              /*  _ */
115         PRINTABLE,              /*  ` */
116         PRINTABLE,              /*  a */
117         PRINTABLE,              /*  b */
118         PRINTABLE,              /*  c */
119         PRINTABLE,              /*  d */
120         PRINTABLE,              /*  e */
121         PRINTABLE,              /*  f */
122         PRINTABLE,              /*  g */
123         PRINTABLE,              /*  h */
124         PRINTABLE,              /*  i */
125         PRINTABLE,              /*  j */
126         PRINTABLE,              /*  k */
127         PRINTABLE,              /*  l */
128         PRINTABLE,              /*  m */
129         PRINTABLE,              /*  n */
130         PRINTABLE,              /*  o */
131         PRINTABLE,              /*  p */
132         PRINTABLE,              /*  q */
133         PRINTABLE,              /*  r */
134         PRINTABLE,              /*  s */
135         PRINTABLE,              /*  t */
136         PRINTABLE,              /*  u */
137         PRINTABLE,              /*  v */
138         PRINTABLE,              /*  w */
139         PRINTABLE,              /*  x */
140         PRINTABLE,              /*  y */
141         PRINTABLE,              /*  z */
142         PRINTABLE,              /*  { */
143         PRINTABLE|VBAR,         /*  | */
144         PRINTABLE,              /*  } */
145         PRINTABLE|TWIDDLE,      /*  ~ */
146         IGNORE,                 /*  ^? */
147 };
148
149 static int lex_unget_c;
150
151 static inline int
152 lex_get()
153 {
154         int     c;
155         if (lex_unget_c) {
156                 c = lex_unget_c;
157                 lex_unget_c = 0;
158         } else {
159                 c = ao_lisp_getc();
160         }
161         return c;
162 }
163
164 static inline void
165 lex_unget(int c)
166 {
167         if (c != EOF)
168                 lex_unget_c = c;
169 }
170
171 static int
172 lex_quoted (void)
173 {
174         int     c;
175         int     v;
176         int     count;
177
178         c = lex_get();
179         if (c == EOF)
180                 return EOF;
181         c &= 0x7f;
182         switch (c) {
183         case 'n':
184                 return '\n';
185         case 'f':
186                 return '\f';
187         case 'b':
188                 return '\b';
189         case 'r':
190                 return '\r';
191         case 'v':
192                 return '\v';
193         case 't':
194                 return '\t';
195         case '0':
196         case '1':
197         case '2':
198         case '3':
199         case '4':
200         case '5':
201         case '6':
202         case '7':
203                 v = c - '0';
204                 count = 1;
205                 while (count <= 3) {
206                         c = lex_get();
207                         if (c == EOF)
208                                 return EOF;
209                         c &= 0x7f;
210                         if (c < '0' || '7' < c) {
211                                 lex_unget(c);
212                                 break;
213                         }
214                         v = (v << 3) + c - '0';
215                         ++count;
216                 }
217                 return v;
218         default:
219                 return c;
220         }
221 }
222
223 static uint16_t lex_class;
224
225 static int
226 lexc(void)
227 {
228         int     c;
229         do {
230                 c = lex_get();
231                 if (c == EOF) {
232                         lex_class = ENDOFFILE;
233                         c = 0;
234                 } else {
235                         c &= 0x7f;
236                         lex_class = lex_classes[c];
237                         if (lex_class & BACKSLASH) {
238                                 c = lex_quoted();
239                                 if (c == EOF)
240                                         lex_class = ENDOFFILE;
241                                 else
242                                         lex_class = PRINTABLE;
243                         }
244                 }
245         } while (lex_class & IGNORE);
246         return c;
247 }
248
249 #define AO_LISP_TOKEN_MAX       32
250
251 static char     token_string[AO_LISP_TOKEN_MAX];
252 static int      token_int;
253 static int      token_len;
254
255 static inline void add_token(int c) {
256         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
257                 token_string[token_len++] = c;
258 }
259
260 static inline void end_token(void) {
261         token_string[token_len] = '\0';
262 }
263
264 static int
265 lex(void)
266 {
267         int     c;
268
269         token_len = 0;
270         for (;;) {
271                 c = lexc();
272                 if (lex_class & ENDOFFILE)
273                         return AO_LISP_NIL;
274
275                 if (lex_class & WHITE)
276                         continue;
277
278                 if (lex_class & COMMENT) {
279                         while ((c = lexc()) != '\n') {
280                                 if (lex_class & ENDOFFILE)
281                                         return AO_LISP_NIL;
282                         }
283                         continue;
284                 }
285
286                 if (lex_class & (BRA|KET|QUOTEC)) {
287                         add_token(c);
288                         end_token();
289                         switch (c) {
290                         case '(':
291                                 return OPEN;
292                         case ')':
293                                 return CLOSE;
294                         case '\'':
295                                 return QUOTE;
296                         }
297                 }
298                 if (lex_class & TWIDDLE) {
299                         token_int = lexc();
300                         return NUM;
301                 }
302                 if (lex_class & STRINGC) {
303                         for (;;) {
304                                 c = lexc();
305                                 if (lex_class & (STRINGC|ENDOFFILE)) {
306                                         end_token();
307                                         return STRING;
308                                 }
309                                 add_token(c);
310                         }
311                 }
312                 if (lex_class & PRINTABLE) {
313                         int     isnum;
314                         int     hasdigit;
315                         int     isneg;
316
317                         isnum = 1;
318                         hasdigit = 0;
319                         token_int = 0;
320                         isneg = 0;
321                         for (;;) {
322                                 if (!(lex_class & NUMBER)) {
323                                         isnum = 0;
324                                 } else {
325                                         if (token_len != 0 &&
326                                             (lex_class & SIGN))
327                                         {
328                                                 isnum = 0;
329                                         }
330                                         if (c == '-')
331                                                 isneg = 1;
332                                         if (lex_class & DIGIT) {
333                                                 hasdigit = 1;
334                                                 if (isnum)
335                                                         token_int = token_int * 10 + c - '0';
336                                         }
337                                 }
338                                 add_token (c);
339                                 c = lexc ();
340                                 if (lex_class & (NOTNAME)) {
341 //                                      if (lex_class & ENDOFFILE)
342 //                                              clearerr (f);
343                                         lex_unget(c);
344                                         end_token ();
345                                         if (isnum && hasdigit) {
346                                                 if (isneg)
347                                                         token_int = -token_int;
348                                                 return NUM;
349                                         }
350                                         return NAME;
351                                 }
352                         }
353
354                 }
355         }
356 }
357
358 static int parse_token;
359 static uint8_t                  been_here;
360 static struct ao_lisp_cons      *read_cons;
361 static struct ao_lisp_cons      *read_cons_tail;
362 static struct ao_lisp_cons      *read_stack;
363
364 static int
365 push_read_stack(int cons, int in_quote)
366 {
367         if (cons) {
368                 read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(read_cons),
369                                                ao_lisp_cons_cons(ao_lisp_int_poly(in_quote),
370                                                                  read_stack));
371                 if (!read_stack)
372                         return 0;
373         }
374         read_cons = NULL;
375         read_cons_tail = NULL;
376         return 1;
377 }
378
379 static int
380 pop_read_stack(int cons)
381 {
382         int     in_quote = 0;
383         if (cons) {
384                 read_cons = ao_lisp_poly_cons(read_stack->car);
385                 read_stack = ao_lisp_poly_cons(read_stack->cdr);
386                 in_quote = ao_lisp_poly_int(read_stack->car);
387                 read_stack = ao_lisp_poly_cons(read_stack->cdr);
388                 for (read_cons_tail = read_cons;
389                      read_cons_tail && read_cons_tail->cdr;
390                      read_cons_tail = ao_lisp_poly_cons(read_cons_tail->cdr))
391                         ;
392         } else {
393                 read_cons = 0;
394                 read_cons_tail = 0;
395                 read_stack = 0;
396         }
397         return in_quote;
398 }
399
400 ao_poly
401 ao_lisp_read(void)
402 {
403         struct ao_lisp_atom     *atom;
404         char                    *string;
405         int                     cons;
406         int                     in_quote;
407         ao_poly                 v;
408
409         if (!been_here) {
410                 ao_lisp_root_add(&ao_lisp_cons_type, &read_cons);
411                 ao_lisp_root_add(&ao_lisp_cons_type, &read_cons_tail);
412                 ao_lisp_root_add(&ao_lisp_cons_type, &read_stack);
413                 been_here = 1;
414         }
415         parse_token = lex();
416
417         cons = 0;
418         in_quote = 0;
419         read_cons = read_cons_tail = read_stack = 0;
420         for (;;) {
421                 while (parse_token == OPEN) {
422                         if (!push_read_stack(cons, in_quote))
423                                 return AO_LISP_NIL;
424                         cons++;
425                         in_quote = 0;
426                         parse_token = lex();
427                 }
428
429                 switch (parse_token) {
430                 case ENDOFFILE:
431                 default:
432                         v = AO_LISP_NIL;
433                         break;
434                 case NAME:
435                         atom = ao_lisp_atom_intern(token_string);
436                         if (atom)
437                                 v = ao_lisp_atom_poly(atom);
438                         else
439                                 v = AO_LISP_NIL;
440                         break;
441                 case NUM:
442                         v = ao_lisp_int_poly(token_int);
443                         break;
444                 case STRING:
445                         string = ao_lisp_string_copy(token_string);
446                         if (string)
447                                 v = ao_lisp_string_poly(string);
448                         else
449                                 v = AO_LISP_NIL;
450                         break;
451                 case QUOTE:
452                         if (!push_read_stack(cons, in_quote))
453                                 return AO_LISP_NIL;
454                         cons++;
455                         in_quote = 1;
456                         v = _ao_lisp_atom_quote;
457                         break;
458                 case CLOSE:
459                         if (!cons) {
460                                 v = AO_LISP_NIL;
461                                 break;
462                         }
463                         v = ao_lisp_cons_poly(read_cons);
464                         --cons;
465                         in_quote = pop_read_stack(cons);
466                         break;
467                 }
468
469                 /* loop over QUOTE ends */
470                 for (;;) {
471                         if (!cons)
472                                 return v;
473
474                         struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, NULL);
475                         if (!read)
476                                 return AO_LISP_NIL;
477
478                         if (read_cons_tail)
479                                 read_cons_tail->cdr = ao_lisp_cons_poly(read);
480                         else
481                                 read_cons = read;
482                         read_cons_tail = read;
483
484                         if (!in_quote || !read_cons->cdr)
485                                 break;
486
487                         v = ao_lisp_cons_poly(read_cons);
488                         --cons;
489                         in_quote = pop_read_stack(cons);
490                 }
491
492                 parse_token = lex();
493         }
494         return v;
495 }