altos: Add lisp reader
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_lisp.h"
16 #include "ao_lisp_read.h"
17
18 static const uint16_t   lex_classes[128] = {
19         IGNORE,         /* ^@ */
20         IGNORE,         /* ^A */
21         IGNORE,         /* ^B */
22         IGNORE,         /* ^C */
23         IGNORE,         /* ^D */
24         IGNORE,         /* ^E */
25         IGNORE,         /* ^F */
26         IGNORE,         /* ^G */
27         IGNORE,         /* ^H */
28         WHITE,          /* ^I */
29         WHITE,          /* ^J */
30         WHITE,          /* ^K */
31         WHITE,          /* ^L */
32         WHITE,          /* ^M */
33         IGNORE,         /* ^N */
34         IGNORE,         /* ^O */
35         IGNORE,         /* ^P */
36         IGNORE,         /* ^Q */
37         IGNORE,         /* ^R */
38         IGNORE,         /* ^S */
39         IGNORE,         /* ^T */
40         IGNORE,         /* ^U */
41         IGNORE,         /* ^V */
42         IGNORE,         /* ^W */
43         IGNORE,         /* ^X */
44         IGNORE,         /* ^Y */
45         IGNORE,         /* ^Z */
46         IGNORE,         /* ^[ */
47         IGNORE,         /* ^\ */
48         IGNORE,         /* ^] */
49         IGNORE,         /* ^^ */
50         IGNORE,         /* ^_ */
51         PRINTABLE|WHITE,        /*    */
52         PRINTABLE,              /* ! */
53         PRINTABLE|STRINGC,      /* " */
54         PRINTABLE|COMMENT,      /* # */
55         PRINTABLE,              /* $ */
56         PRINTABLE,              /* % */
57         PRINTABLE,              /* & */
58         PRINTABLE|QUOTEC,       /* ' */
59         PRINTABLE|BRA,          /* ( */
60         PRINTABLE|KET,          /* ) */
61         PRINTABLE,              /* * */
62         PRINTABLE|SIGN,         /* + */
63         PRINTABLE,              /* , */
64         PRINTABLE|SIGN,         /* - */
65         PRINTABLE,              /* . */
66         PRINTABLE,              /* / */
67         PRINTABLE|DIGIT,        /* 0 */
68         PRINTABLE|DIGIT,        /* 1 */
69         PRINTABLE|DIGIT,        /* 2 */
70         PRINTABLE|DIGIT,        /* 3 */
71         PRINTABLE|DIGIT,        /* 4 */
72         PRINTABLE|DIGIT,        /* 5 */
73         PRINTABLE|DIGIT,        /* 6 */
74         PRINTABLE|DIGIT,        /* 7 */
75         PRINTABLE|DIGIT,        /* 8 */
76         PRINTABLE|DIGIT,        /* 9 */
77         PRINTABLE,              /* : */
78         PRINTABLE|COMMENT,      /* ; */
79         PRINTABLE,              /* < */
80         PRINTABLE,              /* = */
81         PRINTABLE,              /* > */
82         PRINTABLE,              /* ? */
83         PRINTABLE,              /*  @ */
84         PRINTABLE,              /*  A */
85         PRINTABLE,              /*  B */
86         PRINTABLE,              /*  C */
87         PRINTABLE,              /*  D */
88         PRINTABLE,              /*  E */
89         PRINTABLE,              /*  F */
90         PRINTABLE,              /*  G */
91         PRINTABLE,              /*  H */
92         PRINTABLE,              /*  I */
93         PRINTABLE,              /*  J */
94         PRINTABLE,              /*  K */
95         PRINTABLE,              /*  L */
96         PRINTABLE,              /*  M */
97         PRINTABLE,              /*  N */
98         PRINTABLE,              /*  O */
99         PRINTABLE,              /*  P */
100         PRINTABLE,              /*  Q */
101         PRINTABLE,              /*  R */
102         PRINTABLE,              /*  S */
103         PRINTABLE,              /*  T */
104         PRINTABLE,              /*  U */
105         PRINTABLE,              /*  V */
106         PRINTABLE,              /*  W */
107         PRINTABLE,              /*  X */
108         PRINTABLE,              /*  Y */
109         PRINTABLE,              /*  Z */
110         PRINTABLE,              /*  [ */
111         PRINTABLE|BACKSLASH,    /*  \ */
112         PRINTABLE,              /*  ] */
113         PRINTABLE,              /*  ^ */
114         PRINTABLE,              /*  _ */
115         PRINTABLE,              /*  ` */
116         PRINTABLE,              /*  a */
117         PRINTABLE,              /*  b */
118         PRINTABLE,              /*  c */
119         PRINTABLE,              /*  d */
120         PRINTABLE,              /*  e */
121         PRINTABLE,              /*  f */
122         PRINTABLE,              /*  g */
123         PRINTABLE,              /*  h */
124         PRINTABLE,              /*  i */
125         PRINTABLE,              /*  j */
126         PRINTABLE,              /*  k */
127         PRINTABLE,              /*  l */
128         PRINTABLE,              /*  m */
129         PRINTABLE,              /*  n */
130         PRINTABLE,              /*  o */
131         PRINTABLE,              /*  p */
132         PRINTABLE,              /*  q */
133         PRINTABLE,              /*  r */
134         PRINTABLE,              /*  s */
135         PRINTABLE,              /*  t */
136         PRINTABLE,              /*  u */
137         PRINTABLE,              /*  v */
138         PRINTABLE,              /*  w */
139         PRINTABLE,              /*  x */
140         PRINTABLE,              /*  y */
141         PRINTABLE,              /*  z */
142         PRINTABLE,              /*  { */
143         PRINTABLE|VBAR,         /*  | */
144         PRINTABLE,              /*  } */
145         PRINTABLE|TWIDDLE,      /*  ~ */
146         IGNORE,                 /*  ^? */
147 };
148
149 static int lex_unget_c;
150
151 static inline int
152 lex_get()
153 {
154         int     c;
155         if (lex_unget_c) {
156                 c = lex_unget_c;
157                 lex_unget_c = 0;
158         } else
159                 c = getchar();
160         return c;
161 }
162
163 static inline void
164 lex_unget(int c)
165 {
166         if (c != EOF)
167                 lex_unget_c = c;
168 }
169
170 static int
171 lex_quoted (void)
172 {
173         int     c;
174         int     v;
175         int     count;
176
177         c = lex_get();
178 //      if (jumping)
179 //              return nil;
180         if (c == EOF)
181                 return EOF;
182         c &= 0x7f;
183         switch (c) {
184         case 'n':
185                 return '\n';
186         case 'f':
187                 return '\f';
188         case 'b':
189                 return '\b';
190         case 'r':
191                 return '\r';
192         case 'v':
193                 return '\v';
194         case 't':
195                 return '\t';
196         case '0':
197         case '1':
198         case '2':
199         case '3':
200         case '4':
201         case '5':
202         case '6':
203         case '7':
204                 v = c - '0';
205                 count = 1;
206                 while (count <= 3) {
207                         c = lex_get();
208 //                      if (jumping)
209 //                              return nil;
210                         if (c == EOF)
211                                 return EOF;
212                         c &= 0x7f;
213                         if (c < '0' || '7' < c) {
214                                 lex_unget(c);
215                                 break;
216                         }
217                         v = (v << 3) + c - '0';
218                         ++count;
219                 }
220                 return v;
221         default:
222                 return c;
223         }
224 }
225
226 static uint16_t lex_class;
227
228 static int
229 lexc(void)
230 {
231         int     c;
232         do {
233                 c = lex_get();
234                 if (c == EOF) {
235                         lex_class = ENDOFFILE;
236                         c = 0;
237                 } else {
238                         c &= 0x7f;
239                         lex_class = lex_classes[c];
240                         if (lex_class & BACKSLASH) {
241                                 c = lex_quoted();
242                                 if (c == EOF)
243                                         lex_class = ENDOFFILE;
244                                 else
245                                         lex_class = PRINTABLE;
246                         }
247                 }
248         } while (lex_class & IGNORE);
249         return c;
250 }
251
252 #define AO_LISP_TOKEN_MAX       32
253
254 static char     token_string[AO_LISP_TOKEN_MAX];
255 static int      token_int;
256 static int      token_len;
257
258 static inline void add_token(int c) {
259         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
260                 token_string[token_len++] = c;
261 }
262
263 static inline void end_token(void) {
264         token_string[token_len] = '\0';
265 }
266
267 static int
268 lex(void)
269 {
270         int     c;
271
272         token_len = 0;
273         for (;;) {
274                 c = lexc();
275                 if (lex_class & ENDOFFILE)
276                         return AO_LISP_NIL;
277
278 //              if (jumping)
279 //                      return nil;
280                 if (lex_class & WHITE)
281                         continue;
282
283                 if (lex_class & (BRA|KET|QUOTEC)) {
284                         add_token(c);
285                         end_token();
286                         switch (c) {
287                         case '(':
288                                 return OPEN;
289                         case ')':
290                                 return CLOSE;
291                         case '\'':
292                                 return QUOTE;
293                         }
294                 }
295                 if (lex_class & TWIDDLE) {
296                         token_int = lexc();
297                         return NUM;
298                 }
299                 if (lex_class & STRINGC) {
300                         for (;;) {
301                                 c = lexc();
302 //                              if (jumping)
303 //                                      return nil;
304                                 if (lex_class & (STRINGC|ENDOFFILE)) {
305                                         end_token();
306                                         return STRING;
307                                 }
308                                 add_token(c);
309                         }
310                 }
311                 if (lex_class & PRINTABLE) {
312                         int     isnum;
313                         int     hasdigit;
314                         int     isneg;
315
316                         isnum = 1;
317                         hasdigit = 0;
318                         token_int = 0;
319                         isneg = 0;
320                         for (;;) {
321                                 if (!(lex_class & NUMBER)) {
322                                         isnum = 0;
323                                 } else {
324                                         if (token_len != 0 &&
325                                             (lex_class & SIGN))
326                                         {
327                                                 isnum = 0;
328                                         }
329                                         if (c == '-')
330                                                 isneg = 1;
331                                         if (lex_class & DIGIT) {
332                                                 hasdigit = 1;
333                                                 if (isnum)
334                                                         token_int = token_int * 10 + c - '0';
335                                         }
336                                 }
337                                 add_token (c);
338                                 c = lexc ();
339 //                              if (jumping)
340 //                                      return nil;
341                                 if (lex_class & (NOTNAME)) {
342 //                                      if (lex_class & ENDOFFILE)
343 //                                              clearerr (f);
344                                         lex_unget(c);
345                                         end_token ();
346                                         if (isnum && hasdigit) {
347                                                 if (isneg)
348                                                         token_int = -token_int;
349                                                 return NUM;
350                                         }
351                                         return NAME;
352                                 }
353                         }
354
355                 }
356         }
357 }
358
359 static int parse_token;
360 static uint8_t                  been_here;
361 static struct ao_lisp_cons      *read_cons;
362 static struct ao_lisp_cons      *read_cons_tail;
363 static struct ao_lisp_cons      *read_stack;
364
365 static ao_lisp_poly
366 read_item(void)
367 {
368         struct ao_lisp_atom     *atom;
369         char                    *string;
370         int                     cons;
371         ao_lisp_poly            v;
372
373         if (!been_here) {
374                 ao_lisp_root_add(&ao_lisp_cons_type, &read_cons);
375                 ao_lisp_root_add(&ao_lisp_cons_type, &read_cons_tail);
376                 ao_lisp_root_add(&ao_lisp_cons_type, &read_stack);
377         }
378
379         cons = 0;
380         read_cons = read_cons_tail = read_stack = 0;
381         for (;;) {
382                 while (parse_token == OPEN) {
383                         if (cons++)
384                                 read_stack = ao_lisp_cons(ao_lisp_cons_poly(read_cons), read_stack);
385                         read_cons = NULL;
386                         read_cons_tail = NULL;
387                         parse_token = lex();
388                 }
389
390                 switch (parse_token) {
391                 case ENDOFFILE:
392                 default:
393                         v = AO_LISP_NIL;
394                         break;
395                 case NAME:
396                         atom = ao_lisp_atom_intern(token_string);
397                         if (atom)
398                                 v = ao_lisp_atom_poly(atom);
399                         else
400                                 v = AO_LISP_NIL;
401                         break;
402                 case NUM:
403                         v = ao_lisp_int_poly(token_int);
404                         break;
405                 case STRING:
406                         string = ao_lisp_string_copy(token_string);
407                         if (string)
408                                 v = ao_lisp_string_poly(string);
409                         else
410                                 v = AO_LISP_NIL;
411                         break;
412                 case CLOSE:
413                         if (cons)
414                                 v = ao_lisp_cons_poly(read_cons);
415                         else
416                                 v = AO_LISP_NIL;
417                         if (--cons) {
418                                 read_cons = ao_lisp_poly_cons(read_stack->car);
419                                 read_stack = read_stack->cdr;
420                                 for (read_cons_tail = read_cons;
421                                      read_cons_tail && read_cons_tail->cdr;
422                                      read_cons_tail = read_cons_tail->cdr)
423                                         ;
424                         }
425                         break;
426                 }
427
428                 if (!cons)
429                         break;
430
431                 struct ao_lisp_cons     *read = ao_lisp_cons(v, NULL);
432                 if (read_cons_tail)
433                         read_cons_tail->cdr = read;
434                 else
435                         read_cons = read;
436                 read_cons_tail = read;
437
438                 parse_token = lex();
439         }
440         return v;
441 }
442
443 ao_lisp_poly
444 ao_lisp_read(void)
445 {
446         parse_token = lex();
447         return read_item();
448 }