bc1eb36b7a25c3931501e97e02fda73353848461
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_lisp.h"
16 #include "ao_lisp_read.h"
17
18 static const uint16_t   lex_classes[128] = {
19         IGNORE,         /* ^@ */
20         IGNORE,         /* ^A */
21         IGNORE,         /* ^B */
22         IGNORE,         /* ^C */
23         IGNORE,         /* ^D */
24         IGNORE,         /* ^E */
25         IGNORE,         /* ^F */
26         IGNORE,         /* ^G */
27         IGNORE,         /* ^H */
28         WHITE,          /* ^I */
29         WHITE,          /* ^J */
30         WHITE,          /* ^K */
31         WHITE,          /* ^L */
32         WHITE,          /* ^M */
33         IGNORE,         /* ^N */
34         IGNORE,         /* ^O */
35         IGNORE,         /* ^P */
36         IGNORE,         /* ^Q */
37         IGNORE,         /* ^R */
38         IGNORE,         /* ^S */
39         IGNORE,         /* ^T */
40         IGNORE,         /* ^U */
41         IGNORE,         /* ^V */
42         IGNORE,         /* ^W */
43         IGNORE,         /* ^X */
44         IGNORE,         /* ^Y */
45         IGNORE,         /* ^Z */
46         IGNORE,         /* ^[ */
47         IGNORE,         /* ^\ */
48         IGNORE,         /* ^] */
49         IGNORE,         /* ^^ */
50         IGNORE,         /* ^_ */
51         PRINTABLE|WHITE,        /*    */
52         PRINTABLE,              /* ! */
53         PRINTABLE|STRINGC,      /* " */
54         PRINTABLE|COMMENT,      /* # */
55         PRINTABLE,              /* $ */
56         PRINTABLE,              /* % */
57         PRINTABLE,              /* & */
58         PRINTABLE|QUOTEC,       /* ' */
59         PRINTABLE|BRA,          /* ( */
60         PRINTABLE|KET,          /* ) */
61         PRINTABLE,              /* * */
62         PRINTABLE|SIGN,         /* + */
63         PRINTABLE,              /* , */
64         PRINTABLE|SIGN,         /* - */
65         PRINTABLE,              /* . */
66         PRINTABLE,              /* / */
67         PRINTABLE|DIGIT,        /* 0 */
68         PRINTABLE|DIGIT,        /* 1 */
69         PRINTABLE|DIGIT,        /* 2 */
70         PRINTABLE|DIGIT,        /* 3 */
71         PRINTABLE|DIGIT,        /* 4 */
72         PRINTABLE|DIGIT,        /* 5 */
73         PRINTABLE|DIGIT,        /* 6 */
74         PRINTABLE|DIGIT,        /* 7 */
75         PRINTABLE|DIGIT,        /* 8 */
76         PRINTABLE|DIGIT,        /* 9 */
77         PRINTABLE,              /* : */
78         PRINTABLE|COMMENT,      /* ; */
79         PRINTABLE,              /* < */
80         PRINTABLE,              /* = */
81         PRINTABLE,              /* > */
82         PRINTABLE,              /* ? */
83         PRINTABLE,              /*  @ */
84         PRINTABLE,              /*  A */
85         PRINTABLE,              /*  B */
86         PRINTABLE,              /*  C */
87         PRINTABLE,              /*  D */
88         PRINTABLE,              /*  E */
89         PRINTABLE,              /*  F */
90         PRINTABLE,              /*  G */
91         PRINTABLE,              /*  H */
92         PRINTABLE,              /*  I */
93         PRINTABLE,              /*  J */
94         PRINTABLE,              /*  K */
95         PRINTABLE,              /*  L */
96         PRINTABLE,              /*  M */
97         PRINTABLE,              /*  N */
98         PRINTABLE,              /*  O */
99         PRINTABLE,              /*  P */
100         PRINTABLE,              /*  Q */
101         PRINTABLE,              /*  R */
102         PRINTABLE,              /*  S */
103         PRINTABLE,              /*  T */
104         PRINTABLE,              /*  U */
105         PRINTABLE,              /*  V */
106         PRINTABLE,              /*  W */
107         PRINTABLE,              /*  X */
108         PRINTABLE,              /*  Y */
109         PRINTABLE,              /*  Z */
110         PRINTABLE,              /*  [ */
111         PRINTABLE|BACKSLASH,    /*  \ */
112         PRINTABLE,              /*  ] */
113         PRINTABLE,              /*  ^ */
114         PRINTABLE,              /*  _ */
115         PRINTABLE,              /*  ` */
116         PRINTABLE,              /*  a */
117         PRINTABLE,              /*  b */
118         PRINTABLE,              /*  c */
119         PRINTABLE,              /*  d */
120         PRINTABLE,              /*  e */
121         PRINTABLE,              /*  f */
122         PRINTABLE,              /*  g */
123         PRINTABLE,              /*  h */
124         PRINTABLE,              /*  i */
125         PRINTABLE,              /*  j */
126         PRINTABLE,              /*  k */
127         PRINTABLE,              /*  l */
128         PRINTABLE,              /*  m */
129         PRINTABLE,              /*  n */
130         PRINTABLE,              /*  o */
131         PRINTABLE,              /*  p */
132         PRINTABLE,              /*  q */
133         PRINTABLE,              /*  r */
134         PRINTABLE,              /*  s */
135         PRINTABLE,              /*  t */
136         PRINTABLE,              /*  u */
137         PRINTABLE,              /*  v */
138         PRINTABLE,              /*  w */
139         PRINTABLE,              /*  x */
140         PRINTABLE,              /*  y */
141         PRINTABLE,              /*  z */
142         PRINTABLE,              /*  { */
143         PRINTABLE|VBAR,         /*  | */
144         PRINTABLE,              /*  } */
145         PRINTABLE|TWIDDLE,      /*  ~ */
146         IGNORE,                 /*  ^? */
147 };
148
149 static int lex_unget_c;
150
151 static inline int
152 lex_get()
153 {
154         int     c;
155         if (lex_unget_c) {
156                 c = lex_unget_c;
157                 lex_unget_c = 0;
158         } else {
159 #if AO_LISP_ALTOS
160                 static uint8_t  at_eol;
161
162                 if (at_eol) {
163                         ao_cmd_readline();
164                         at_eol = 0;
165                 }
166                 c = ao_cmd_lex();
167                 if (c == '\n')
168                         at_eol = 1;
169 #else
170                 c = getchar();
171 #endif
172         }
173         return c;
174 }
175
176 static inline void
177 lex_unget(int c)
178 {
179         if (c != EOF)
180                 lex_unget_c = c;
181 }
182
183 static int
184 lex_quoted (void)
185 {
186         int     c;
187         int     v;
188         int     count;
189
190         c = lex_get();
191         if (c == EOF)
192                 return EOF;
193         c &= 0x7f;
194         switch (c) {
195         case 'n':
196                 return '\n';
197         case 'f':
198                 return '\f';
199         case 'b':
200                 return '\b';
201         case 'r':
202                 return '\r';
203         case 'v':
204                 return '\v';
205         case 't':
206                 return '\t';
207         case '0':
208         case '1':
209         case '2':
210         case '3':
211         case '4':
212         case '5':
213         case '6':
214         case '7':
215                 v = c - '0';
216                 count = 1;
217                 while (count <= 3) {
218                         c = lex_get();
219                         if (c == EOF)
220                                 return EOF;
221                         c &= 0x7f;
222                         if (c < '0' || '7' < c) {
223                                 lex_unget(c);
224                                 break;
225                         }
226                         v = (v << 3) + c - '0';
227                         ++count;
228                 }
229                 return v;
230         default:
231                 return c;
232         }
233 }
234
235 static uint16_t lex_class;
236
237 static int
238 lexc(void)
239 {
240         int     c;
241         do {
242                 c = lex_get();
243                 if (c == EOF) {
244                         lex_class = ENDOFFILE;
245                         c = 0;
246                 } else {
247                         c &= 0x7f;
248                         lex_class = lex_classes[c];
249                         if (lex_class & BACKSLASH) {
250                                 c = lex_quoted();
251                                 if (c == EOF)
252                                         lex_class = ENDOFFILE;
253                                 else
254                                         lex_class = PRINTABLE;
255                         }
256                 }
257         } while (lex_class & IGNORE);
258         return c;
259 }
260
261 #define AO_LISP_TOKEN_MAX       32
262
263 static char     token_string[AO_LISP_TOKEN_MAX];
264 static int      token_int;
265 static int      token_len;
266
267 static inline void add_token(int c) {
268         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
269                 token_string[token_len++] = c;
270 }
271
272 static inline void end_token(void) {
273         token_string[token_len] = '\0';
274 }
275
276 static int
277 lex(void)
278 {
279         int     c;
280
281         token_len = 0;
282         for (;;) {
283                 c = lexc();
284                 if (lex_class & ENDOFFILE)
285                         return AO_LISP_NIL;
286
287                 if (lex_class & WHITE)
288                         continue;
289
290                 if (lex_class & COMMENT) {
291                         while ((c = lexc()) != '\n') {
292                                 if (lex_class & ENDOFFILE)
293                                         return AO_LISP_NIL;
294                         }
295                         continue;
296                 }
297
298                 if (lex_class & (BRA|KET|QUOTEC)) {
299                         add_token(c);
300                         end_token();
301                         switch (c) {
302                         case '(':
303                                 return OPEN;
304                         case ')':
305                                 return CLOSE;
306                         case '\'':
307                                 return QUOTE;
308                         }
309                 }
310                 if (lex_class & TWIDDLE) {
311                         token_int = lexc();
312                         return NUM;
313                 }
314                 if (lex_class & STRINGC) {
315                         for (;;) {
316                                 c = lexc();
317                                 if (lex_class & (STRINGC|ENDOFFILE)) {
318                                         end_token();
319                                         return STRING;
320                                 }
321                                 add_token(c);
322                         }
323                 }
324                 if (lex_class & PRINTABLE) {
325                         int     isnum;
326                         int     hasdigit;
327                         int     isneg;
328
329                         isnum = 1;
330                         hasdigit = 0;
331                         token_int = 0;
332                         isneg = 0;
333                         for (;;) {
334                                 if (!(lex_class & NUMBER)) {
335                                         isnum = 0;
336                                 } else {
337                                         if (token_len != 0 &&
338                                             (lex_class & SIGN))
339                                         {
340                                                 isnum = 0;
341                                         }
342                                         if (c == '-')
343                                                 isneg = 1;
344                                         if (lex_class & DIGIT) {
345                                                 hasdigit = 1;
346                                                 if (isnum)
347                                                         token_int = token_int * 10 + c - '0';
348                                         }
349                                 }
350                                 add_token (c);
351                                 c = lexc ();
352                                 if (lex_class & (NOTNAME)) {
353 //                                      if (lex_class & ENDOFFILE)
354 //                                              clearerr (f);
355                                         lex_unget(c);
356                                         end_token ();
357                                         if (isnum && hasdigit) {
358                                                 if (isneg)
359                                                         token_int = -token_int;
360                                                 return NUM;
361                                         }
362                                         return NAME;
363                                 }
364                         }
365
366                 }
367         }
368 }
369
370 static int parse_token;
371 static uint8_t                  been_here;
372 static struct ao_lisp_cons      *read_cons;
373 static struct ao_lisp_cons      *read_cons_tail;
374 static struct ao_lisp_cons      *read_stack;
375
376 static int
377 push_read_stack(int cons, int in_quote)
378 {
379         if (cons) {
380                 read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(read_cons),
381                                                ao_lisp_cons_cons(ao_lisp_int_poly(in_quote),
382                                                                  read_stack));
383                 if (!read_stack)
384                         return 0;
385         }
386         read_cons = NULL;
387         read_cons_tail = NULL;
388         return 1;
389 }
390
391 static int
392 pop_read_stack(int cons)
393 {
394         int     in_quote = 0;
395         if (cons) {
396                 read_cons = ao_lisp_poly_cons(read_stack->car);
397                 read_stack = ao_lisp_poly_cons(read_stack->cdr);
398                 in_quote = ao_lisp_poly_int(read_stack->car);
399                 read_stack = ao_lisp_poly_cons(read_stack->cdr);
400                 for (read_cons_tail = read_cons;
401                      read_cons_tail && read_cons_tail->cdr;
402                      read_cons_tail = ao_lisp_poly_cons(read_cons_tail->cdr))
403                         ;
404         } else {
405                 read_cons = 0;
406                 read_cons_tail = 0;
407                 read_stack = 0;
408         }
409         return in_quote;
410 }
411
412 ao_poly
413 ao_lisp_read(void)
414 {
415         struct ao_lisp_atom     *atom;
416         char                    *string;
417         int                     cons;
418         int                     in_quote;
419         ao_poly                 v;
420
421         if (!been_here) {
422                 ao_lisp_root_add(&ao_lisp_cons_type, &read_cons);
423                 ao_lisp_root_add(&ao_lisp_cons_type, &read_cons_tail);
424                 ao_lisp_root_add(&ao_lisp_cons_type, &read_stack);
425                 been_here = 1;
426         }
427         parse_token = lex();
428
429         cons = 0;
430         in_quote = 0;
431         read_cons = read_cons_tail = read_stack = 0;
432         for (;;) {
433                 while (parse_token == OPEN) {
434                         if (!push_read_stack(cons, in_quote))
435                                 return AO_LISP_NIL;
436                         cons++;
437                         in_quote = 0;
438                         parse_token = lex();
439                 }
440
441                 switch (parse_token) {
442                 case ENDOFFILE:
443                 default:
444                         v = AO_LISP_NIL;
445                         break;
446                 case NAME:
447                         atom = ao_lisp_atom_intern(token_string);
448                         if (atom)
449                                 v = ao_lisp_atom_poly(atom);
450                         else
451                                 v = AO_LISP_NIL;
452                         break;
453                 case NUM:
454                         v = ao_lisp_int_poly(token_int);
455                         break;
456                 case STRING:
457                         string = ao_lisp_string_copy(token_string);
458                         if (string)
459                                 v = ao_lisp_string_poly(string);
460                         else
461                                 v = AO_LISP_NIL;
462                         break;
463                 case QUOTE:
464                         if (!push_read_stack(cons, in_quote))
465                                 return AO_LISP_NIL;
466                         cons++;
467                         in_quote = 1;
468                         v = _ao_lisp_atom_quote;
469                         break;
470                 case CLOSE:
471                         if (!cons) {
472                                 v = AO_LISP_NIL;
473                                 break;
474                         }
475                         v = ao_lisp_cons_poly(read_cons);
476                         --cons;
477                         in_quote = pop_read_stack(cons);
478                         break;
479                 }
480
481                 /* loop over QUOTE ends */
482                 for (;;) {
483                         if (!cons)
484                                 return v;
485
486                         struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, NULL);
487                         if (!read)
488                                 return AO_LISP_NIL;
489
490                         if (read_cons_tail)
491                                 read_cons_tail->cdr = ao_lisp_cons_poly(read);
492                         else
493                                 read_cons = read;
494                         read_cons_tail = read;
495
496                         if (!in_quote || !read_cons->cdr)
497                                 break;
498
499                         v = ao_lisp_cons_poly(read_cons);
500                         --cons;
501                         in_quote = pop_read_stack(cons);
502                 }
503
504                 parse_token = lex();
505         }
506         return v;
507 }