5115f46e28af8b989c3ca199f852298c4ae70cb6
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_lisp.h"
16 #include "ao_lisp_read.h"
17
18 static const uint16_t   lex_classes[128] = {
19         IGNORE,         /* ^@ */
20         IGNORE,         /* ^A */
21         IGNORE,         /* ^B */
22         IGNORE,         /* ^C */
23         IGNORE,         /* ^D */
24         IGNORE,         /* ^E */
25         IGNORE,         /* ^F */
26         IGNORE,         /* ^G */
27         IGNORE,         /* ^H */
28         WHITE,          /* ^I */
29         WHITE,          /* ^J */
30         WHITE,          /* ^K */
31         WHITE,          /* ^L */
32         WHITE,          /* ^M */
33         IGNORE,         /* ^N */
34         IGNORE,         /* ^O */
35         IGNORE,         /* ^P */
36         IGNORE,         /* ^Q */
37         IGNORE,         /* ^R */
38         IGNORE,         /* ^S */
39         IGNORE,         /* ^T */
40         IGNORE,         /* ^U */
41         IGNORE,         /* ^V */
42         IGNORE,         /* ^W */
43         IGNORE,         /* ^X */
44         IGNORE,         /* ^Y */
45         IGNORE,         /* ^Z */
46         IGNORE,         /* ^[ */
47         IGNORE,         /* ^\ */
48         IGNORE,         /* ^] */
49         IGNORE,         /* ^^ */
50         IGNORE,         /* ^_ */
51         PRINTABLE|WHITE,        /*    */
52         PRINTABLE,              /* ! */
53         PRINTABLE|STRINGC,      /* " */
54         PRINTABLE|POUND,        /* # */
55         PRINTABLE,              /* $ */
56         PRINTABLE,              /* % */
57         PRINTABLE,              /* & */
58         PRINTABLE|SPECIAL,      /* ' */
59         PRINTABLE|SPECIAL,      /* ( */
60         PRINTABLE|SPECIAL,      /* ) */
61         PRINTABLE,              /* * */
62         PRINTABLE|SIGN,         /* + */
63         PRINTABLE,              /* , */
64         PRINTABLE|SIGN,         /* - */
65         PRINTABLE|SPECIAL,      /* . */
66         PRINTABLE,              /* / */
67         PRINTABLE|DIGIT,        /* 0 */
68         PRINTABLE|DIGIT,        /* 1 */
69         PRINTABLE|DIGIT,        /* 2 */
70         PRINTABLE|DIGIT,        /* 3 */
71         PRINTABLE|DIGIT,        /* 4 */
72         PRINTABLE|DIGIT,        /* 5 */
73         PRINTABLE|DIGIT,        /* 6 */
74         PRINTABLE|DIGIT,        /* 7 */
75         PRINTABLE|DIGIT,        /* 8 */
76         PRINTABLE|DIGIT,        /* 9 */
77         PRINTABLE,              /* : */
78         PRINTABLE|COMMENT,      /* ; */
79         PRINTABLE,              /* < */
80         PRINTABLE,              /* = */
81         PRINTABLE,              /* > */
82         PRINTABLE,              /* ? */
83         PRINTABLE,              /*  @ */
84         PRINTABLE,              /*  A */
85         PRINTABLE,              /*  B */
86         PRINTABLE,              /*  C */
87         PRINTABLE,              /*  D */
88         PRINTABLE,              /*  E */
89         PRINTABLE,              /*  F */
90         PRINTABLE,              /*  G */
91         PRINTABLE,              /*  H */
92         PRINTABLE,              /*  I */
93         PRINTABLE,              /*  J */
94         PRINTABLE,              /*  K */
95         PRINTABLE,              /*  L */
96         PRINTABLE,              /*  M */
97         PRINTABLE,              /*  N */
98         PRINTABLE,              /*  O */
99         PRINTABLE,              /*  P */
100         PRINTABLE,              /*  Q */
101         PRINTABLE,              /*  R */
102         PRINTABLE,              /*  S */
103         PRINTABLE,              /*  T */
104         PRINTABLE,              /*  U */
105         PRINTABLE,              /*  V */
106         PRINTABLE,              /*  W */
107         PRINTABLE,              /*  X */
108         PRINTABLE,              /*  Y */
109         PRINTABLE,              /*  Z */
110         PRINTABLE,              /*  [ */
111         PRINTABLE|BACKSLASH,    /*  \ */
112         PRINTABLE,              /*  ] */
113         PRINTABLE,              /*  ^ */
114         PRINTABLE,              /*  _ */
115         PRINTABLE,              /*  ` */
116         PRINTABLE,              /*  a */
117         PRINTABLE,              /*  b */
118         PRINTABLE,              /*  c */
119         PRINTABLE,              /*  d */
120         PRINTABLE,              /*  e */
121         PRINTABLE,              /*  f */
122         PRINTABLE,              /*  g */
123         PRINTABLE,              /*  h */
124         PRINTABLE,              /*  i */
125         PRINTABLE,              /*  j */
126         PRINTABLE,              /*  k */
127         PRINTABLE,              /*  l */
128         PRINTABLE,              /*  m */
129         PRINTABLE,              /*  n */
130         PRINTABLE,              /*  o */
131         PRINTABLE,              /*  p */
132         PRINTABLE,              /*  q */
133         PRINTABLE,              /*  r */
134         PRINTABLE,              /*  s */
135         PRINTABLE,              /*  t */
136         PRINTABLE,              /*  u */
137         PRINTABLE,              /*  v */
138         PRINTABLE,              /*  w */
139         PRINTABLE,              /*  x */
140         PRINTABLE,              /*  y */
141         PRINTABLE,              /*  z */
142         PRINTABLE,              /*  { */
143         PRINTABLE|VBAR,         /*  | */
144         PRINTABLE,              /*  } */
145         PRINTABLE,              /*  ~ */
146         IGNORE,                 /*  ^? */
147 };
148
149 static int lex_unget_c;
150
151 static inline int
152 lex_get()
153 {
154         int     c;
155         if (lex_unget_c) {
156                 c = lex_unget_c;
157                 lex_unget_c = 0;
158         } else {
159                 c = ao_lisp_getc();
160         }
161         return c;
162 }
163
164 static inline void
165 lex_unget(int c)
166 {
167         if (c != EOF)
168                 lex_unget_c = c;
169 }
170
171 static uint16_t lex_class;
172
173 static int
174 lexc(void)
175 {
176         int     c;
177         do {
178                 c = lex_get();
179                 if (c == EOF) {
180                         c = 0;
181                         lex_class = ENDOFFILE;
182                 } else {
183                         c &= 0x7f;
184                         lex_class = lex_classes[c];
185                 }
186         } while (lex_class & IGNORE);
187         return c;
188 }
189
190 static int
191 lex_quoted(void)
192 {
193         int     c;
194         int     v;
195         int     count;
196
197         c = lex_get();
198         if (c == EOF) {
199                 lex_class = ENDOFFILE;
200                 return 0;
201         }
202         lex_class = 0;
203         c &= 0x7f;
204         switch (c) {
205         case 'n':
206                 return '\n';
207         case 'f':
208                 return '\f';
209         case 'b':
210                 return '\b';
211         case 'r':
212                 return '\r';
213         case 'v':
214                 return '\v';
215         case 't':
216                 return '\t';
217         case '0':
218         case '1':
219         case '2':
220         case '3':
221         case '4':
222         case '5':
223         case '6':
224         case '7':
225                 v = c - '0';
226                 count = 1;
227                 while (count <= 3) {
228                         c = lex_get();
229                         if (c == EOF)
230                                 return EOF;
231                         c &= 0x7f;
232                         if (c < '0' || '7' < c) {
233                                 lex_unget(c);
234                                 break;
235                         }
236                         v = (v << 3) + c - '0';
237                         ++count;
238                 }
239                 return v;
240         default:
241                 return c;
242         }
243 }
244
245 #define AO_LISP_TOKEN_MAX       32
246
247 static char     token_string[AO_LISP_TOKEN_MAX];
248 static int32_t  token_int;
249 static int      token_len;
250
251 static inline void add_token(int c) {
252         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
253                 token_string[token_len++] = c;
254 }
255
256 static inline void end_token(void) {
257         token_string[token_len] = '\0';
258 }
259
260 static int
261 _lex(void)
262 {
263         int     c;
264
265         token_len = 0;
266         for (;;) {
267                 c = lexc();
268                 if (lex_class & ENDOFFILE)
269                         return END;
270
271                 if (lex_class & WHITE)
272                         continue;
273
274                 if (lex_class & COMMENT) {
275                         while ((c = lexc()) != '\n') {
276                                 if (lex_class & ENDOFFILE)
277                                         return END;
278                         }
279                         continue;
280                 }
281
282                 if (lex_class & SPECIAL) {
283                         add_token(c);
284                         end_token();
285                         switch (c) {
286                         case '(':
287                         case '[':
288                                 return OPEN;
289                         case ')':
290                         case ']':
291                                 return CLOSE;
292                         case '\'':
293                                 return QUOTE;
294                         case '.':
295                                 return DOT;
296                         }
297                 }
298                 if (lex_class & POUND) {
299                         c = lexc();
300                         switch (c) {
301                         case 't':
302                                 add_token(c);
303                                 end_token();
304                                 return BOOL;
305                         case 'f':
306                                 add_token(c);
307                                 end_token();
308                                 return BOOL;
309                         case '\\':
310                                 for (;;) {
311                                         int alphabetic;
312                                         c = lexc();
313                                         alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
314                                         if (token_len == 0) {
315                                                 add_token(c);
316                                                 if (!alphabetic)
317                                                         break;
318                                         } else {
319                                                 if (alphabetic)
320                                                         add_token(c);
321                                                 else {
322                                                         lex_unget(c);
323                                                         break;
324                                                 }
325                                         }
326                                 }
327                                 end_token();
328                                 if (token_len == 1)
329                                         token_int = token_string[0];
330                                 else if (!strcmp(token_string, "space"))
331                                         token_int = ' ';
332                                 else if (!strcmp(token_string, "newline"))
333                                         token_int = '\n';
334                                 else if (!strcmp(token_string, "tab"))
335                                         token_int = '\t';
336                                 else if (!strcmp(token_string, "return"))
337                                         token_int = '\r';
338                                 else if (!strcmp(token_string, "formfeed"))
339                                         token_int = '\f';
340                                 else {
341                                         ao_lisp_error(AO_LISP_INVALID, "invalid character token #\\%s", token_string);
342                                         continue;
343                                 }
344                                 return NUM;
345                         }
346                 }
347                 if (lex_class & STRINGC) {
348                         for (;;) {
349                                 c = lexc();
350                                 if (lex_class & BACKSLASH)
351                                         c = lex_quoted();
352                                 if (lex_class & (STRINGC|ENDOFFILE)) {
353                                         end_token();
354                                         return STRING;
355                                 }
356                                 add_token(c);
357                         }
358                 }
359                 if (lex_class & PRINTABLE) {
360                         int     isnum;
361                         int     hasdigit;
362                         int     isneg;
363
364                         isnum = 1;
365                         hasdigit = 0;
366                         token_int = 0;
367                         isneg = 0;
368                         for (;;) {
369                                 if (!(lex_class & NUMBER)) {
370                                         isnum = 0;
371                                 } else {
372                                         if (token_len != 0 &&
373                                             (lex_class & SIGN))
374                                         {
375                                                 isnum = 0;
376                                         }
377                                         if (c == '-')
378                                                 isneg = 1;
379                                         if (lex_class & DIGIT) {
380                                                 hasdigit = 1;
381                                                 if (isnum)
382                                                         token_int = token_int * 10 + c - '0';
383                                         }
384                                 }
385                                 add_token (c);
386                                 c = lexc ();
387                                 if (lex_class & (NOTNAME)) {
388 //                                      if (lex_class & ENDOFFILE)
389 //                                              clearerr (f);
390                                         lex_unget(c);
391                                         end_token ();
392                                         if (isnum && hasdigit) {
393                                                 if (isneg)
394                                                         token_int = -token_int;
395                                                 return NUM;
396                                         }
397                                         return NAME;
398                                 }
399                         }
400
401                 }
402         }
403 }
404
405 static inline int lex(void)
406 {
407         int     parse_token = _lex();
408         DBGI("token %d (%s)\n", parse_token, token_string);
409         return parse_token;
410 }
411
412 static int parse_token;
413
414 struct ao_lisp_cons     *ao_lisp_read_cons;
415 struct ao_lisp_cons     *ao_lisp_read_cons_tail;
416 struct ao_lisp_cons     *ao_lisp_read_stack;
417
418 #define READ_IN_QUOTE   0x01
419 #define READ_SAW_DOT    0x02
420 #define READ_DONE_DOT   0x04
421
422 static int
423 push_read_stack(int cons, int read_state)
424 {
425         DBGI("push read stack %p 0x%x\n", ao_lisp_read_cons, read_state);
426         DBG_IN();
427         if (cons) {
428                 ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons),
429                                                        ao_lisp__cons(ao_lisp_int_poly(read_state),
430                                                                      ao_lisp_cons_poly(ao_lisp_read_stack)));
431                 if (!ao_lisp_read_stack)
432                         return 0;
433         }
434         ao_lisp_read_cons = NULL;
435         ao_lisp_read_cons_tail = NULL;
436         return 1;
437 }
438
439 static int
440 pop_read_stack(int cons)
441 {
442         int     read_state = 0;
443         if (cons) {
444                 ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car);
445                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
446                 read_state = ao_lisp_poly_int(ao_lisp_read_stack->car);
447                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
448                 for (ao_lisp_read_cons_tail = ao_lisp_read_cons;
449                      ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr;
450                      ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr))
451                         ;
452         } else {
453                 ao_lisp_read_cons = 0;
454                 ao_lisp_read_cons_tail = 0;
455                 ao_lisp_read_stack = 0;
456         }
457         DBG_OUT();
458         DBGI("pop read stack %p %d\n", ao_lisp_read_cons, read_state);
459         return read_state;
460 }
461
462 ao_poly
463 ao_lisp_read(void)
464 {
465         struct ao_lisp_atom     *atom;
466         char                    *string;
467         int                     cons;
468         int                     read_state;
469         ao_poly                 v;
470
471
472         cons = 0;
473         read_state = 0;
474         ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0;
475         for (;;) {
476                 parse_token = lex();
477                 while (parse_token == OPEN) {
478                         if (!push_read_stack(cons, read_state))
479                                 return AO_LISP_NIL;
480                         cons++;
481                         read_state = 0;
482                         parse_token = lex();
483                 }
484
485                 switch (parse_token) {
486                 case END:
487                 default:
488                         if (cons)
489                                 ao_lisp_error(AO_LISP_EOF, "unexpected end of file");
490                         return _ao_lisp_atom_eof;
491                         break;
492                 case NAME:
493                         atom = ao_lisp_atom_intern(token_string);
494                         if (atom)
495                                 v = ao_lisp_atom_poly(atom);
496                         else
497                                 v = AO_LISP_NIL;
498                         break;
499                 case NUM:
500                         v = ao_lisp_integer_poly(token_int);
501                         break;
502                 case BOOL:
503                         if (token_string[0] == 't')
504                                 v = _ao_lisp_bool_true;
505                         else
506                                 v = _ao_lisp_bool_false;
507                         break;
508                 case STRING:
509                         string = ao_lisp_string_copy(token_string);
510                         if (string)
511                                 v = ao_lisp_string_poly(string);
512                         else
513                                 v = AO_LISP_NIL;
514                         break;
515                 case QUOTE:
516                         if (!push_read_stack(cons, read_state))
517                                 return AO_LISP_NIL;
518                         cons++;
519                         read_state = READ_IN_QUOTE;
520                         v = _ao_lisp_atom_quote;
521                         break;
522                 case CLOSE:
523                         if (!cons) {
524                                 v = AO_LISP_NIL;
525                                 break;
526                         }
527                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
528                         --cons;
529                         read_state = pop_read_stack(cons);
530                         break;
531                 case DOT:
532                         if (!cons) {
533                                 ao_lisp_error(AO_LISP_INVALID, ". outside of cons");
534                                 return AO_LISP_NIL;
535                         }
536                         if (!ao_lisp_read_cons) {
537                                 ao_lisp_error(AO_LISP_INVALID, ". first in cons");
538                                 return AO_LISP_NIL;
539                         }
540                         read_state |= READ_SAW_DOT;
541                         continue;
542                 }
543
544                 /* loop over QUOTE ends */
545                 for (;;) {
546                         if (!cons)
547                                 return v;
548
549                         if (read_state & READ_DONE_DOT) {
550                                 ao_lisp_error(AO_LISP_INVALID, ". not last in cons");
551                                 return AO_LISP_NIL;
552                         }
553
554                         if (read_state & READ_SAW_DOT) {
555                                 read_state |= READ_DONE_DOT;
556                                 ao_lisp_read_cons_tail->cdr = v;
557                         } else {
558                                 struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, AO_LISP_NIL);
559                                 if (!read)
560                                         return AO_LISP_NIL;
561
562                                 if (ao_lisp_read_cons_tail)
563                                         ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read);
564                                 else
565                                         ao_lisp_read_cons = read;
566                                 ao_lisp_read_cons_tail = read;
567                         }
568
569                         if (!(read_state & READ_IN_QUOTE) || !ao_lisp_read_cons->cdr)
570                                 break;
571
572                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
573                         --cons;
574                         read_state = pop_read_stack(cons);
575                 }
576         }
577         return v;
578 }