550f62c2c19d94c3c5431ea2fa0eb0fedf1a330e
[fw/altos] / src / lisp / ao_lisp_read.c
1 /*
2  * Copyright © 2016 Keith Packard <keithp@keithp.com>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include "ao_lisp.h"
16 #include "ao_lisp_read.h"
17
18 static const uint16_t   lex_classes[128] = {
19         IGNORE,         /* ^@ */
20         IGNORE,         /* ^A */
21         IGNORE,         /* ^B */
22         IGNORE,         /* ^C */
23         IGNORE,         /* ^D */
24         IGNORE,         /* ^E */
25         IGNORE,         /* ^F */
26         IGNORE,         /* ^G */
27         IGNORE,         /* ^H */
28         WHITE,          /* ^I */
29         WHITE,          /* ^J */
30         WHITE,          /* ^K */
31         WHITE,          /* ^L */
32         WHITE,          /* ^M */
33         IGNORE,         /* ^N */
34         IGNORE,         /* ^O */
35         IGNORE,         /* ^P */
36         IGNORE,         /* ^Q */
37         IGNORE,         /* ^R */
38         IGNORE,         /* ^S */
39         IGNORE,         /* ^T */
40         IGNORE,         /* ^U */
41         IGNORE,         /* ^V */
42         IGNORE,         /* ^W */
43         IGNORE,         /* ^X */
44         IGNORE,         /* ^Y */
45         IGNORE,         /* ^Z */
46         IGNORE,         /* ^[ */
47         IGNORE,         /* ^\ */
48         IGNORE,         /* ^] */
49         IGNORE,         /* ^^ */
50         IGNORE,         /* ^_ */
51         PRINTABLE|WHITE,        /*    */
52         PRINTABLE,              /* ! */
53         PRINTABLE|STRINGC,      /* " */
54         PRINTABLE|COMMENT,      /* # */
55         PRINTABLE,              /* $ */
56         PRINTABLE,              /* % */
57         PRINTABLE,              /* & */
58         PRINTABLE|QUOTEC,       /* ' */
59         PRINTABLE|BRA,          /* ( */
60         PRINTABLE|KET,          /* ) */
61         PRINTABLE,              /* * */
62         PRINTABLE|SIGN,         /* + */
63         PRINTABLE,              /* , */
64         PRINTABLE|SIGN,         /* - */
65         PRINTABLE|DOTC,         /* . */
66         PRINTABLE,              /* / */
67         PRINTABLE|DIGIT,        /* 0 */
68         PRINTABLE|DIGIT,        /* 1 */
69         PRINTABLE|DIGIT,        /* 2 */
70         PRINTABLE|DIGIT,        /* 3 */
71         PRINTABLE|DIGIT,        /* 4 */
72         PRINTABLE|DIGIT,        /* 5 */
73         PRINTABLE|DIGIT,        /* 6 */
74         PRINTABLE|DIGIT,        /* 7 */
75         PRINTABLE|DIGIT,        /* 8 */
76         PRINTABLE|DIGIT,        /* 9 */
77         PRINTABLE,              /* : */
78         PRINTABLE|COMMENT,      /* ; */
79         PRINTABLE,              /* < */
80         PRINTABLE,              /* = */
81         PRINTABLE,              /* > */
82         PRINTABLE,              /* ? */
83         PRINTABLE,              /*  @ */
84         PRINTABLE,              /*  A */
85         PRINTABLE,              /*  B */
86         PRINTABLE,              /*  C */
87         PRINTABLE,              /*  D */
88         PRINTABLE,              /*  E */
89         PRINTABLE,              /*  F */
90         PRINTABLE,              /*  G */
91         PRINTABLE,              /*  H */
92         PRINTABLE,              /*  I */
93         PRINTABLE,              /*  J */
94         PRINTABLE,              /*  K */
95         PRINTABLE,              /*  L */
96         PRINTABLE,              /*  M */
97         PRINTABLE,              /*  N */
98         PRINTABLE,              /*  O */
99         PRINTABLE,              /*  P */
100         PRINTABLE,              /*  Q */
101         PRINTABLE,              /*  R */
102         PRINTABLE,              /*  S */
103         PRINTABLE,              /*  T */
104         PRINTABLE,              /*  U */
105         PRINTABLE,              /*  V */
106         PRINTABLE,              /*  W */
107         PRINTABLE,              /*  X */
108         PRINTABLE,              /*  Y */
109         PRINTABLE,              /*  Z */
110         PRINTABLE,              /*  [ */
111         PRINTABLE|BACKSLASH,    /*  \ */
112         PRINTABLE,              /*  ] */
113         PRINTABLE,              /*  ^ */
114         PRINTABLE,              /*  _ */
115         PRINTABLE,              /*  ` */
116         PRINTABLE,              /*  a */
117         PRINTABLE,              /*  b */
118         PRINTABLE,              /*  c */
119         PRINTABLE,              /*  d */
120         PRINTABLE,              /*  e */
121         PRINTABLE,              /*  f */
122         PRINTABLE,              /*  g */
123         PRINTABLE,              /*  h */
124         PRINTABLE,              /*  i */
125         PRINTABLE,              /*  j */
126         PRINTABLE,              /*  k */
127         PRINTABLE,              /*  l */
128         PRINTABLE,              /*  m */
129         PRINTABLE,              /*  n */
130         PRINTABLE,              /*  o */
131         PRINTABLE,              /*  p */
132         PRINTABLE,              /*  q */
133         PRINTABLE,              /*  r */
134         PRINTABLE,              /*  s */
135         PRINTABLE,              /*  t */
136         PRINTABLE,              /*  u */
137         PRINTABLE,              /*  v */
138         PRINTABLE,              /*  w */
139         PRINTABLE,              /*  x */
140         PRINTABLE,              /*  y */
141         PRINTABLE,              /*  z */
142         PRINTABLE,              /*  { */
143         PRINTABLE|VBAR,         /*  | */
144         PRINTABLE,              /*  } */
145         PRINTABLE|TWIDDLE,      /*  ~ */
146         IGNORE,                 /*  ^? */
147 };
148
149 static int lex_unget_c;
150
151 static inline int
152 lex_get()
153 {
154         int     c;
155         if (lex_unget_c) {
156                 c = lex_unget_c;
157                 lex_unget_c = 0;
158         } else {
159                 c = ao_lisp_getc();
160         }
161         return c;
162 }
163
164 static inline void
165 lex_unget(int c)
166 {
167         if (c != EOF)
168                 lex_unget_c = c;
169 }
170
171 static int
172 lex_quoted (void)
173 {
174         int     c;
175         int     v;
176         int     count;
177
178         c = lex_get();
179         if (c == EOF)
180                 return EOF;
181         c &= 0x7f;
182         switch (c) {
183         case 'n':
184                 return '\n';
185         case 'f':
186                 return '\f';
187         case 'b':
188                 return '\b';
189         case 'r':
190                 return '\r';
191         case 'v':
192                 return '\v';
193         case 't':
194                 return '\t';
195         case '0':
196         case '1':
197         case '2':
198         case '3':
199         case '4':
200         case '5':
201         case '6':
202         case '7':
203                 v = c - '0';
204                 count = 1;
205                 while (count <= 3) {
206                         c = lex_get();
207                         if (c == EOF)
208                                 return EOF;
209                         c &= 0x7f;
210                         if (c < '0' || '7' < c) {
211                                 lex_unget(c);
212                                 break;
213                         }
214                         v = (v << 3) + c - '0';
215                         ++count;
216                 }
217                 return v;
218         default:
219                 return c;
220         }
221 }
222
223 static uint16_t lex_class;
224
225 static int
226 lexc(void)
227 {
228         int     c;
229         do {
230                 c = lex_get();
231                 if (c == EOF) {
232                         lex_class = ENDOFFILE;
233                         c = 0;
234                 } else {
235                         c &= 0x7f;
236                         lex_class = lex_classes[c];
237                         if (lex_class & BACKSLASH) {
238                                 c = lex_quoted();
239                                 if (c == EOF)
240                                         lex_class = ENDOFFILE;
241                                 else
242                                         lex_class = PRINTABLE;
243                         }
244                 }
245         } while (lex_class & IGNORE);
246         return c;
247 }
248
249 #define AO_LISP_TOKEN_MAX       32
250
251 static char     token_string[AO_LISP_TOKEN_MAX];
252 static int      token_int;
253 static int      token_len;
254
255 static inline void add_token(int c) {
256         if (c && token_len < AO_LISP_TOKEN_MAX - 1)
257                 token_string[token_len++] = c;
258 }
259
260 static inline void end_token(void) {
261         token_string[token_len] = '\0';
262 }
263
264 static int
265 _lex(void)
266 {
267         int     c;
268
269         token_len = 0;
270         for (;;) {
271                 c = lexc();
272                 if (lex_class & ENDOFFILE)
273                         return END;
274
275                 if (lex_class & WHITE)
276                         continue;
277
278                 if (lex_class & COMMENT) {
279                         while ((c = lexc()) != '\n') {
280                                 if (lex_class & ENDOFFILE)
281                                         return END;
282                         }
283                         continue;
284                 }
285
286                 if (lex_class & (BRA|KET|QUOTEC)) {
287                         add_token(c);
288                         end_token();
289                         switch (c) {
290                         case '(':
291                                 return OPEN;
292                         case ')':
293                                 return CLOSE;
294                         case '\'':
295                                 return QUOTE;
296                         }
297                 }
298                 if (lex_class & (DOTC)) {
299                         add_token(c);
300                         end_token();
301                         return DOT;
302                 }
303                 if (lex_class & TWIDDLE) {
304                         token_int = lexc();
305                         return NUM;
306                 }
307                 if (lex_class & STRINGC) {
308                         for (;;) {
309                                 c = lexc();
310                                 if (lex_class & (STRINGC|ENDOFFILE)) {
311                                         end_token();
312                                         return STRING;
313                                 }
314                                 add_token(c);
315                         }
316                 }
317                 if (lex_class & PRINTABLE) {
318                         int     isnum;
319                         int     hasdigit;
320                         int     isneg;
321
322                         isnum = 1;
323                         hasdigit = 0;
324                         token_int = 0;
325                         isneg = 0;
326                         for (;;) {
327                                 if (!(lex_class & NUMBER)) {
328                                         isnum = 0;
329                                 } else {
330                                         if (token_len != 0 &&
331                                             (lex_class & SIGN))
332                                         {
333                                                 isnum = 0;
334                                         }
335                                         if (c == '-')
336                                                 isneg = 1;
337                                         if (lex_class & DIGIT) {
338                                                 hasdigit = 1;
339                                                 if (isnum)
340                                                         token_int = token_int * 10 + c - '0';
341                                         }
342                                 }
343                                 add_token (c);
344                                 c = lexc ();
345                                 if (lex_class & (NOTNAME)) {
346 //                                      if (lex_class & ENDOFFILE)
347 //                                              clearerr (f);
348                                         lex_unget(c);
349                                         end_token ();
350                                         if (isnum && hasdigit) {
351                                                 if (isneg)
352                                                         token_int = -token_int;
353                                                 return NUM;
354                                         }
355                                         return NAME;
356                                 }
357                         }
358
359                 }
360         }
361 }
362
363 static inline int lex(void)
364 {
365         int     parse_token = _lex();
366         DBGI("token %d (%s)\n", parse_token, token_string);
367         return parse_token;
368 }
369
370 static int parse_token;
371
372 struct ao_lisp_cons     *ao_lisp_read_cons;
373 struct ao_lisp_cons     *ao_lisp_read_cons_tail;
374 struct ao_lisp_cons     *ao_lisp_read_stack;
375
376 #define READ_IN_QUOTE   0x01
377 #define READ_SAW_DOT    0x02
378 #define READ_DONE_DOT   0x04
379
380 static int
381 push_read_stack(int cons, int read_state)
382 {
383         DBGI("push read stack %p 0x%x\n", ao_lisp_read_cons, read_state);
384         DBG_IN();
385         if (cons) {
386                 ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons),
387                                                        ao_lisp__cons(ao_lisp_int_poly(read_state),
388                                                                      ao_lisp_cons_poly(ao_lisp_read_stack)));
389                 if (!ao_lisp_read_stack)
390                         return 0;
391         }
392         ao_lisp_read_cons = NULL;
393         ao_lisp_read_cons_tail = NULL;
394         return 1;
395 }
396
397 static int
398 pop_read_stack(int cons)
399 {
400         int     read_state = 0;
401         if (cons) {
402                 ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car);
403                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
404                 read_state = ao_lisp_poly_int(ao_lisp_read_stack->car);
405                 ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
406                 for (ao_lisp_read_cons_tail = ao_lisp_read_cons;
407                      ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr;
408                      ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr))
409                         ;
410         } else {
411                 ao_lisp_read_cons = 0;
412                 ao_lisp_read_cons_tail = 0;
413                 ao_lisp_read_stack = 0;
414         }
415         DBG_OUT();
416         DBGI("pop read stack %p %d\n", ao_lisp_read_cons, read_state);
417         return read_state;
418 }
419
420 ao_poly
421 ao_lisp_read(void)
422 {
423         struct ao_lisp_atom     *atom;
424         char                    *string;
425         int                     cons;
426         int                     read_state;
427         ao_poly                 v;
428
429
430         cons = 0;
431         read_state = 0;
432         ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0;
433         for (;;) {
434                 parse_token = lex();
435                 while (parse_token == OPEN) {
436                         if (!push_read_stack(cons, read_state))
437                                 return AO_LISP_NIL;
438                         cons++;
439                         read_state = 0;
440                         parse_token = lex();
441                 }
442
443                 switch (parse_token) {
444                 case END:
445                 default:
446                         if (cons)
447                                 ao_lisp_error(AO_LISP_EOF, "unexpected end of file");
448                         return _ao_lisp_atom_eof;
449                         break;
450                 case NAME:
451                         atom = ao_lisp_atom_intern(token_string);
452                         if (atom)
453                                 v = ao_lisp_atom_poly(atom);
454                         else
455                                 v = AO_LISP_NIL;
456                         break;
457                 case NUM:
458                         v = ao_lisp_int_poly(token_int);
459                         break;
460                 case STRING:
461                         string = ao_lisp_string_copy(token_string);
462                         if (string)
463                                 v = ao_lisp_string_poly(string);
464                         else
465                                 v = AO_LISP_NIL;
466                         break;
467                 case QUOTE:
468                         if (!push_read_stack(cons, read_state))
469                                 return AO_LISP_NIL;
470                         cons++;
471                         read_state |= READ_IN_QUOTE;
472                         v = _ao_lisp_atom_quote;
473                         break;
474                 case CLOSE:
475                         if (!cons) {
476                                 v = AO_LISP_NIL;
477                                 break;
478                         }
479                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
480                         --cons;
481                         read_state = pop_read_stack(cons);
482                         break;
483                 case DOT:
484                         if (!cons) {
485                                 ao_lisp_error(AO_LISP_INVALID, ". outside of cons");
486                                 return AO_LISP_NIL;
487                         }
488                         if (!ao_lisp_read_cons) {
489                                 ao_lisp_error(AO_LISP_INVALID, ". first in cons");
490                                 return AO_LISP_NIL;
491                         }
492                         read_state |= READ_SAW_DOT;
493                         continue;
494                 }
495
496                 /* loop over QUOTE ends */
497                 for (;;) {
498                         if (!cons)
499                                 return v;
500
501                         if (read_state & READ_DONE_DOT) {
502                                 ao_lisp_error(AO_LISP_INVALID, ". not last in cons");
503                                 return AO_LISP_NIL;
504                         }
505
506                         if (read_state & READ_SAW_DOT) {
507                                 read_state |= READ_DONE_DOT;
508                                 ao_lisp_read_cons_tail->cdr = v;
509                         } else {
510                                 struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, AO_LISP_NIL);
511                                 if (!read)
512                                         return AO_LISP_NIL;
513
514                                 if (ao_lisp_read_cons_tail)
515                                         ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read);
516                                 else
517                                         ao_lisp_read_cons = read;
518                                 ao_lisp_read_cons_tail = read;
519                         }
520
521                         if (!(read_state & READ_IN_QUOTE) || !ao_lisp_read_cons->cdr)
522                                 break;
523
524                         v = ao_lisp_cons_poly(ao_lisp_read_cons);
525                         --cons;
526                         read_state = pop_read_stack(cons);
527                 }
528         }
529         return v;
530 }