altos/scheme: Rename to 'scheme', clean up build
[fw/altos] / src / scheme / ao_scheme_read.c
diff --git a/src/scheme/ao_scheme_read.c b/src/scheme/ao_scheme_read.c
new file mode 100644 (file)
index 0000000..6b1e9d6
--- /dev/null
@@ -0,0 +1,655 @@
+/*
+ * Copyright © 2016 Keith Packard <keithp@keithp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "ao_scheme.h"
+#include "ao_scheme_read.h"
+#include <math.h>
+#include <stdlib.h>
+
+/*
+ * Lexical class bits for each 7-bit character code.  lexc() masks the
+ * input character with 0x7f and uses the result as the index here.
+ * The class bit names (IGNORE, WHITE, PRINTABLE, ...) are not defined
+ * in this file; presumably they come from ao_scheme_read.h.
+ *
+ * NOTE(review): '[' and ']' carry only PRINTABLE, so the case '[' and
+ * case ']' arms inside the SPECIAL|DOTC branch of _lex() look
+ * unreachable -- confirm which side reflects the intended syntax.
+ */
+static const uint16_t  lex_classes[128] = {
+       IGNORE,         /* ^@ */
+       IGNORE,         /* ^A */
+       IGNORE,         /* ^B */
+       IGNORE,         /* ^C */
+       IGNORE,         /* ^D */
+       IGNORE,         /* ^E */
+       IGNORE,         /* ^F */
+       IGNORE,         /* ^G */
+       IGNORE,         /* ^H */
+       WHITE,          /* ^I */
+       WHITE,          /* ^J */
+       WHITE,          /* ^K */
+       WHITE,          /* ^L */
+       WHITE,          /* ^M */
+       IGNORE,         /* ^N */
+       IGNORE,         /* ^O */
+       IGNORE,         /* ^P */
+       IGNORE,         /* ^Q */
+       IGNORE,         /* ^R */
+       IGNORE,         /* ^S */
+       IGNORE,         /* ^T */
+       IGNORE,         /* ^U */
+       IGNORE,         /* ^V */
+       IGNORE,         /* ^W */
+       IGNORE,         /* ^X */
+       IGNORE,         /* ^Y */
+       IGNORE,         /* ^Z */
+       IGNORE,         /* ^[ */
+       IGNORE,         /* ^\ */
+       IGNORE,         /* ^] */
+       IGNORE,         /* ^^ */
+       IGNORE,         /* ^_ */
+       PRINTABLE|WHITE,        /*    */
+       PRINTABLE,              /* ! */
+       PRINTABLE|STRINGC,      /* " */
+       PRINTABLE|POUND,        /* # */
+       PRINTABLE,              /* $ */
+       PRINTABLE,              /* % */
+       PRINTABLE,              /* & */
+       PRINTABLE|SPECIAL,      /* ' */
+       PRINTABLE|SPECIAL,      /* ( */
+       PRINTABLE|SPECIAL,      /* ) */
+       PRINTABLE,              /* * */
+       PRINTABLE|SIGN,         /* + */
+       PRINTABLE|SPECIAL,      /* , */
+       PRINTABLE|SIGN,         /* - */
+       PRINTABLE|DOTC|FLOATC,  /* . */
+       PRINTABLE,              /* / */
+       PRINTABLE|DIGIT,        /* 0 */
+       PRINTABLE|DIGIT,        /* 1 */
+       PRINTABLE|DIGIT,        /* 2 */
+       PRINTABLE|DIGIT,        /* 3 */
+       PRINTABLE|DIGIT,        /* 4 */
+       PRINTABLE|DIGIT,        /* 5 */
+       PRINTABLE|DIGIT,        /* 6 */
+       PRINTABLE|DIGIT,        /* 7 */
+       PRINTABLE|DIGIT,        /* 8 */
+       PRINTABLE|DIGIT,        /* 9 */
+       PRINTABLE,              /* : */
+       PRINTABLE|COMMENT,      /* ; */
+       PRINTABLE,              /* < */
+       PRINTABLE,              /* = */
+       PRINTABLE,              /* > */
+       PRINTABLE,              /* ? */
+       PRINTABLE,              /*  @ */
+       PRINTABLE,              /*  A */
+       PRINTABLE,              /*  B */
+       PRINTABLE,              /*  C */
+       PRINTABLE,              /*  D */
+       PRINTABLE|FLOATC,       /*  E */
+       PRINTABLE,              /*  F */
+       PRINTABLE,              /*  G */
+       PRINTABLE,              /*  H */
+       PRINTABLE,              /*  I */
+       PRINTABLE,              /*  J */
+       PRINTABLE,              /*  K */
+       PRINTABLE,              /*  L */
+       PRINTABLE,              /*  M */
+       PRINTABLE,              /*  N */
+       PRINTABLE,              /*  O */
+       PRINTABLE,              /*  P */
+       PRINTABLE,              /*  Q */
+       PRINTABLE,              /*  R */
+       PRINTABLE,              /*  S */
+       PRINTABLE,              /*  T */
+       PRINTABLE,              /*  U */
+       PRINTABLE,              /*  V */
+       PRINTABLE,              /*  W */
+       PRINTABLE,              /*  X */
+       PRINTABLE,              /*  Y */
+       PRINTABLE,              /*  Z */
+       PRINTABLE,              /*  [ */
+       PRINTABLE|BACKSLASH,    /*  \ */
+       PRINTABLE,              /*  ] */
+       PRINTABLE,              /*  ^ */
+       PRINTABLE,              /*  _ */
+       PRINTABLE|SPECIAL,      /*  ` */
+       PRINTABLE,              /*  a */
+       PRINTABLE,              /*  b */
+       PRINTABLE,              /*  c */
+       PRINTABLE,              /*  d */
+       PRINTABLE|FLOATC,       /*  e */
+       PRINTABLE,              /*  f */
+       PRINTABLE,              /*  g */
+       PRINTABLE,              /*  h */
+       PRINTABLE,              /*  i */
+       PRINTABLE,              /*  j */
+       PRINTABLE,              /*  k */
+       PRINTABLE,              /*  l */
+       PRINTABLE,              /*  m */
+       PRINTABLE,              /*  n */
+       PRINTABLE,              /*  o */
+       PRINTABLE,              /*  p */
+       PRINTABLE,              /*  q */
+       PRINTABLE,              /*  r */
+       PRINTABLE,              /*  s */
+       PRINTABLE,              /*  t */
+       PRINTABLE,              /*  u */
+       PRINTABLE,              /*  v */
+       PRINTABLE,              /*  w */
+       PRINTABLE,              /*  x */
+       PRINTABLE,              /*  y */
+       PRINTABLE,              /*  z */
+       PRINTABLE,              /*  { */
+       PRINTABLE,              /*  | */
+       PRINTABLE,              /*  } */
+       PRINTABLE,              /*  ~ */
+       IGNORE,                 /*  ^? */
+};
+
+/*
+ * One-character pushback buffer shared by lex_get() and lex_unget().
+ * Zero means "empty", so a NUL character cannot be pushed back.
+ */
+static int lex_unget_c;
+
+/*
+ * Return the next input character: the pushed-back one if there is one
+ * (emptying the buffer), otherwise a fresh character from
+ * ao_scheme_getc().  The value is passed through unmodified; callers
+ * compare it against EOF themselves.
+ */
+static inline int
+lex_get(void)
+{
+       int     c = lex_unget_c;
+
+       if (c)
+               lex_unget_c = 0;
+       else
+               c = ao_scheme_getc();
+       return c;
+}
+
+/*
+ * Push c back so that the next lex_get() returns it.  EOF is never
+ * stored; only one character of pushback is kept, so a second call
+ * before the next lex_get() overwrites the first.
+ */
+static inline void
+lex_unget(int c)
+{
+       if (c == EOF)
+               return;
+       lex_unget_c = c;
+}
+
+/*
+ * Class bits of the character most recently returned by lexc();
+ * lex_quoted() overwrites it as well.
+ */
+static uint16_t        lex_class;
+
+/*
+ * Fetch the next character whose class does not include IGNORE.
+ *
+ * The character is reduced to 7 bits and its class from lex_classes[]
+ * is left in lex_class.  At end of input the function returns 0 with
+ * lex_class set to ENDOFFILE.
+ */
+static int
+lexc(void)
+{
+       int     ch;
+
+       for (;;) {
+               ch = lex_get();
+               if (ch != EOF) {
+                       ch &= 0x7f;
+                       lex_class = lex_classes[ch];
+               } else {
+                       ch = 0;
+                       lex_class = ENDOFFILE;
+               }
+               if (!(lex_class & IGNORE))
+                       return ch;
+       }
+}
+
+/*
+ * Decode the character that follows a backslash inside a string.
+ *
+ * Recognizes \n \f \b \r \v \t and octal escapes of one to three
+ * digits; any other character stands for itself.  On success
+ * lex_class is cleared to 0 so the caller does not mistake the decoded
+ * character for a closing quote or another backslash.
+ *
+ * At end of input -- whether right after the backslash or part way
+ * through an octal escape -- lex_class is set to ENDOFFILE and 0 is
+ * returned.
+ */
+static int
+lex_quoted(void)
+{
+       int     c;
+       int     v;
+       int     count;
+
+       c = lex_get();
+       if (c == EOF) {
+               lex_class = ENDOFFILE;
+               return 0;
+       }
+       lex_class = 0;
+       c &= 0x7f;
+       switch (c) {
+       case 'n':
+               return '\n';
+       case 'f':
+               return '\f';
+       case 'b':
+               return '\b';
+       case 'r':
+               return '\r';
+       case 'v':
+               return '\v';
+       case 't':
+               return '\t';
+       case '0':
+       case '1':
+       case '2':
+       case '3':
+       case '4':
+       case '5':
+       case '6':
+       case '7':
+               v = c - '0';
+               /* count is the number of octal digits consumed so far */
+               count = 1;
+               while (count < 3) {
+                       c = lex_get();
+                       if (c == EOF) {
+                               /* report end of input the same way as above */
+                               lex_class = ENDOFFILE;
+                               return 0;
+                       }
+                       c &= 0x7f;
+                       if (c < '0' || '7' < c) {
+                               /* not part of the escape; leave it for the caller */
+                               lex_unget(c);
+                               break;
+                       }
+                       v = (v << 3) + c - '0';
+                       ++count;
+               }
+               return v;
+       default:
+               return c;
+       }
+}
+
+/* Size of token_string in bytes, including the terminating NUL. */
+#define AO_SCHEME_TOKEN_MAX    32
+
+/* Text of the token being scanned; terminated by end_token(). */
+static char    token_string[AO_SCHEME_TOKEN_MAX];
+/* Value of the last NUM token (integer or #\ character literal). */
+static int32_t token_int;
+/* Number of characters currently stored in token_string. */
+static int     token_len;
+/* Value of the last FLOAT token. */
+static float   token_float;
+
+/*
+ * Append c to the token buffer.  A zero character is dropped, and so
+ * is any character that would not leave room for the terminating NUL,
+ * so overlong tokens are silently truncated.
+ */
+static inline void
+add_token(int c)
+{
+       if (!c)
+               return;
+       if (token_len >= AO_SCHEME_TOKEN_MAX - 1)
+               return;
+       token_string[token_len] = c;
+       token_len++;
+}
+
+/* Drop the last character of the token buffer, if there is one. */
+static inline void
+del_token(void)
+{
+       if (token_len <= 0)
+               return;
+       --token_len;
+}
+
+/* NUL-terminate the token buffer at its current length. */
+static inline void
+end_token(void)
+{
+       token_string[token_len] = 0;
+}
+
+/* Spelling of a special floating-point literal and the value it denotes. */
+struct namedfloat {
+       const char      *name;
+       float           value;
+};
+
+/*
+ * Literals that _lex() turns into FLOAT tokens by exact name match
+ * once a token has failed to parse as an ordinary number.
+ */
+static const struct namedfloat namedfloats[] = {
+       { "+inf.0", INFINITY },
+       { "-inf.0", -INFINITY },
+       { "+nan.0", NAN },
+       { "-nan.0", NAN },
+};
+
+/* Number of entries in namedfloats[]. */
+#define NUM_NAMED_FLOATS       (sizeof (namedfloats) / sizeof (namedfloats[0]))
+
+/*
+ * Scan the next token from the input.
+ *
+ * Returns one of the token codes END, OPEN, CLOSE, QUOTE, QUASIQUOTE,
+ * UNQUOTE, UNQUOTE_SPLICING, DOT, BOOL, NUM, FLOAT, STRING or NAME.
+ * For every token except END the text is left NUL-terminated in
+ * token_string.  NUM tokens (integers and #\ character literals) also
+ * set token_int, and FLOAT tokens set token_float.
+ *
+ * An invalid #\ character literal is reported through
+ * ao_scheme_error() and scanning resumes with the following token.
+ */
+static int
+_lex(void)
+{
+       int     c;
+
+       token_len = 0;
+       for (;;) {
+               c = lexc();
+               if (lex_class & ENDOFFILE)
+                       return END;
+
+               if (lex_class & WHITE)
+                       continue;
+
+               /* Comments run to the end of the line */
+               if (lex_class & COMMENT) {
+                       while ((c = lexc()) != '\n') {
+                               if (lex_class & ENDOFFILE)
+                                       return END;
+                       }
+                       continue;
+               }
+
+               /* Single-character punctuation, plus ",@" */
+               if (lex_class & (SPECIAL|DOTC)) {
+                       add_token(c);
+                       end_token();
+                       switch (c) {
+                       /*
+                        * NOTE(review): lex_classes[] gives '[' and ']'
+                        * neither SPECIAL nor DOTC, so those two arms
+                        * look unreachable.
+                        */
+                       case '(':
+                       case '[':
+                               return OPEN;
+                       case ')':
+                       case ']':
+                               return CLOSE;
+                       case '\'':
+                               return QUOTE;
+                       case '.':
+                               return DOT;
+                       case '`':
+                               return QUASIQUOTE;
+                       case ',':
+                               c = lexc();
+                               if (c == '@') {
+                                       add_token(c);
+                                       end_token();
+                                       return UNQUOTE_SPLICING;
+                               } else {
+                                       lex_unget(c);
+                                       return UNQUOTE;
+                               }
+                       }
+               }
+
+               /*
+                * '#' introduces #t, #f and #\ character literals.  Any
+                * other following character falls out of the switch and
+                * is handled by the branches below, with the '#' itself
+                * discarded.
+                */
+               if (lex_class & POUND) {
+                       c = lexc();
+                       switch (c) {
+                       case 't':
+                               add_token(c);
+                               end_token();
+                               return BOOL;
+                       case 'f':
+                               add_token(c);
+                               end_token();
+                               return BOOL;
+                       case '\\':
+                               /*
+                                * The first character is always taken; more
+                                * are collected only while the run stays
+                                * alphabetic, so "#\(" is a single character
+                                * and "#\space" is a name.
+                                */
+                               for (;;) {
+                                       int alphabetic;
+                                       c = lexc();
+                                       alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
+                                       if (token_len == 0) {
+                                               add_token(c);
+                                               if (!alphabetic)
+                                                       break;
+                                       } else {
+                                               if (alphabetic)
+                                                       add_token(c);
+                                               else {
+                                                       lex_unget(c);
+                                                       break;
+                                               }
+                                       }
+                               }
+                               end_token();
+                               if (token_len == 1)
+                                       token_int = token_string[0];
+                               else if (!strcmp(token_string, "space"))
+                                       token_int = ' ';
+                               else if (!strcmp(token_string, "newline"))
+                                       token_int = '\n';
+                               else if (!strcmp(token_string, "tab"))
+                                       token_int = '\t';
+                               else if (!strcmp(token_string, "return"))
+                                       token_int = '\r';
+                               else if (!strcmp(token_string, "formfeed"))
+                                       token_int = '\f';
+                               else {
+                                       ao_scheme_error(AO_SCHEME_INVALID, "invalid character token #\\%s", token_string);
+                                       /*
+                                        * Discard the rejected text so it is
+                                        * not prepended to the next token.
+                                        */
+                                       token_len = 0;
+                                       continue;
+                               }
+                               return NUM;
+                       }
+               }
+
+               /*
+                * String literal: collect up to the closing quote (or end
+                * of input), decoding backslash escapes.  lex_quoted()
+                * clears lex_class, so an escaped quote does not end the
+                * string.
+                */
+               if (lex_class & STRINGC) {
+                       for (;;) {
+                               c = lexc();
+                               if (lex_class & BACKSLASH)
+                                       c = lex_quoted();
+                               if (lex_class & (STRINGC|ENDOFFILE)) {
+                                       end_token();
+                                       return STRING;
+                               }
+                               add_token(c);
+                       }
+               }
+
+               /*
+                * Anything else printable starts a number or a name.  The
+                * token is assumed to be both an integer and a float until
+                * a character rules one or both out.  NUMBER, INTEGER and
+                * NOTNAME are class masks defined outside this file.
+                */
+               if (lex_class & PRINTABLE) {
+                       int     isfloat;
+                       int     hasdigit;
+                       int     isneg;
+                       int     isint;
+                       int     epos;
+
+                       isfloat = 1;
+                       isint = 1;
+                       hasdigit = 0;
+                       token_int = 0;
+                       isneg = 0;
+                       /*
+                        * epos is the token index just past the most recent
+                        * 'e'/'E' (0 if none); a sign is accepted only at
+                        * that index.
+                        */
+                       epos = 0;
+                       for (;;) {
+                               if (!(lex_class & NUMBER)) {
+                                       isint = 0;
+                                       isfloat = 0;
+                               } else {
+                                       if (!(lex_class & INTEGER))
+                                               isint = 0;
+                                       if (token_len != epos &&
+                                           (lex_class & SIGN))
+                                       {
+                                               isint = 0;
+                                               isfloat = 0;
+                                       }
+                                       if (c == '-')
+                                               isneg = 1;
+                                       /* no '.' allowed once an exponent has started */
+                                       if (c == '.' && epos != 0)
+                                               isfloat = 0;
+                                       if (c == 'e' || c == 'E') {
+                                               if (token_len == 0)
+                                                       isfloat = 0;
+                                               else
+                                                       epos = token_len + 1;
+                                       }
+                                       if (lex_class & DIGIT) {
+                                               hasdigit = 1;
+                                               if (isint)
+                                                       token_int = token_int * 10 + c - '0';
+                                       }
+                               }
+                               add_token (c);
+                               c = lexc ();
+                               /*
+                                * A NOTNAME character ends the token, except
+                                * that '.' is kept while the token can still
+                                * be a float.
+                                */
+                               if ((lex_class & (NOTNAME)) && (c != '.' || !isfloat)) {
+                                       unsigned int u;
+                                       lex_unget(c);
+                                       end_token ();
+                                       if (isint && hasdigit) {
+                                               if (isneg)
+                                                       token_int = -token_int;
+                                               return NUM;
+                                       }
+                                       if (isfloat && hasdigit) {
+                                               token_float = strtof(token_string, NULL);
+                                               return FLOAT;
+                                       }
+                                       for (u = 0; u < NUM_NAMED_FLOATS; u++)
+                                               if (!strcmp(namedfloats[u].name, token_string)) {
+                                                       token_float = namedfloats[u].value;
+                                                       return FLOAT;
+                                               }
+                                       return NAME;
+                               }
+                       }
+               }
+       }
+}
+
+/*
+ * Scan one token with _lex(), pass its code and text to the RDBGI
+ * debug macro, and return the code.
+ */
+static inline int
+lex(void)
+{
+       int     token;
+
+       token = _lex();
+       RDBGI("token %d (%s)\n", token, token_string);
+       return token;
+}
+
+/* Code of the token ao_scheme_read() is currently processing. */
+static int parse_token;
+
+/* First cell of the list currently being built by the reader, or NULL. */
+struct ao_scheme_cons  *ao_scheme_read_cons;
+/* Last cell of that list; new elements are linked after it. */
+struct ao_scheme_cons  *ao_scheme_read_cons_tail;
+/*
+ * Saved enclosing lists: push_read_stack() adds a (list, read_state)
+ * pair at the front and pop_read_stack() removes it.
+ */
+struct ao_scheme_cons  *ao_scheme_read_stack;
+
+/* read_state flag bits used by ao_scheme_read() */
+#define READ_IN_QUOTE  0x01    /* list is an implicit quote/unquote form */
+#define READ_SAW_DOT   0x02    /* '.' seen; next datum becomes the final cdr */
+#define READ_DONE_DOT  0x04    /* the datum after '.' has been stored */
+
+/*
+ * Begin a new, empty list.
+ *
+ * When cons is non-zero (we are already inside a list) the list in
+ * progress and its read_state are first saved on ao_scheme_read_stack
+ * as two cells: (list read_state . previous-stack).
+ *
+ * Returns 1 on success, 0 if the outer cell could not be allocated.
+ */
+static int
+push_read_stack(int cons, int read_state)
+{
+       RDBGI("push read stack %p 0x%x\n", ao_scheme_read_cons, read_state);
+       RDBG_IN();
+       if (cons) {
+               ao_poly rest;
+
+               /*
+                * Allocate the inner cell in its own statement.  C leaves
+                * the evaluation order of function arguments unspecified,
+                * so nesting this call next to a value derived from
+                * ao_scheme_read_cons made the result depend on the
+                * compiler.  NOTE(review): this matters if allocation can
+                * relocate objects -- confirm against the allocator.
+                */
+               rest = ao_scheme__cons(ao_scheme_int_poly(read_state),
+                                      ao_scheme_cons_poly(ao_scheme_read_stack));
+               ao_scheme_read_stack = ao_scheme_cons_cons(ao_scheme_cons_poly(ao_scheme_read_cons),
+                                                          rest);
+               if (!ao_scheme_read_stack)
+                       return 0;
+       }
+       ao_scheme_read_cons = NULL;
+       ao_scheme_read_cons_tail = NULL;
+       return 1;
+}
+
+/*
+ * Finish the current list and go back to the enclosing one.
+ *
+ * When cons is non-zero the enclosing list and its read_state are
+ * taken off ao_scheme_read_stack and ao_scheme_read_cons_tail is found
+ * again by walking that list to its last cell.  When cons is zero
+ * there is no enclosing list, so all three reader pointers are cleared.
+ *
+ * Returns the restored read_state (0 when cons is zero).
+ */
+static int
+pop_read_stack(int cons)
+{
+       struct ao_scheme_cons   *tail;
+       int                     read_state;
+
+       if (!cons) {
+               ao_scheme_read_cons = 0;
+               ao_scheme_read_cons_tail = 0;
+               ao_scheme_read_stack = 0;
+               read_state = 0;
+       } else {
+               /* first cell holds the saved list */
+               ao_scheme_read_cons = ao_scheme_poly_cons(ao_scheme_read_stack->car);
+               ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
+               /* second cell holds the saved state */
+               read_state = ao_scheme_poly_int(ao_scheme_read_stack->car);
+               ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
+
+               tail = ao_scheme_read_cons;
+               while (tail && tail->cdr)
+                       tail = ao_scheme_poly_cons(tail->cdr);
+               ao_scheme_read_cons_tail = tail;
+       }
+       RDBG_OUT();
+       RDBGI("pop read stack %p %d\n", ao_scheme_read_cons, read_state);
+       return read_state;
+}
+
+/*
+ * Read one complete datum from the lexer and return it.
+ *
+ * Lists are built iteratively rather than recursively: `cons` counts
+ * the lists currently open (including the implicit list created for a
+ * quote form), and push_read_stack()/pop_read_stack() save and restore
+ * the enclosing partial list together with its read_state flags.
+ *
+ * Returns _ao_scheme_atom_eof at end of input or on an unrecognized
+ * token code, and AO_SCHEME_NIL after an allocation failure, a
+ * misplaced '.', or a ')' with no list open.
+ */
+ao_poly
+ao_scheme_read(void)
+{
+       struct ao_scheme_atom   *atom;
+       char                    *string;
+       int                     cons;
+       int                     read_state;
+       ao_poly                 v = AO_SCHEME_NIL;
+
+       cons = 0;
+       read_state = 0;
+       ao_scheme_read_cons = ao_scheme_read_cons_tail = ao_scheme_read_stack = 0;
+       for (;;) {
+               parse_token = lex();
+               /* Each '(' saves the list in progress and starts an empty one */
+               while (parse_token == OPEN) {
+                       if (!push_read_stack(cons, read_state))
+                               return AO_SCHEME_NIL;
+                       cons++;
+                       read_state = 0;
+                       parse_token = lex();
+               }
+
+               /* Turn the token into a value v, or adjust the list state */
+               switch (parse_token) {
+               case END:
+               default:
+                       /* End of input is an error only while a list is open */
+                       if (cons)
+                               ao_scheme_error(AO_SCHEME_EOF, "unexpected end of file");
+                       return _ao_scheme_atom_eof;
+                       break;
+               case NAME:
+                       atom = ao_scheme_atom_intern(token_string);
+                       if (atom)
+                               v = ao_scheme_atom_poly(atom);
+                       else
+                               v = AO_SCHEME_NIL;
+                       break;
+               case NUM:
+                       v = ao_scheme_integer_poly(token_int);
+                       break;
+               case FLOAT:
+                       v = ao_scheme_float_get(token_float);
+                       break;
+               case BOOL:
+                       if (token_string[0] == 't')
+                               v = _ao_scheme_bool_true;
+                       else
+                               v = _ao_scheme_bool_false;
+                       break;
+               case STRING:
+                       string = ao_scheme_string_copy(token_string);
+                       if (string)
+                               v = ao_scheme_string_poly(string);
+                       else
+                               v = AO_SCHEME_NIL;
+                       break;
+               case QUOTE:
+               case QUASIQUOTE:
+               case UNQUOTE:
+               case UNQUOTE_SPLICING:
+                       /*
+                        * A quote form is read as a two-element list: start
+                        * a new list flagged READ_IN_QUOTE and make the
+                        * matching atom its first element.
+                        */
+                       if (!push_read_stack(cons, read_state))
+                               return AO_SCHEME_NIL;
+                       cons++;
+                       read_state = READ_IN_QUOTE;
+                       switch (parse_token) {
+                       case QUOTE:
+                               v = _ao_scheme_atom_quote;
+                               break;
+                       case QUASIQUOTE:
+                               v = _ao_scheme_atom_quasiquote;
+                               break;
+                       case UNQUOTE:
+                               v = _ao_scheme_atom_unquote;
+                               break;
+                       case UNQUOTE_SPLICING:
+                               v = _ao_scheme_atom_unquote2dsplicing;
+                               break;
+                       }
+                       break;
+               case CLOSE:
+                       /* ')' with nothing open yields NIL */
+                       if (!cons) {
+                               v = AO_SCHEME_NIL;
+                               break;
+                       }
+                       /* The finished list is the value; resume the enclosing list */
+                       v = ao_scheme_cons_poly(ao_scheme_read_cons);
+                       --cons;
+                       read_state = pop_read_stack(cons);
+                       break;
+               case DOT:
+                       if (!cons) {
+                               ao_scheme_error(AO_SCHEME_INVALID, ". outside of cons");
+                               return AO_SCHEME_NIL;
+                       }
+                       if (!ao_scheme_read_cons) {
+                               ao_scheme_error(AO_SCHEME_INVALID, ". first in cons");
+                               return AO_SCHEME_NIL;
+                       }
+                       /* The next datum becomes the cdr of the last cell */
+                       read_state |= READ_SAW_DOT;
+                       continue;
+               }
+
+               /* loop over QUOTE ends */
+               for (;;) {
+                       /* Not inside any list: v is the complete datum */
+                       if (!cons)
+                               return v;
+
+                       /* A second datum after '.' is not allowed */
+                       if (read_state & READ_DONE_DOT) {
+                               ao_scheme_error(AO_SCHEME_INVALID, ". not last in cons");
+                               return AO_SCHEME_NIL;
+                       }
+
+                       if (read_state & READ_SAW_DOT) {
+                               /* Dotted pair: v is the final cdr, not a new element */
+                               read_state |= READ_DONE_DOT;
+                               ao_scheme_read_cons_tail->cdr = v;
+                       } else {
+                               /* Append v as a new last element of the current list */
+                               struct ao_scheme_cons   *read = ao_scheme_cons_cons(v, AO_SCHEME_NIL);
+                               if (!read)
+                                       return AO_SCHEME_NIL;
+
+                               if (ao_scheme_read_cons_tail)
+                                       ao_scheme_read_cons_tail->cdr = ao_scheme_cons_poly(read);
+                               else
+                                       ao_scheme_read_cons = read;
+                               ao_scheme_read_cons_tail = read;
+                       }
+
+                       /*
+                        * An ordinary list, or a quote form holding only its
+                        * quote atom so far, needs more tokens.
+                        */
+                       if (!(read_state & READ_IN_QUOTE) || !ao_scheme_read_cons->cdr)
+                               break;
+
+                       /*
+                        * The quote form now has its datum: close it and add
+                        * the result to the enclosing list on the next pass.
+                        */
+                       v = ao_scheme_cons_poly(ao_scheme_read_cons);
+                       --cons;
+                       read_state = pop_read_stack(cons);
+               }
+       }
+       return v;
+}