altos/lisp: Split out read debug, add memory validation
[fw/altos] / src / lisp / ao_lisp_read.c
index 7a5751cef0315a7ec4662b3a49ab6ab0490a6592..0ca12a81f6fcc843c24ba2f046970f7e8a83a37c 100644 (file)
  * General Public License for more details.
  */
 
-#define DBG_EVAL 0
 #include "ao_lisp.h"
 #include "ao_lisp_read.h"
+#include <math.h>
+#include <stdlib.h>
 
 static const uint16_t  lex_classes[128] = {
        IGNORE,         /* ^@ */
@@ -52,18 +53,18 @@ static const uint16_t       lex_classes[128] = {
        PRINTABLE|WHITE,        /*    */
        PRINTABLE,              /* ! */
        PRINTABLE|STRINGC,      /* " */
-       PRINTABLE|COMMENT,      /* # */
+       PRINTABLE|POUND,        /* # */
        PRINTABLE,              /* $ */
        PRINTABLE,              /* % */
        PRINTABLE,              /* & */
-       PRINTABLE|QUOTEC,       /* ' */
-       PRINTABLE|BRA,          /* ( */
-       PRINTABLE|KET,          /* ) */
+       PRINTABLE|SPECIAL,      /* ' */
+       PRINTABLE|SPECIAL,      /* ( */
+       PRINTABLE|SPECIAL,      /* ) */
        PRINTABLE,              /* * */
        PRINTABLE|SIGN,         /* + */
-       PRINTABLE,              /* , */
+       PRINTABLE|SPECIAL,      /* , */
        PRINTABLE|SIGN,         /* - */
-       PRINTABLE,              /* . */
+       PRINTABLE|DOTC|FLOATC,  /* . */
        PRINTABLE,              /* / */
        PRINTABLE|DIGIT,        /* 0 */
        PRINTABLE|DIGIT,        /* 1 */
@@ -86,7 +87,7 @@ static const uint16_t lex_classes[128] = {
        PRINTABLE,              /*  B */
        PRINTABLE,              /*  C */
        PRINTABLE,              /*  D */
-       PRINTABLE,              /*  E */
+       PRINTABLE|FLOATC,       /*  E */
        PRINTABLE,              /*  F */
        PRINTABLE,              /*  G */
        PRINTABLE,              /*  H */
@@ -113,12 +114,12 @@ static const uint16_t     lex_classes[128] = {
        PRINTABLE,              /*  ] */
        PRINTABLE,              /*  ^ */
        PRINTABLE,              /*  _ */
-       PRINTABLE,              /*  ` */
+       PRINTABLE|SPECIAL,      /*  ` */
        PRINTABLE,              /*  a */
        PRINTABLE,              /*  b */
        PRINTABLE,              /*  c */
        PRINTABLE,              /*  d */
-       PRINTABLE,              /*  e */
+       PRINTABLE|FLOATC,       /*  e */
        PRINTABLE,              /*  f */
        PRINTABLE,              /*  g */
        PRINTABLE,              /*  h */
@@ -141,9 +142,9 @@ static const uint16_t       lex_classes[128] = {
        PRINTABLE,              /*  y */
        PRINTABLE,              /*  z */
        PRINTABLE,              /*  { */
-       PRINTABLE|VBAR,         /*  | */
+       PRINTABLE,              /*  | */
        PRINTABLE,              /*  } */
-       PRINTABLE|TWIDDLE,      /*  ~ */
+       PRINTABLE,              /*  ~ */
        IGNORE,                 /*  ^? */
 };
 
@@ -169,16 +170,38 @@ lex_unget(int c)
                lex_unget_c = c;
 }
 
+static uint16_t        lex_class;  /* class bits for the last character read; written by lexc() and lex_quoted() */
+
 static int
-lex_quoted (void)
+lexc(void)  /* return the next input character that is not classed IGNORE, leaving its class in lex_class */
+{
+       int     c;
+       do {
+               c = lex_get();
+               if (c == EOF) {
+                       c = 0;  /* end of input is returned as character 0 ... */
+                       lex_class = ENDOFFILE;  /* ... and reported through lex_class */
+               } else {
+                       c &= 0x7f;  /* mask to 7 bits so c indexes the 128-entry lex_classes table */
+                       lex_class = lex_classes[c];
+               }
+       } while (lex_class & IGNORE);  /* keep reading while the character is classed IGNORE */
+       return c;
+}
+
+static int
+lex_quoted(void)
 {
        int     c;
        int     v;
        int     count;
 
        c = lex_get();
-       if (c == EOF)
-               return EOF;
+       if (c == EOF) {
+               lex_class = ENDOFFILE;
+               return 0;
+       }
+       lex_class = 0;
        c &= 0x7f;
        switch (c) {
        case 'n':
@@ -221,49 +244,43 @@ lex_quoted (void)
        }
 }
 
-static uint16_t        lex_class;
-
-static int
-lexc(void)
-{
-       int     c;
-       do {
-               c = lex_get();
-               if (c == EOF) {
-                       lex_class = ENDOFFILE;
-                       c = 0;
-               } else {
-                       c &= 0x7f;
-                       lex_class = lex_classes[c];
-                       if (lex_class & BACKSLASH) {
-                               c = lex_quoted();
-                               if (c == EOF)
-                                       lex_class = ENDOFFILE;
-                               else
-                                       lex_class = PRINTABLE;
-                       }
-               }
-       } while (lex_class & IGNORE);
-       return c;
-}
-
 #define AO_LISP_TOKEN_MAX      32
 
 static char    token_string[AO_LISP_TOKEN_MAX];
-static int     token_int;
+static int32_t token_int;
 static int     token_len;
+static float   token_float;
 
 static inline void add_token(int c) {
        if (c && token_len < AO_LISP_TOKEN_MAX - 1)
                token_string[token_len++] = c;
 }
 
+static inline void del_token(void) {  /* drop the last character from the token buffer; no-op when it is empty */
+       if (token_len > 0)
+               token_len--;
+}
+
 static inline void end_token(void) {
        token_string[token_len] = '\0';
 }
 
+struct namedfloat {  /* pairs a token spelling with the float value it stands for */
+       const char      *name;  /* exact token text, compared with strcmp() in _lex() */
+       float           value;  /* value copied into token_float on a match */
+};
+
+static const struct namedfloat namedfloats[] = {  /* searched by _lex() after its integer and float tests have both failed */
+       { .name = "+inf.0", .value = INFINITY },
+       { .name = "-inf.0", .value = -INFINITY },
+       { .name = "+nan.0", .value = NAN },
+       { .name = "-nan.0", .value = NAN },  /* same NAN constant as "+nan.0"; no sign is applied */
+};
+
+#define NUM_NAMED_FLOATS       (sizeof namedfloats / sizeof namedfloats[0])  /* number of entries in namedfloats[] */
+
 static int
-lex(void)
+_lex(void)
 {
        int     c;
 
@@ -284,25 +301,88 @@ lex(void)
                        continue;
                }
 
-               if (lex_class & (BRA|KET|QUOTEC)) {
+               if (lex_class & (SPECIAL|DOTC)) {
                        add_token(c);
                        end_token();
                        switch (c) {
                        case '(':
+                       case '[':
                                return OPEN;
                        case ')':
+                       case ']':
                                return CLOSE;
                        case '\'':
                                return QUOTE;
+                       case '.':
+                               return DOT;
+                       case '`':
+                               return QUASIQUOTE;
+                       case ',':
+                               c = lexc();
+                               if (c == '@') {
+                                       add_token(c);
+                                       end_token();
+                                       return UNQUOTE_SPLICING;
+                               } else {
+                                       lex_unget(c);
+                                       return UNQUOTE;
+                               }
                        }
                }
-               if (lex_class & TWIDDLE) {
-                       token_int = lexc();
-                       return NUM;
+               if (lex_class & POUND) {
+                       c = lexc();
+                       switch (c) {
+                       case 't':
+                               add_token(c);
+                               end_token();
+                               return BOOL;
+                       case 'f':
+                               add_token(c);
+                               end_token();
+                               return BOOL;
+                       case '\\':
+                               for (;;) {
+                                       int alphabetic;
+                                       c = lexc();
+                                       alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
+                                       if (token_len == 0) {
+                                               add_token(c);
+                                               if (!alphabetic)
+                                                       break;
+                                       } else {
+                                               if (alphabetic)
+                                                       add_token(c);
+                                               else {
+                                                       lex_unget(c);
+                                                       break;
+                                               }
+                                       }
+                               }
+                               end_token();
+                               if (token_len == 1)
+                                       token_int = token_string[0];
+                               else if (!strcmp(token_string, "space"))
+                                       token_int = ' ';
+                               else if (!strcmp(token_string, "newline"))
+                                       token_int = '\n';
+                               else if (!strcmp(token_string, "tab"))
+                                       token_int = '\t';
+                               else if (!strcmp(token_string, "return"))
+                                       token_int = '\r';
+                               else if (!strcmp(token_string, "formfeed"))
+                                       token_int = '\f';
+                               else {
+                                       ao_lisp_error(AO_LISP_INVALID, "invalid character token #\\%s", token_string);
+                                       continue;
+                               }
+                               return NUM;
+                       }
                }
                if (lex_class & STRINGC) {
                        for (;;) {
                                c = lexc();
+                               if (lex_class & BACKSLASH)
+                                       c = lex_quoted();
                                if (lex_class & (STRINGC|ENDOFFILE)) {
                                        end_token();
                                        return STRING;
@@ -311,95 +391,131 @@ lex(void)
                        }
                }
                if (lex_class & PRINTABLE) {
-                       int     isnum;
+                       int     isfloat;
                        int     hasdigit;
                        int     isneg;
+                       int     isint;
+                       int     epos;
 
-                       isnum = 1;
+                       isfloat = 1;
+                       isint = 1;
                        hasdigit = 0;
                        token_int = 0;
                        isneg = 0;
+                       epos = 0;
                        for (;;) {
                                if (!(lex_class & NUMBER)) {
-                                       isnum = 0;
+                                       isint = 0;
+                                       isfloat = 0;
                                } else {
-                                       if (token_len != 0 &&
+                                       if (!(lex_class & INTEGER))
+                                               isint = 0;
+                                       if (token_len != epos &&
                                            (lex_class & SIGN))
                                        {
-                                               isnum = 0;
+                                               isint = 0;
+                                               isfloat = 0;
                                        }
                                        if (c == '-')
                                                isneg = 1;
+                                       if (c == '.' && epos != 0)
+                                               isfloat = 0;
+                                       if (c == 'e' || c == 'E') {
+                                               if (token_len == 0)
+                                                       isfloat = 0;
+                                               else
+                                                       epos = token_len + 1;
+                                       }
                                        if (lex_class & DIGIT) {
                                                hasdigit = 1;
-                                               if (isnum)
+                                               if (isint)
                                                        token_int = token_int * 10 + c - '0';
                                        }
                                }
                                add_token (c);
                                c = lexc ();
-                               if (lex_class & (NOTNAME)) {
+                               if ((lex_class & (NOTNAME)) && (c != '.' || !isfloat)) {
+                                       unsigned int u;
 //                                     if (lex_class & ENDOFFILE)
 //                                             clearerr (f);
                                        lex_unget(c);
                                        end_token ();
-                                       if (isnum && hasdigit) {
+                                       if (isint && hasdigit) {
                                                if (isneg)
                                                        token_int = -token_int;
                                                return NUM;
                                        }
+                                       if (isfloat && hasdigit) {
+                                               token_float = strtof(token_string, NULL);
+                                               return FLOAT;
+                                       }
+                                       for (u = 0; u < NUM_NAMED_FLOATS; u++)
+                                               if (!strcmp(namedfloats[u].name, token_string)) {
+                                                       token_float = namedfloats[u].value;
+                                                       return FLOAT;
+                                               }
                                        return NAME;
                                }
                        }
-
                }
        }
 }
 
+static inline int lex(void)  /* wrapper around _lex(): fetch one token, pass it to RDBGI, return its code */
+{
+       int     parse_token = _lex();  /* local variable; the file-scope parse_token is declared after this function */
+       RDBGI("token %d (%s)\n", parse_token, token_string);  /* RDBGI is defined elsewhere - presumably a read-debug trace; TODO confirm */
+       return parse_token;
+}
+
 static int parse_token;
-static uint8_t                 been_here;
-static struct ao_lisp_cons     *read_cons;
-static struct ao_lisp_cons     *read_cons_tail;
-static struct ao_lisp_cons     *read_stack;
+
+struct ao_lisp_cons    *ao_lisp_read_cons;
+struct ao_lisp_cons    *ao_lisp_read_cons_tail;
+struct ao_lisp_cons    *ao_lisp_read_stack;
+
+#define READ_IN_QUOTE  0x01  /* read_state bit: set by ao_lisp_read() for QUOTE, QUASIQUOTE, UNQUOTE and UNQUOTE_SPLICING tokens */
+#define READ_SAW_DOT   0x02  /* read_state bit: set by ao_lisp_read() when a DOT token is accepted */
+#define READ_DONE_DOT  0x04  /* read_state bit: set once the value after the dot is stored in the tail's cdr; a further value is an error */
 
 static int
-push_read_stack(int cons, int in_quote)
+push_read_stack(int cons, int read_state)  /* save the list in progress and read_state (only when cons != 0), then start an empty list; returns 1, or 0 if the new stack entry is NULL */
 {
-       DBGI("push read stack %p %d\n", read_cons, in_quote);
-       DBG_IN();
+       RDBGI("push read stack %p 0x%x\n", ao_lisp_read_cons, read_state);
+       RDBG_IN();
        if (cons) {
-               read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(read_cons),
-                                              ao_lisp_cons_cons(ao_lisp_int_poly(in_quote),
-                                                                read_stack));
-               if (!read_stack)
+               ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons),  /* each entry is two cells: first the saved list ... */
+                                                      ao_lisp__cons(ao_lisp_int_poly(read_state),  /* ... then read_state as an int poly ... */
+                                                                    ao_lisp_cons_poly(ao_lisp_read_stack)));  /* ... chained onto the previous stack */
+               if (!ao_lisp_read_stack)
                        return 0;
        }
-       read_cons = NULL;
-       read_cons_tail = NULL;
+       ao_lisp_read_cons = NULL;  /* the new list starts empty */
+       ao_lisp_read_cons_tail = NULL;
        return 1;
 }
 
 static int
 pop_read_stack(int cons)
 {
-       int     in_quote = 0;
+       int     read_state = 0;  /* returned unchanged when cons is 0; otherwise replaced by the state saved by push_read_stack() */
        if (cons) {
-               read_cons = ao_lisp_poly_cons(read_stack->car);
-               read_stack = ao_lisp_poly_cons(read_stack->cdr);
-               in_quote = ao_lisp_poly_int(read_stack->car);
-               read_stack = ao_lisp_poly_cons(read_stack->cdr);
-               for (read_cons_tail = read_cons;
-                    read_cons_tail && read_cons_tail->cdr;
-                    read_cons_tail = ao_lisp_poly_cons(read_cons_tail->cdr))
+               ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car);  /* first cell of the entry: the saved list */
+               ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
+               read_state = ao_lisp_poly_int(ao_lisp_read_stack->car);  /* second cell of the entry: the saved read_state */
+               ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr);
+               for (ao_lisp_read_cons_tail = ao_lisp_read_cons;  /* walk the restored list to find its last cell again */
+                    ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr;
+                    ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr))
                        ;
        } else {
-               read_cons = 0;
-               read_cons_tail = 0;
-               read_stack = 0;
+               ao_lisp_read_cons = 0;  /* cons == 0: nothing was saved, so clear all reader state */
+               ao_lisp_read_cons_tail = 0;
+               ao_lisp_read_stack = 0;
        }
-       DBG_OUT();
-       DBGI("pop read stack %p %d\n", read_cons, in_quote);
-       return in_quote;
+       RDBG_OUT();
+       RDBGI("pop read stack %p %d\n", ao_lisp_read_cons, read_state);  /* NOTE(review): prints read_state with %d while push_read_stack() uses 0x%x - confirm which is intended */
+       return read_state;
 }
 
 ao_poly
@@ -408,29 +524,20 @@ ao_lisp_read(void)
        struct ao_lisp_atom     *atom;
        char                    *string;
        int                     cons;
-       int                     in_quote;
-       ao_poly                 v;
-
-       if (!been_here) {
-               ao_lisp_root_add(&ao_lisp_cons_type, &read_cons);
-               ao_lisp_root_add(&ao_lisp_cons_type, &read_cons_tail);
-               ao_lisp_root_add(&ao_lisp_cons_type, &read_stack);
-               been_here = 1;
-       }
-       parse_token = lex();
-       DBGI("token %d (%s)\n", parse_token, token_string);
+       int                     read_state;
+       ao_poly                 v = AO_LISP_NIL;
 
        cons = 0;
-       in_quote = 0;
-       read_cons = read_cons_tail = read_stack = 0;
+       read_state = 0;
+       ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0;
        for (;;) {
+               parse_token = lex();
                while (parse_token == OPEN) {
-                       if (!push_read_stack(cons, in_quote))
+                       if (!push_read_stack(cons, read_state))
                                return AO_LISP_NIL;
                        cons++;
-                       in_quote = 0;
+                       read_state = 0;
                        parse_token = lex();
-                       DBGI("token %d (%s)\n", parse_token, token_string);
                }
 
                switch (parse_token) {
@@ -448,7 +555,16 @@ ao_lisp_read(void)
                                v = AO_LISP_NIL;
                        break;
                case NUM:
-                       v = ao_lisp_int_poly(token_int);
+                       v = ao_lisp_integer_poly(token_int);
+                       break;
+               case FLOAT:
+                       v = ao_lisp_float_get(token_float);
+                       break;
+               case BOOL:
+                       if (token_string[0] == 't')
+                               v = _ao_lisp_bool_true;
+                       else
+                               v = _ao_lisp_bool_false;
                        break;
                case STRING:
                        string = ao_lisp_string_copy(token_string);
@@ -458,21 +574,48 @@ ao_lisp_read(void)
                                v = AO_LISP_NIL;
                        break;
                case QUOTE:
-                       if (!push_read_stack(cons, in_quote))
+               case QUASIQUOTE:
+               case UNQUOTE:
+               case UNQUOTE_SPLICING:
+                       if (!push_read_stack(cons, read_state))
                                return AO_LISP_NIL;
                        cons++;
-                       in_quote = 1;
-                       v = _ao_lisp_atom_quote;
+                       read_state = READ_IN_QUOTE;
+                       switch (parse_token) {
+                       case QUOTE:
+                               v = _ao_lisp_atom_quote;
+                               break;
+                       case QUASIQUOTE:
+                               v = _ao_lisp_atom_quasiquote;
+                               break;
+                       case UNQUOTE:
+                               v = _ao_lisp_atom_unquote;
+                               break;
+                       case UNQUOTE_SPLICING:
+                               v = _ao_lisp_atom_unquote2dsplicing;
+                               break;
+                       }
                        break;
                case CLOSE:
                        if (!cons) {
                                v = AO_LISP_NIL;
                                break;
                        }
-                       v = ao_lisp_cons_poly(read_cons);
+                       v = ao_lisp_cons_poly(ao_lisp_read_cons);
                        --cons;
-                       in_quote = pop_read_stack(cons);
+                       read_state = pop_read_stack(cons);
                        break;
+               case DOT:
+                       if (!cons) {
+                               ao_lisp_error(AO_LISP_INVALID, ". outside of cons");
+                               return AO_LISP_NIL;
+                       }
+                       if (!ao_lisp_read_cons) {
+                               ao_lisp_error(AO_LISP_INVALID, ". first in cons");
+                               return AO_LISP_NIL;
+                       }
+                       read_state |= READ_SAW_DOT;
+                       continue;
                }
 
                /* loop over QUOTE ends */
@@ -480,26 +623,33 @@ ao_lisp_read(void)
                        if (!cons)
                                return v;
 
-                       struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, NULL);
-                       if (!read)
+                       if (read_state & READ_DONE_DOT) {
+                               ao_lisp_error(AO_LISP_INVALID, ". not last in cons");
                                return AO_LISP_NIL;
+                       }
 
-                       if (read_cons_tail)
-                               read_cons_tail->cdr = ao_lisp_cons_poly(read);
-                       else
-                               read_cons = read;
-                       read_cons_tail = read;
+                       if (read_state & READ_SAW_DOT) {
+                               read_state |= READ_DONE_DOT;
+                               ao_lisp_read_cons_tail->cdr = v;
+                       } else {
+                               struct ao_lisp_cons     *read = ao_lisp_cons_cons(v, AO_LISP_NIL);
+                               if (!read)
+                                       return AO_LISP_NIL;
 
-                       if (!in_quote || !read_cons->cdr)
+                               if (ao_lisp_read_cons_tail)
+                                       ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read);
+                               else
+                                       ao_lisp_read_cons = read;
+                               ao_lisp_read_cons_tail = read;
+                       }
+
+                       if (!(read_state & READ_IN_QUOTE) || !ao_lisp_read_cons->cdr)
                                break;
 
-                       v = ao_lisp_cons_poly(read_cons);
+                       v = ao_lisp_cons_poly(ao_lisp_read_cons);
                        --cons;
-                       in_quote = pop_read_stack(cons);
+                       read_state = pop_read_stack(cons);
                }
-
-               parse_token = lex();
-               DBGI("token %d (%s)\n", parse_token, token_string);
        }
        return v;
 }