X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=src%2Flisp%2Fao_lisp_read.c;h=0ca12a81f6fcc843c24ba2f046970f7e8a83a37c;hb=ed6967cef5d82baacafe1c23229f44d58c838326;hp=3a2ef7f1f15cef109e81589a1adcd6e33132f61a;hpb=417161dbb36323b5a6572859dedad02ca92fc65c;p=fw%2Faltos diff --git a/src/lisp/ao_lisp_read.c b/src/lisp/ao_lisp_read.c index 3a2ef7f1..0ca12a81 100644 --- a/src/lisp/ao_lisp_read.c +++ b/src/lisp/ao_lisp_read.c @@ -14,6 +14,8 @@ #include "ao_lisp.h" #include "ao_lisp_read.h" +#include +#include static const uint16_t lex_classes[128] = { IGNORE, /* ^@ */ @@ -51,18 +53,18 @@ static const uint16_t lex_classes[128] = { PRINTABLE|WHITE, /* */ PRINTABLE, /* ! */ PRINTABLE|STRINGC, /* " */ - PRINTABLE|COMMENT, /* # */ + PRINTABLE|POUND, /* # */ PRINTABLE, /* $ */ PRINTABLE, /* % */ PRINTABLE, /* & */ - PRINTABLE|QUOTEC, /* ' */ - PRINTABLE|BRA, /* ( */ - PRINTABLE|KET, /* ) */ + PRINTABLE|SPECIAL, /* ' */ + PRINTABLE|SPECIAL, /* ( */ + PRINTABLE|SPECIAL, /* ) */ PRINTABLE, /* * */ PRINTABLE|SIGN, /* + */ - PRINTABLE, /* , */ + PRINTABLE|SPECIAL, /* , */ PRINTABLE|SIGN, /* - */ - PRINTABLE, /* . */ + PRINTABLE|DOTC|FLOATC, /* . */ PRINTABLE, /* / */ PRINTABLE|DIGIT, /* 0 */ PRINTABLE|DIGIT, /* 1 */ @@ -85,7 +87,7 @@ static const uint16_t lex_classes[128] = { PRINTABLE, /* B */ PRINTABLE, /* C */ PRINTABLE, /* D */ - PRINTABLE, /* E */ + PRINTABLE|FLOATC, /* E */ PRINTABLE, /* F */ PRINTABLE, /* G */ PRINTABLE, /* H */ @@ -112,12 +114,12 @@ static const uint16_t lex_classes[128] = { PRINTABLE, /* ] */ PRINTABLE, /* ^ */ PRINTABLE, /* _ */ - PRINTABLE, /* ` */ + PRINTABLE|SPECIAL, /* ` */ PRINTABLE, /* a */ PRINTABLE, /* b */ PRINTABLE, /* c */ PRINTABLE, /* d */ - PRINTABLE, /* e */ + PRINTABLE|FLOATC, /* e */ PRINTABLE, /* f */ PRINTABLE, /* g */ PRINTABLE, /* h */ @@ -140,9 +142,9 @@ static const uint16_t lex_classes[128] = { PRINTABLE, /* y */ PRINTABLE, /* z */ PRINTABLE, /* { */ - PRINTABLE|VBAR, /* | */ + PRINTABLE, /* | */ PRINTABLE, /* } */ - PRINTABLE|TWIDDLE, /* ~ */ + PRINTABLE, /* ~ */ IGNORE, /* ^? */ }; @@ -168,16 +170,38 @@ lex_unget(int c) lex_unget_c = c; } +static uint16_t lex_class; + +static int +lexc(void) +{ + int c; + do { + c = lex_get(); + if (c == EOF) { + c = 0; + lex_class = ENDOFFILE; + } else { + c &= 0x7f; + lex_class = lex_classes[c]; + } + } while (lex_class & IGNORE); + return c; +} + static int -lex_quoted (void) +lex_quoted(void) { int c; int v; int count; c = lex_get(); - if (c == EOF) - return EOF; + if (c == EOF) { + lex_class = ENDOFFILE; + return 0; + } + lex_class = 0; c &= 0x7f; switch (c) { case 'n': @@ -220,49 +244,43 @@ lex_quoted (void) } } -static uint16_t lex_class; - -static int -lexc(void) -{ - int c; - do { - c = lex_get(); - if (c == EOF) { - lex_class = ENDOFFILE; - c = 0; - } else { - c &= 0x7f; - lex_class = lex_classes[c]; - if (lex_class & BACKSLASH) { - c = lex_quoted(); - if (c == EOF) - lex_class = ENDOFFILE; - else - lex_class = PRINTABLE; - } - } - } while (lex_class & IGNORE); - return c; -} - #define AO_LISP_TOKEN_MAX 32 static char token_string[AO_LISP_TOKEN_MAX]; -static int token_int; +static int32_t token_int; static int token_len; +static float token_float; static inline void add_token(int c) { if (c && token_len < AO_LISP_TOKEN_MAX - 1) token_string[token_len++] = c; } +static inline void del_token(void) { + if (token_len > 0) + token_len--; +} + static inline void end_token(void) { token_string[token_len] = '\0'; } +struct namedfloat { + const char *name; + float value; +}; + +static const struct namedfloat namedfloats[] = { + { .name = "+inf.0", .value = INFINITY }, + { .name = "-inf.0", .value = -INFINITY }, + { .name = "+nan.0", .value = NAN }, + { .name = "-nan.0", .value = NAN }, +}; + +#define NUM_NAMED_FLOATS (sizeof namedfloats / sizeof namedfloats[0]) + static int -lex(void) +_lex(void) { int c; @@ -270,7 +288,7 @@ lex(void) for (;;) { c = lexc(); if (lex_class & ENDOFFILE) - return AO_LISP_NIL; + return END; if (lex_class & WHITE) continue; @@ -278,30 +296,93 @@ lex(void) if (lex_class & COMMENT) { while ((c = lexc()) != '\n') { if (lex_class & ENDOFFILE) - return AO_LISP_NIL; + return END; } continue; } - if (lex_class & (BRA|KET|QUOTEC)) { + if (lex_class & (SPECIAL|DOTC)) { add_token(c); end_token(); switch (c) { case '(': + case '[': return OPEN; case ')': + case ']': return CLOSE; case '\'': return QUOTE; + case '.': + return DOT; + case '`': + return QUASIQUOTE; + case ',': + c = lexc(); + if (c == '@') { + add_token(c); + end_token(); + return UNQUOTE_SPLICING; + } else { + lex_unget(c); + return UNQUOTE; + } } } - if (lex_class & TWIDDLE) { - token_int = lexc(); - return NUM; + if (lex_class & POUND) { + c = lexc(); + switch (c) { + case 't': + add_token(c); + end_token(); + return BOOL; + case 'f': + add_token(c); + end_token(); + return BOOL; + case '\\': + for (;;) { + int alphabetic; + c = lexc(); + alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')); + if (token_len == 0) { + add_token(c); + if (!alphabetic) + break; + } else { + if (alphabetic) + add_token(c); + else { + lex_unget(c); + break; + } + } + } + end_token(); + if (token_len == 1) + token_int = token_string[0]; + else if (!strcmp(token_string, "space")) + token_int = ' '; + else if (!strcmp(token_string, "newline")) + token_int = '\n'; + else if (!strcmp(token_string, "tab")) + token_int = '\t'; + else if (!strcmp(token_string, "return")) + token_int = '\r'; + else if (!strcmp(token_string, "formfeed")) + token_int = '\f'; + else { + ao_lisp_error(AO_LISP_INVALID, "invalid character token #\\%s", token_string); + continue; + } + return NUM; + } } if (lex_class & STRINGC) { for (;;) { c = lexc(); + if (lex_class & BACKSLASH) + c = lex_quoted(); if (lex_class & (STRINGC|ENDOFFILE)) { end_token(); return STRING; @@ -310,91 +391,131 @@ lex(void) } } if (lex_class & PRINTABLE) { - int isnum; + int isfloat; int hasdigit; int isneg; + int isint; + int epos; - isnum = 1; + isfloat = 1; + isint = 1; hasdigit = 0; token_int = 0; isneg = 0; + epos = 0; for (;;) { if (!(lex_class & NUMBER)) { - isnum = 0; + isint = 0; + isfloat = 0; } else { - if (token_len != 0 && + if (!(lex_class & INTEGER)) + isint = 0; + if (token_len != epos && (lex_class & SIGN)) { - isnum = 0; + isint = 0; + isfloat = 0; } if (c == '-') isneg = 1; + if (c == '.' && epos != 0) + isfloat = 0; + if (c == 'e' || c == 'E') { + if (token_len == 0) + isfloat = 0; + else + epos = token_len + 1; + } if (lex_class & DIGIT) { hasdigit = 1; - if (isnum) + if (isint) token_int = token_int * 10 + c - '0'; } } add_token (c); c = lexc (); - if (lex_class & (NOTNAME)) { + if ((lex_class & (NOTNAME)) && (c != '.' || !isfloat)) { + unsigned int u; // if (lex_class & ENDOFFILE) // clearerr (f); lex_unget(c); end_token (); - if (isnum && hasdigit) { + if (isint && hasdigit) { if (isneg) token_int = -token_int; return NUM; } + if (isfloat && hasdigit) { + token_float = strtof(token_string, NULL); + return FLOAT; + } + for (u = 0; u < NUM_NAMED_FLOATS; u++) + if (!strcmp(namedfloats[u].name, token_string)) { + token_float = namedfloats[u].value; + return FLOAT; + } return NAME; } } - } } } +static inline int lex(void) +{ + int parse_token = _lex(); + RDBGI("token %d (%s)\n", parse_token, token_string); + return parse_token; +} + static int parse_token; -static uint8_t been_here; -static struct ao_lisp_cons *read_cons; -static struct ao_lisp_cons *read_cons_tail; -static struct ao_lisp_cons *read_stack; + +struct ao_lisp_cons *ao_lisp_read_cons; +struct ao_lisp_cons *ao_lisp_read_cons_tail; +struct ao_lisp_cons *ao_lisp_read_stack; + +#define READ_IN_QUOTE 0x01 +#define READ_SAW_DOT 0x02 +#define READ_DONE_DOT 0x04 static int -push_read_stack(int cons, int in_quote) +push_read_stack(int cons, int read_state) { + RDBGI("push read stack %p 0x%x\n", ao_lisp_read_cons, read_state); + RDBG_IN(); if (cons) { - read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(read_cons), - ao_lisp_cons_cons(ao_lisp_int_poly(in_quote), - read_stack)); - if (!read_stack) + ao_lisp_read_stack = ao_lisp_cons_cons(ao_lisp_cons_poly(ao_lisp_read_cons), + ao_lisp__cons(ao_lisp_int_poly(read_state), + ao_lisp_cons_poly(ao_lisp_read_stack))); + if (!ao_lisp_read_stack) return 0; } - read_cons = NULL; - read_cons_tail = NULL; + ao_lisp_read_cons = NULL; + ao_lisp_read_cons_tail = NULL; return 1; } static int pop_read_stack(int cons) { - int in_quote = 0; + int read_state = 0; if (cons) { - read_cons = ao_lisp_poly_cons(read_stack->car); - read_stack = ao_lisp_poly_cons(read_stack->cdr); - in_quote = ao_lisp_poly_int(read_stack->car); - read_stack = ao_lisp_poly_cons(read_stack->cdr); - for (read_cons_tail = read_cons; - read_cons_tail && read_cons_tail->cdr; - read_cons_tail = ao_lisp_poly_cons(read_cons_tail->cdr)) + ao_lisp_read_cons = ao_lisp_poly_cons(ao_lisp_read_stack->car); + ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr); + read_state = ao_lisp_poly_int(ao_lisp_read_stack->car); + ao_lisp_read_stack = ao_lisp_poly_cons(ao_lisp_read_stack->cdr); + for (ao_lisp_read_cons_tail = ao_lisp_read_cons; + ao_lisp_read_cons_tail && ao_lisp_read_cons_tail->cdr; + ao_lisp_read_cons_tail = ao_lisp_poly_cons(ao_lisp_read_cons_tail->cdr)) ; } else { - read_cons = 0; - read_cons_tail = 0; - read_stack = 0; + ao_lisp_read_cons = 0; + ao_lisp_read_cons_tail = 0; + ao_lisp_read_stack = 0; } - return in_quote; + RDBG_OUT(); + RDBGI("pop read stack %p %d\n", ao_lisp_read_cons, read_state); + return read_state; } ao_poly @@ -403,33 +524,28 @@ ao_lisp_read(void) struct ao_lisp_atom *atom; char *string; int cons; - int in_quote; - ao_poly v; - - if (!been_here) { - ao_lisp_root_add(&ao_lisp_cons_type, &read_cons); - ao_lisp_root_add(&ao_lisp_cons_type, &read_cons_tail); - ao_lisp_root_add(&ao_lisp_cons_type, &read_stack); - been_here = 1; - } - parse_token = lex(); + int read_state; + ao_poly v = AO_LISP_NIL; cons = 0; - in_quote = 0; - read_cons = read_cons_tail = read_stack = 0; + read_state = 0; + ao_lisp_read_cons = ao_lisp_read_cons_tail = ao_lisp_read_stack = 0; for (;;) { + parse_token = lex(); while (parse_token == OPEN) { - if (!push_read_stack(cons, in_quote)) + if (!push_read_stack(cons, read_state)) return AO_LISP_NIL; cons++; - in_quote = 0; + read_state = 0; parse_token = lex(); } switch (parse_token) { - case ENDOFFILE: + case END: default: - v = AO_LISP_NIL; + if (cons) + ao_lisp_error(AO_LISP_EOF, "unexpected end of file"); + return _ao_lisp_atom_eof; break; case NAME: atom = ao_lisp_atom_intern(token_string); @@ -439,7 +555,16 @@ ao_lisp_read(void) v = AO_LISP_NIL; break; case NUM: - v = ao_lisp_int_poly(token_int); + v = ao_lisp_integer_poly(token_int); + break; + case FLOAT: + v = ao_lisp_float_get(token_float); + break; + case BOOL: + if (token_string[0] == 't') + v = _ao_lisp_bool_true; + else + v = _ao_lisp_bool_false; break; case STRING: string = ao_lisp_string_copy(token_string); @@ -449,21 +574,48 @@ ao_lisp_read(void) v = AO_LISP_NIL; break; case QUOTE: - if (!push_read_stack(cons, in_quote)) + case QUASIQUOTE: + case UNQUOTE: + case UNQUOTE_SPLICING: + if (!push_read_stack(cons, read_state)) return AO_LISP_NIL; cons++; - in_quote = 1; - v = _ao_lisp_atom_quote; + read_state = READ_IN_QUOTE; + switch (parse_token) { + case QUOTE: + v = _ao_lisp_atom_quote; + break; + case QUASIQUOTE: + v = _ao_lisp_atom_quasiquote; + break; + case UNQUOTE: + v = _ao_lisp_atom_unquote; + break; + case UNQUOTE_SPLICING: + v = _ao_lisp_atom_unquote2dsplicing; + break; + } break; case CLOSE: if (!cons) { v = AO_LISP_NIL; break; } - v = ao_lisp_cons_poly(read_cons); + v = ao_lisp_cons_poly(ao_lisp_read_cons); --cons; - in_quote = pop_read_stack(cons); + read_state = pop_read_stack(cons); break; + case DOT: + if (!cons) { + ao_lisp_error(AO_LISP_INVALID, ". outside of cons"); + return AO_LISP_NIL; + } + if (!ao_lisp_read_cons) { + ao_lisp_error(AO_LISP_INVALID, ". first in cons"); + return AO_LISP_NIL; + } + read_state |= READ_SAW_DOT; + continue; } /* loop over QUOTE ends */ @@ -471,25 +623,33 @@ ao_lisp_read(void) if (!cons) return v; - struct ao_lisp_cons *read = ao_lisp_cons_cons(v, NULL); - if (!read) + if (read_state & READ_DONE_DOT) { + ao_lisp_error(AO_LISP_INVALID, ". not last in cons"); return AO_LISP_NIL; + } - if (read_cons_tail) - read_cons_tail->cdr = ao_lisp_cons_poly(read); - else - read_cons = read; - read_cons_tail = read; + if (read_state & READ_SAW_DOT) { + read_state |= READ_DONE_DOT; + ao_lisp_read_cons_tail->cdr = v; + } else { + struct ao_lisp_cons *read = ao_lisp_cons_cons(v, AO_LISP_NIL); + if (!read) + return AO_LISP_NIL; - if (!in_quote || !read_cons->cdr) + if (ao_lisp_read_cons_tail) + ao_lisp_read_cons_tail->cdr = ao_lisp_cons_poly(read); + else + ao_lisp_read_cons = read; + ao_lisp_read_cons_tail = read; + } + + if (!(read_state & READ_IN_QUOTE) || !ao_lisp_read_cons->cdr) break; - v = ao_lisp_cons_poly(read_cons); + v = ao_lisp_cons_poly(ao_lisp_read_cons); --cons; - in_quote = pop_read_stack(cons); + read_state = pop_read_stack(cons); } - - parse_token = lex(); } return v; }