X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=src%2Fscheme%2Fao_scheme_read.c;fp=src%2Fscheme%2Fao_scheme_read.c;h=6b1e9d66e5e538ba7b16df42734750faaa2e8e6e;hb=195cbeec19a6a44f309a9040d727d37fe4e2ec97;hp=0000000000000000000000000000000000000000;hpb=9dbc686ad7d3289dc0f9bcf4a973f71100e02ded;p=fw%2Faltos diff --git a/src/scheme/ao_scheme_read.c b/src/scheme/ao_scheme_read.c new file mode 100644 index 00000000..6b1e9d66 --- /dev/null +++ b/src/scheme/ao_scheme_read.c @@ -0,0 +1,655 @@ +/* + * Copyright © 2016 Keith Packard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include "ao_scheme.h" +#include "ao_scheme_read.h" +#include +#include + +static const uint16_t lex_classes[128] = { + IGNORE, /* ^@ */ + IGNORE, /* ^A */ + IGNORE, /* ^B */ + IGNORE, /* ^C */ + IGNORE, /* ^D */ + IGNORE, /* ^E */ + IGNORE, /* ^F */ + IGNORE, /* ^G */ + IGNORE, /* ^H */ + WHITE, /* ^I */ + WHITE, /* ^J */ + WHITE, /* ^K */ + WHITE, /* ^L */ + WHITE, /* ^M */ + IGNORE, /* ^N */ + IGNORE, /* ^O */ + IGNORE, /* ^P */ + IGNORE, /* ^Q */ + IGNORE, /* ^R */ + IGNORE, /* ^S */ + IGNORE, /* ^T */ + IGNORE, /* ^U */ + IGNORE, /* ^V */ + IGNORE, /* ^W */ + IGNORE, /* ^X */ + IGNORE, /* ^Y */ + IGNORE, /* ^Z */ + IGNORE, /* ^[ */ + IGNORE, /* ^\ */ + IGNORE, /* ^] */ + IGNORE, /* ^^ */ + IGNORE, /* ^_ */ + PRINTABLE|WHITE, /* */ + PRINTABLE, /* ! */ + PRINTABLE|STRINGC, /* " */ + PRINTABLE|POUND, /* # */ + PRINTABLE, /* $ */ + PRINTABLE, /* % */ + PRINTABLE, /* & */ + PRINTABLE|SPECIAL, /* ' */ + PRINTABLE|SPECIAL, /* ( */ + PRINTABLE|SPECIAL, /* ) */ + PRINTABLE, /* * */ + PRINTABLE|SIGN, /* + */ + PRINTABLE|SPECIAL, /* , */ + PRINTABLE|SIGN, /* - */ + PRINTABLE|DOTC|FLOATC, /* . */ + PRINTABLE, /* / */ + PRINTABLE|DIGIT, /* 0 */ + PRINTABLE|DIGIT, /* 1 */ + PRINTABLE|DIGIT, /* 2 */ + PRINTABLE|DIGIT, /* 3 */ + PRINTABLE|DIGIT, /* 4 */ + PRINTABLE|DIGIT, /* 5 */ + PRINTABLE|DIGIT, /* 6 */ + PRINTABLE|DIGIT, /* 7 */ + PRINTABLE|DIGIT, /* 8 */ + PRINTABLE|DIGIT, /* 9 */ + PRINTABLE, /* : */ + PRINTABLE|COMMENT, /* ; */ + PRINTABLE, /* < */ + PRINTABLE, /* = */ + PRINTABLE, /* > */ + PRINTABLE, /* ? */ + PRINTABLE, /* @ */ + PRINTABLE, /* A */ + PRINTABLE, /* B */ + PRINTABLE, /* C */ + PRINTABLE, /* D */ + PRINTABLE|FLOATC, /* E */ + PRINTABLE, /* F */ + PRINTABLE, /* G */ + PRINTABLE, /* H */ + PRINTABLE, /* I */ + PRINTABLE, /* J */ + PRINTABLE, /* K */ + PRINTABLE, /* L */ + PRINTABLE, /* M */ + PRINTABLE, /* N */ + PRINTABLE, /* O */ + PRINTABLE, /* P */ + PRINTABLE, /* Q */ + PRINTABLE, /* R */ + PRINTABLE, /* S */ + PRINTABLE, /* T */ + PRINTABLE, /* U */ + PRINTABLE, /* V */ + PRINTABLE, /* W */ + PRINTABLE, /* X */ + PRINTABLE, /* Y */ + PRINTABLE, /* Z */ + PRINTABLE, /* [ */ + PRINTABLE|BACKSLASH, /* \ */ + PRINTABLE, /* ] */ + PRINTABLE, /* ^ */ + PRINTABLE, /* _ */ + PRINTABLE|SPECIAL, /* ` */ + PRINTABLE, /* a */ + PRINTABLE, /* b */ + PRINTABLE, /* c */ + PRINTABLE, /* d */ + PRINTABLE|FLOATC, /* e */ + PRINTABLE, /* f */ + PRINTABLE, /* g */ + PRINTABLE, /* h */ + PRINTABLE, /* i */ + PRINTABLE, /* j */ + PRINTABLE, /* k */ + PRINTABLE, /* l */ + PRINTABLE, /* m */ + PRINTABLE, /* n */ + PRINTABLE, /* o */ + PRINTABLE, /* p */ + PRINTABLE, /* q */ + PRINTABLE, /* r */ + PRINTABLE, /* s */ + PRINTABLE, /* t */ + PRINTABLE, /* u */ + PRINTABLE, /* v */ + PRINTABLE, /* w */ + PRINTABLE, /* x */ + PRINTABLE, /* y */ + PRINTABLE, /* z */ + PRINTABLE, /* { */ + PRINTABLE, /* | */ + PRINTABLE, /* } */ + PRINTABLE, /* ~ */ + IGNORE, /* ^? */ +}; + +static int lex_unget_c; + +static inline int +lex_get() +{ + int c; + if (lex_unget_c) { + c = lex_unget_c; + lex_unget_c = 0; + } else { + c = ao_scheme_getc(); + } + return c; +} + +static inline void +lex_unget(int c) +{ + if (c != EOF) + lex_unget_c = c; +} + +static uint16_t lex_class; + +static int +lexc(void) +{ + int c; + do { + c = lex_get(); + if (c == EOF) { + c = 0; + lex_class = ENDOFFILE; + } else { + c &= 0x7f; + lex_class = lex_classes[c]; + } + } while (lex_class & IGNORE); + return c; +} + +static int +lex_quoted(void) +{ + int c; + int v; + int count; + + c = lex_get(); + if (c == EOF) { + lex_class = ENDOFFILE; + return 0; + } + lex_class = 0; + c &= 0x7f; + switch (c) { + case 'n': + return '\n'; + case 'f': + return '\f'; + case 'b': + return '\b'; + case 'r': + return '\r'; + case 'v': + return '\v'; + case 't': + return '\t'; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + v = c - '0'; + count = 1; + while (count <= 3) { + c = lex_get(); + if (c == EOF) + return EOF; + c &= 0x7f; + if (c < '0' || '7' < c) { + lex_unget(c); + break; + } + v = (v << 3) + c - '0'; + ++count; + } + return v; + default: + return c; + } +} + +#define AO_SCHEME_TOKEN_MAX 32 + +static char token_string[AO_SCHEME_TOKEN_MAX]; +static int32_t token_int; +static int token_len; +static float token_float; + +static inline void add_token(int c) { + if (c && token_len < AO_SCHEME_TOKEN_MAX - 1) + token_string[token_len++] = c; +} + +static inline void del_token(void) { + if (token_len > 0) + token_len--; +} + +static inline void end_token(void) { + token_string[token_len] = '\0'; +} + +struct namedfloat { + const char *name; + float value; +}; + +static const struct namedfloat namedfloats[] = { + { .name = "+inf.0", .value = INFINITY }, + { .name = "-inf.0", .value = -INFINITY }, + { .name = "+nan.0", .value = NAN }, + { .name = "-nan.0", .value = NAN }, +}; + +#define NUM_NAMED_FLOATS (sizeof namedfloats / sizeof namedfloats[0]) + +static int +_lex(void) +{ + int c; + + token_len = 0; + for (;;) { + c = lexc(); + if (lex_class & ENDOFFILE) + return END; + + if (lex_class & WHITE) + continue; + + if (lex_class & COMMENT) { + while ((c = lexc()) != '\n') { + if (lex_class & ENDOFFILE) + return END; + } + continue; + } + + if (lex_class & (SPECIAL|DOTC)) { + add_token(c); + end_token(); + switch (c) { + case '(': + case '[': + return OPEN; + case ')': + case ']': + return CLOSE; + case '\'': + return QUOTE; + case '.': + return DOT; + case '`': + return QUASIQUOTE; + case ',': + c = lexc(); + if (c == '@') { + add_token(c); + end_token(); + return UNQUOTE_SPLICING; + } else { + lex_unget(c); + return UNQUOTE; + } + } + } + if (lex_class & POUND) { + c = lexc(); + switch (c) { + case 't': + add_token(c); + end_token(); + return BOOL; + case 'f': + add_token(c); + end_token(); + return BOOL; + case '\\': + for (;;) { + int alphabetic; + c = lexc(); + alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')); + if (token_len == 0) { + add_token(c); + if (!alphabetic) + break; + } else { + if (alphabetic) + add_token(c); + else { + lex_unget(c); + break; + } + } + } + end_token(); + if (token_len == 1) + token_int = token_string[0]; + else if (!strcmp(token_string, "space")) + token_int = ' '; + else if (!strcmp(token_string, "newline")) + token_int = '\n'; + else if (!strcmp(token_string, "tab")) + token_int = '\t'; + else if (!strcmp(token_string, "return")) + token_int = '\r'; + else if (!strcmp(token_string, "formfeed")) + token_int = '\f'; + else { + ao_scheme_error(AO_SCHEME_INVALID, "invalid character token #\\%s", token_string); + continue; + } + return NUM; + } + } + if (lex_class & STRINGC) { + for (;;) { + c = lexc(); + if (lex_class & BACKSLASH) + c = lex_quoted(); + if (lex_class & (STRINGC|ENDOFFILE)) { + end_token(); + return STRING; + } + add_token(c); + } + } + if (lex_class & PRINTABLE) { + int isfloat; + int hasdigit; + int isneg; + int isint; + int epos; + + isfloat = 1; + isint = 1; + hasdigit = 0; + token_int = 0; + isneg = 0; + epos = 0; + for (;;) { + if (!(lex_class & NUMBER)) { + isint = 0; + isfloat = 0; + } else { + if (!(lex_class & INTEGER)) + isint = 0; + if (token_len != epos && + (lex_class & SIGN)) + { + isint = 0; + isfloat = 0; + } + if (c == '-') + isneg = 1; + if (c == '.' && epos != 0) + isfloat = 0; + if (c == 'e' || c == 'E') { + if (token_len == 0) + isfloat = 0; + else + epos = token_len + 1; + } + if (lex_class & DIGIT) { + hasdigit = 1; + if (isint) + token_int = token_int * 10 + c - '0'; + } + } + add_token (c); + c = lexc (); + if ((lex_class & (NOTNAME)) && (c != '.' || !isfloat)) { + unsigned int u; +// if (lex_class & ENDOFFILE) +// clearerr (f); + lex_unget(c); + end_token (); + if (isint && hasdigit) { + if (isneg) + token_int = -token_int; + return NUM; + } + if (isfloat && hasdigit) { + token_float = strtof(token_string, NULL); + return FLOAT; + } + for (u = 0; u < NUM_NAMED_FLOATS; u++) + if (!strcmp(namedfloats[u].name, token_string)) { + token_float = namedfloats[u].value; + return FLOAT; + } + return NAME; + } + } + } + } +} + +static inline int lex(void) +{ + int parse_token = _lex(); + RDBGI("token %d (%s)\n", parse_token, token_string); + return parse_token; +} + +static int parse_token; + +struct ao_scheme_cons *ao_scheme_read_cons; +struct ao_scheme_cons *ao_scheme_read_cons_tail; +struct ao_scheme_cons *ao_scheme_read_stack; + +#define READ_IN_QUOTE 0x01 +#define READ_SAW_DOT 0x02 +#define READ_DONE_DOT 0x04 + +static int +push_read_stack(int cons, int read_state) +{ + RDBGI("push read stack %p 0x%x\n", ao_scheme_read_cons, read_state); + RDBG_IN(); + if (cons) { + ao_scheme_read_stack = ao_scheme_cons_cons(ao_scheme_cons_poly(ao_scheme_read_cons), + ao_scheme__cons(ao_scheme_int_poly(read_state), + ao_scheme_cons_poly(ao_scheme_read_stack))); + if (!ao_scheme_read_stack) + return 0; + } + ao_scheme_read_cons = NULL; + ao_scheme_read_cons_tail = NULL; + return 1; +} + +static int +pop_read_stack(int cons) +{ + int read_state = 0; + if (cons) { + ao_scheme_read_cons = ao_scheme_poly_cons(ao_scheme_read_stack->car); + ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr); + read_state = ao_scheme_poly_int(ao_scheme_read_stack->car); + ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr); + for (ao_scheme_read_cons_tail = ao_scheme_read_cons; + ao_scheme_read_cons_tail && ao_scheme_read_cons_tail->cdr; + ao_scheme_read_cons_tail = ao_scheme_poly_cons(ao_scheme_read_cons_tail->cdr)) + ; + } else { + ao_scheme_read_cons = 0; + ao_scheme_read_cons_tail = 0; + ao_scheme_read_stack = 0; + } + RDBG_OUT(); + RDBGI("pop read stack %p %d\n", ao_scheme_read_cons, read_state); + return read_state; +} + +ao_poly +ao_scheme_read(void) +{ + struct ao_scheme_atom *atom; + char *string; + int cons; + int read_state; + ao_poly v = AO_SCHEME_NIL; + + cons = 0; + read_state = 0; + ao_scheme_read_cons = ao_scheme_read_cons_tail = ao_scheme_read_stack = 0; + for (;;) { + parse_token = lex(); + while (parse_token == OPEN) { + if (!push_read_stack(cons, read_state)) + return AO_SCHEME_NIL; + cons++; + read_state = 0; + parse_token = lex(); + } + + switch (parse_token) { + case END: + default: + if (cons) + ao_scheme_error(AO_SCHEME_EOF, "unexpected end of file"); + return _ao_scheme_atom_eof; + break; + case NAME: + atom = ao_scheme_atom_intern(token_string); + if (atom) + v = ao_scheme_atom_poly(atom); + else + v = AO_SCHEME_NIL; + break; + case NUM: + v = ao_scheme_integer_poly(token_int); + break; + case FLOAT: + v = ao_scheme_float_get(token_float); + break; + case BOOL: + if (token_string[0] == 't') + v = _ao_scheme_bool_true; + else + v = _ao_scheme_bool_false; + break; + case STRING: + string = ao_scheme_string_copy(token_string); + if (string) + v = ao_scheme_string_poly(string); + else + v = AO_SCHEME_NIL; + break; + case QUOTE: + case QUASIQUOTE: + case UNQUOTE: + case UNQUOTE_SPLICING: + if (!push_read_stack(cons, read_state)) + return AO_SCHEME_NIL; + cons++; + read_state = READ_IN_QUOTE; + switch (parse_token) { + case QUOTE: + v = _ao_scheme_atom_quote; + break; + case QUASIQUOTE: + v = _ao_scheme_atom_quasiquote; + break; + case UNQUOTE: + v = _ao_scheme_atom_unquote; + break; + case UNQUOTE_SPLICING: + v = _ao_scheme_atom_unquote2dsplicing; + break; + } + break; + case CLOSE: + if (!cons) { + v = AO_SCHEME_NIL; + break; + } + v = ao_scheme_cons_poly(ao_scheme_read_cons); + --cons; + read_state = pop_read_stack(cons); + break; + case DOT: + if (!cons) { + ao_scheme_error(AO_SCHEME_INVALID, ". outside of cons"); + return AO_SCHEME_NIL; + } + if (!ao_scheme_read_cons) { + ao_scheme_error(AO_SCHEME_INVALID, ". first in cons"); + return AO_SCHEME_NIL; + } + read_state |= READ_SAW_DOT; + continue; + } + + /* loop over QUOTE ends */ + for (;;) { + if (!cons) + return v; + + if (read_state & READ_DONE_DOT) { + ao_scheme_error(AO_SCHEME_INVALID, ". not last in cons"); + return AO_SCHEME_NIL; + } + + if (read_state & READ_SAW_DOT) { + read_state |= READ_DONE_DOT; + ao_scheme_read_cons_tail->cdr = v; + } else { + struct ao_scheme_cons *read = ao_scheme_cons_cons(v, AO_SCHEME_NIL); + if (!read) + return AO_SCHEME_NIL; + + if (ao_scheme_read_cons_tail) + ao_scheme_read_cons_tail->cdr = ao_scheme_cons_poly(read); + else + ao_scheme_read_cons = read; + ao_scheme_read_cons_tail = read; + } + + if (!(read_state & READ_IN_QUOTE) || !ao_scheme_read_cons->cdr) + break; + + v = ao_scheme_cons_poly(ao_scheme_read_cons); + --cons; + read_state = pop_read_stack(cons); + } + } + return v; +}