#include <stdlib.h>
static const uint16_t lex_classes[128] = {
- IGNORE, /* ^@ */
- IGNORE, /* ^A */
- IGNORE, /* ^B */
- IGNORE, /* ^C */
- IGNORE, /* ^D */
- IGNORE, /* ^E */
- IGNORE, /* ^F */
- IGNORE, /* ^G */
- IGNORE, /* ^H */
- WHITE, /* ^I */
- WHITE, /* ^J */
- WHITE, /* ^K */
- WHITE, /* ^L */
- WHITE, /* ^M */
- IGNORE, /* ^N */
- IGNORE, /* ^O */
- IGNORE, /* ^P */
- IGNORE, /* ^Q */
- IGNORE, /* ^R */
- IGNORE, /* ^S */
- IGNORE, /* ^T */
- IGNORE, /* ^U */
- IGNORE, /* ^V */
- IGNORE, /* ^W */
- IGNORE, /* ^X */
- IGNORE, /* ^Y */
- IGNORE, /* ^Z */
- IGNORE, /* ^[ */
- IGNORE, /* ^\ */
- IGNORE, /* ^] */
- IGNORE, /* ^^ */
- IGNORE, /* ^_ */
- PRINTABLE|WHITE, /* */
- PRINTABLE, /* ! */
- PRINTABLE|STRINGC, /* " */
- PRINTABLE|POUND, /* # */
- PRINTABLE, /* $ */
- PRINTABLE, /* % */
- PRINTABLE, /* & */
- PRINTABLE|SPECIAL, /* ' */
- PRINTABLE|SPECIAL, /* ( */
- PRINTABLE|SPECIAL, /* ) */
- PRINTABLE, /* * */
- PRINTABLE|SIGN, /* + */
+ IGNORE, /* ^@ */
+ IGNORE, /* ^A */
+ IGNORE, /* ^B */
+ IGNORE, /* ^C */
+ IGNORE, /* ^D */
+ IGNORE, /* ^E */
+ IGNORE, /* ^F */
+ IGNORE, /* ^G */
+ IGNORE, /* ^H */
+ WHITE, /* ^I */
+ WHITE, /* ^J */
+ WHITE, /* ^K */
+ WHITE, /* ^L */
+ WHITE, /* ^M */
+ IGNORE, /* ^N */
+ IGNORE, /* ^O */
+ IGNORE, /* ^P */
+ IGNORE, /* ^Q */
+ IGNORE, /* ^R */
+ IGNORE, /* ^S */
+ IGNORE, /* ^T */
+ IGNORE, /* ^U */
+ IGNORE, /* ^V */
+ IGNORE, /* ^W */
+ IGNORE, /* ^X */
+ IGNORE, /* ^Y */
+ IGNORE, /* ^Z */
+ IGNORE, /* ^[ */
+ IGNORE, /* ^\ */
+ IGNORE, /* ^] */
+ IGNORE, /* ^^ */
+ IGNORE, /* ^_ */
+ PRINTABLE|WHITE, /* */
+ PRINTABLE, /* ! */
+ PRINTABLE|STRINGC, /* " */
+ PRINTABLE, /* # */
+ PRINTABLE, /* $ */
+ PRINTABLE, /* % */
+ PRINTABLE, /* & */
+ PRINTABLE|SPECIAL, /* ' */
+ PRINTABLE|SPECIAL, /* ( */
+ PRINTABLE|SPECIAL, /* ) */
+ PRINTABLE, /* * */
+ PRINTABLE|SIGN, /* + */
PRINTABLE|SPECIAL_QUASI, /* , */
- PRINTABLE|SIGN, /* - */
- PRINTABLE|DOTC|FLOATC, /* . */
- PRINTABLE, /* / */
- PRINTABLE|DIGIT, /* 0 */
- PRINTABLE|DIGIT, /* 1 */
- PRINTABLE|DIGIT, /* 2 */
- PRINTABLE|DIGIT, /* 3 */
- PRINTABLE|DIGIT, /* 4 */
- PRINTABLE|DIGIT, /* 5 */
- PRINTABLE|DIGIT, /* 6 */
- PRINTABLE|DIGIT, /* 7 */
- PRINTABLE|DIGIT, /* 8 */
- PRINTABLE|DIGIT, /* 9 */
- PRINTABLE, /* : */
- PRINTABLE|COMMENT, /* ; */
- PRINTABLE, /* < */
- PRINTABLE, /* = */
- PRINTABLE, /* > */
- PRINTABLE, /* ? */
- PRINTABLE, /* @ */
- PRINTABLE, /* A */
- PRINTABLE, /* B */
- PRINTABLE, /* C */
- PRINTABLE, /* D */
- PRINTABLE|FLOATC, /* E */
- PRINTABLE, /* F */
- PRINTABLE, /* G */
- PRINTABLE, /* H */
- PRINTABLE, /* I */
- PRINTABLE, /* J */
- PRINTABLE, /* K */
- PRINTABLE, /* L */
- PRINTABLE, /* M */
- PRINTABLE, /* N */
- PRINTABLE, /* O */
- PRINTABLE, /* P */
- PRINTABLE, /* Q */
- PRINTABLE, /* R */
- PRINTABLE, /* S */
- PRINTABLE, /* T */
- PRINTABLE, /* U */
- PRINTABLE, /* V */
- PRINTABLE, /* W */
- PRINTABLE, /* X */
- PRINTABLE, /* Y */
- PRINTABLE, /* Z */
- PRINTABLE, /* [ */
- PRINTABLE|BACKSLASH, /* \ */
- PRINTABLE, /* ] */
- PRINTABLE, /* ^ */
- PRINTABLE, /* _ */
+ PRINTABLE|SIGN, /* - */
+ PRINTABLE|SPECIAL|FLOATC, /* . */
+ PRINTABLE, /* / */
+ PRINTABLE|DIGIT, /* 0 */
+ PRINTABLE|DIGIT, /* 1 */
+ PRINTABLE|DIGIT, /* 2 */
+ PRINTABLE|DIGIT, /* 3 */
+ PRINTABLE|DIGIT, /* 4 */
+ PRINTABLE|DIGIT, /* 5 */
+ PRINTABLE|DIGIT, /* 6 */
+ PRINTABLE|DIGIT, /* 7 */
+ PRINTABLE|DIGIT, /* 8 */
+ PRINTABLE|DIGIT, /* 9 */
+ PRINTABLE, /* : */
+ PRINTABLE|COMMENT, /* ; */
+ PRINTABLE, /* < */
+ PRINTABLE, /* = */
+ PRINTABLE, /* > */
+ PRINTABLE, /* ? */
+ PRINTABLE, /* @ */
+ PRINTABLE|ALPHA|HEX_LETTER, /* A */
+ PRINTABLE|ALPHA|HEX_LETTER, /* B */
+ PRINTABLE|ALPHA|HEX_LETTER, /* C */
+ PRINTABLE|ALPHA|HEX_LETTER, /* D */
+ PRINTABLE|ALPHA|FLOATC|HEX_LETTER,/* E */
+ PRINTABLE|ALPHA|HEX_LETTER, /* F */
+ PRINTABLE|ALPHA, /* G */
+ PRINTABLE|ALPHA, /* H */
+ PRINTABLE|ALPHA, /* I */
+ PRINTABLE|ALPHA, /* J */
+ PRINTABLE|ALPHA, /* K */
+ PRINTABLE|ALPHA, /* L */
+ PRINTABLE|ALPHA, /* M */
+ PRINTABLE|ALPHA, /* N */
+ PRINTABLE|ALPHA, /* O */
+ PRINTABLE|ALPHA, /* P */
+ PRINTABLE|ALPHA, /* Q */
+ PRINTABLE|ALPHA, /* R */
+ PRINTABLE|ALPHA, /* S */
+ PRINTABLE|ALPHA, /* T */
+ PRINTABLE|ALPHA, /* U */
+ PRINTABLE|ALPHA, /* V */
+ PRINTABLE|ALPHA, /* W */
+ PRINTABLE|ALPHA, /* X */
+ PRINTABLE|ALPHA, /* Y */
+ PRINTABLE|ALPHA, /* Z */
+ PRINTABLE, /* [ */
+ PRINTABLE, /* \ */
+ PRINTABLE, /* ] */
+ PRINTABLE, /* ^ */
+ PRINTABLE, /* _ */
PRINTABLE|SPECIAL_QUASI, /* ` */
- PRINTABLE, /* a */
- PRINTABLE, /* b */
- PRINTABLE, /* c */
- PRINTABLE, /* d */
- PRINTABLE|FLOATC, /* e */
- PRINTABLE, /* f */
- PRINTABLE, /* g */
- PRINTABLE, /* h */
- PRINTABLE, /* i */
- PRINTABLE, /* j */
- PRINTABLE, /* k */
- PRINTABLE, /* l */
- PRINTABLE, /* m */
- PRINTABLE, /* n */
- PRINTABLE, /* o */
- PRINTABLE, /* p */
- PRINTABLE, /* q */
- PRINTABLE, /* r */
- PRINTABLE, /* s */
- PRINTABLE, /* t */
- PRINTABLE, /* u */
- PRINTABLE, /* v */
- PRINTABLE, /* w */
- PRINTABLE, /* x */
- PRINTABLE, /* y */
- PRINTABLE, /* z */
- PRINTABLE, /* { */
- PRINTABLE, /* | */
- PRINTABLE, /* } */
- PRINTABLE, /* ~ */
- IGNORE, /* ^? */
+ PRINTABLE|ALPHA|HEX_LETTER, /* a */
+ PRINTABLE|ALPHA|HEX_LETTER, /* b */
+ PRINTABLE|ALPHA|HEX_LETTER, /* c */
+ PRINTABLE|ALPHA|HEX_LETTER, /* d */
+ PRINTABLE|ALPHA|FLOATC|HEX_LETTER,/* e */
+ PRINTABLE|ALPHA|HEX_LETTER, /* f */
+ PRINTABLE|ALPHA, /* g */
+ PRINTABLE|ALPHA, /* h */
+ PRINTABLE|ALPHA, /* i */
+ PRINTABLE|ALPHA, /* j */
+ PRINTABLE|ALPHA, /* k */
+ PRINTABLE|ALPHA, /* l */
+ PRINTABLE|ALPHA, /* m */
+ PRINTABLE|ALPHA, /* n */
+ PRINTABLE|ALPHA, /* o */
+ PRINTABLE|ALPHA, /* p */
+ PRINTABLE|ALPHA, /* q */
+ PRINTABLE|ALPHA, /* r */
+ PRINTABLE|ALPHA, /* s */
+ PRINTABLE|ALPHA, /* t */
+ PRINTABLE|ALPHA, /* u */
+ PRINTABLE|ALPHA, /* v */
+ PRINTABLE|ALPHA, /* w */
+ PRINTABLE|ALPHA, /* x */
+ PRINTABLE|ALPHA, /* y */
+ PRINTABLE|ALPHA, /* z */
+ PRINTABLE, /* { */
+ PRINTABLE, /* | */
+ PRINTABLE, /* } */
+ PRINTABLE, /* ~ */
+ IGNORE, /* ^? */
};
static int lex_unget_c;
+#ifndef ao_scheme_getc
+#define ao_scheme_getc(f) getc(f)
+#endif
+
static inline int
-lex_get(void)
+lex_get(FILE *in)
{
int c;
if (lex_unget_c) {
c = lex_unget_c;
lex_unget_c = 0;
} else {
- c = ao_scheme_getc();
+ c = ao_scheme_getc(in);
}
return c;
}
static uint16_t lex_class;
static int
-lexc(void)
+lexc(FILE *in)
{
int c;
do {
- c = lex_get();
+ c = lex_get(in);
if (c == EOF) {
c = 0;
lex_class = ENDOFFILE;
}
static int
-lex_quoted(void)
+lex_quoted(FILE *in)
{
int c;
int v;
int count;
- c = lex_get();
+ c = lex_get(in);
if (c == EOF) {
+ eof:
lex_class = ENDOFFILE;
return 0;
}
lex_class = 0;
c &= 0x7f;
switch (c) {
- case 'n':
- return '\n';
- case 'f':
- return '\f';
+ case 'a':
+ return '\a';
case 'b':
return '\b';
+ case 't':
+ return '\t';
+ case 'n':
+ return '\n';
case 'r':
return '\r';
+ case 'f':
+ return '\f';
case 'v':
return '\v';
- case 't':
- return '\t';
case '0':
case '1':
case '2':
v = c - '0';
count = 1;
while (count <= 3) {
- c = lex_get();
+ c = lex_get(in);
if (c == EOF)
- return EOF;
+ goto eof;
c &= 0x7f;
if (c < '0' || '7' < c) {
lex_unget(c);
static int32_t token_int;
static int token_len;
-static inline void add_token(int c) {
- if (c && token_len < AO_SCHEME_TOKEN_MAX - 1)
- token_string[token_len++] = c;
+static void start_token(void) {
+ token_len = 0;
}
-static inline void del_token(void) {
- if (token_len > 0)
- token_len--;
+static void add_token(int c) {
+ if (c && token_len < AO_SCHEME_TOKEN_MAX - 1)
+ token_string[token_len++] = c;
}
-static inline void end_token(void) {
+static void end_token(void) {
token_string[token_len] = '\0';
}
#endif
static int
-_lex(void)
+parse_int(FILE *in, int base)
{
+ int cval;
int c;
- token_len = 0;
+ token_int = 0;
+ for (;;) {
+ c = lexc(in);
+ if ((lex_class & HEX_DIGIT) == 0) {
+ lex_unget(c);
+ return NUM;
+ }
+ if ('0' <= c && c <= '9')
+ cval = c - '0';
+ else
+ cval = (c | ('a' - 'A')) - 'a' + 10;
+ token_int = token_int * base + cval;
+ }
+ return NUM;
+}
+
+static int
+_lex(FILE *in)
+{
+ int c;
+
+ start_token();
for (;;) {
- c = lexc();
+ c = lexc(in);
if (lex_class & ENDOFFILE)
return END;
continue;
if (lex_class & COMMENT) {
- while ((c = lexc()) != '\n') {
+ while ((c = lexc(in)) != '\n') {
if (lex_class & ENDOFFILE)
return END;
}
continue;
}
- if (lex_class & (SPECIAL|DOTC)) {
- add_token(c);
- end_token();
+ if (lex_class & SPECIAL) {
switch (c) {
case '(':
case '[':
case '`':
return QUASIQUOTE;
case ',':
- c = lexc();
+ c = lexc(in);
if (c == '@') {
- add_token(c);
- end_token();
return UNQUOTE_SPLICING;
} else {
lex_unget(c);
#endif
}
}
- if (lex_class & POUND) {
- c = lexc();
+ if (c == '#') {
+ c = lexc(in);
switch (c) {
case 't':
- add_token(c);
- end_token();
- return BOOL;
+ return TRUE_TOKEN;
case 'f':
- add_token(c);
- end_token();
- return BOOL;
+ return FALSE_TOKEN;
#ifdef AO_SCHEME_FEATURE_VECTOR
case '(':
return OPEN_VECTOR;
#endif
case '\\':
for (;;) {
- int alphabetic;
- c = lexc();
- alphabetic = (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
+ c = lexc(in);
if (token_len == 0) {
add_token(c);
- if (!alphabetic)
+ if (!(lex_class & ALPHA))
break;
} else {
- if (alphabetic)
+ if (lex_class & ALPHA)
add_token(c);
else {
lex_unget(c);
continue;
}
return NUM;
+ case 'x':
+ return parse_int(in, 16);
+ case 'o':
+ return parse_int(in, 8);
+ case 'b':
+ return parse_int(in, 2);
}
}
if (lex_class & STRINGC) {
for (;;) {
- c = lexc();
- if (lex_class & BACKSLASH)
- c = lex_quoted();
+ c = lexc(in);
+ if (c == '\\')
+ c = lex_quoted(in);
if (lex_class & (STRINGC|ENDOFFILE)) {
end_token();
return STRING;
}
}
add_token (c);
- c = lexc ();
+ c = lexc (in);
if ((lex_class & (NOTNAME))
#ifdef AO_SCHEME_FEATURE_FLOAT
&& (c != '.' || !isfloat)
#ifdef AO_SCHEME_FEATURE_FLOAT
unsigned int u;
#endif
-// if (lex_class & ENDOFFILE)
-// clearerr (f);
lex_unget(c);
end_token ();
if (isint && hasdigit) {
}
}
-static inline int lex(void)
+static inline int lex(FILE *in)
{
- int parse_token = _lex();
- RDBGI("token %d (%s)\n", parse_token, token_string);
+ int parse_token = _lex(in);
+ RDBGI("token %d \"%s\"\n", parse_token, token_string);
return parse_token;
}
RDBG_IN();
if (ao_scheme_read_list) {
ao_scheme_read_stack = ao_scheme_cons_cons(ao_scheme_cons_poly(ao_scheme_read_cons),
- ao_scheme__cons(ao_scheme_int_poly(read_state),
+ ao_scheme_cons(ao_scheme_int_poly(read_state),
ao_scheme_cons_poly(ao_scheme_read_stack)));
if (!ao_scheme_read_stack)
return 0;
ao_scheme_read_cons_tail = ao_scheme_poly_cons(ao_scheme_read_cons_tail->cdr))
;
} else {
- ao_scheme_read_cons = 0;
- ao_scheme_read_cons_tail = 0;
- ao_scheme_read_stack = 0;
read_state = ao_scheme_read_state;
+ ao_scheme_read_cons = NULL;
+ ao_scheme_read_cons_tail = NULL;
+ ao_scheme_read_stack = NULL;
+ ao_scheme_read_state = 0;
}
RDBG_OUT();
RDBGI("pop read stack %p %d\n", ao_scheme_read_cons, read_state);
#endif
ao_poly
-ao_scheme_read(void)
+ao_scheme_read(FILE *in)
{
struct ao_scheme_atom *atom;
- char *string;
+ struct ao_scheme_string *string;
int read_state;
ao_poly v = AO_SCHEME_NIL;
ao_scheme_read_list = 0;
read_state = 0;
- ao_scheme_read_cons = ao_scheme_read_cons_tail = ao_scheme_read_stack = 0;
+ ao_scheme_read_cons = ao_scheme_read_cons_tail = ao_scheme_read_stack = NULL;
for (;;) {
- parse_token = lex();
+ parse_token = lex(in);
while (is_open(parse_token)) {
#ifdef AO_SCHEME_FEATURE_VECTOR
if (parse_token == OPEN_VECTOR)
return AO_SCHEME_NIL;
ao_scheme_read_list++;
read_state = 0;
- parse_token = lex();
+ parse_token = lex(in);
}
switch (parse_token) {
v = ao_scheme_float_get(token_float);
break;
#endif
- case BOOL:
- if (token_string[0] == 't')
- v = _ao_scheme_bool_true;
- else
- v = _ao_scheme_bool_false;
+ case TRUE_TOKEN:
+ v = _ao_scheme_bool_true;
+ break;
+ case FALSE_TOKEN:
+ v = _ao_scheme_bool_false;
break;
case STRING:
- string = ao_scheme_string_copy(token_string);
+ string = ao_scheme_string_new(token_string);
if (string)
v = ao_scheme_string_poly(string);
else
--ao_scheme_read_list;
read_state = pop_read_stack();
#ifdef AO_SCHEME_FEATURE_VECTOR
- if (read_state & READ_SAW_VECTOR)
+ if (read_state & READ_SAW_VECTOR) {
v = ao_scheme_vector_poly(ao_scheme_list_to_vector(ao_scheme_poly_cons(v)));
+ read_state &= ~READ_SAW_VECTOR;
+ }
#endif
break;
case DOT: