2 * Copyright © 2016 Keith Packard <keithp@keithp.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
15 #include "ao_scheme.h"
16 #include "ao_scheme_read.h"
20 static const uint16_t lex_classes[128] = {
53 PRINTABLE|WHITE, /* */
55 PRINTABLE|STRINGC, /* " */
60 PRINTABLE|SPECIAL, /* ' */
61 PRINTABLE|SPECIAL, /* ( */
62 PRINTABLE|SPECIAL, /* ) */
64 PRINTABLE|SIGN, /* + */
65 PRINTABLE|SPECIAL_QUASI, /* , */
66 PRINTABLE|SIGN, /* - */
67 PRINTABLE|SPECIAL|FLOATC, /* . */
69 PRINTABLE|DIGIT, /* 0 */
70 PRINTABLE|DIGIT, /* 1 */
71 PRINTABLE|DIGIT, /* 2 */
72 PRINTABLE|DIGIT, /* 3 */
73 PRINTABLE|DIGIT, /* 4 */
74 PRINTABLE|DIGIT, /* 5 */
75 PRINTABLE|DIGIT, /* 6 */
76 PRINTABLE|DIGIT, /* 7 */
77 PRINTABLE|DIGIT, /* 8 */
78 PRINTABLE|DIGIT, /* 9 */
80 PRINTABLE|COMMENT, /* ; */
86 PRINTABLE|ALPHA|HEX_LETTER, /* A */
87 PRINTABLE|ALPHA|HEX_LETTER, /* B */
88 PRINTABLE|ALPHA|HEX_LETTER, /* C */
89 PRINTABLE|ALPHA|HEX_LETTER, /* D */
90 PRINTABLE|ALPHA|FLOATC|HEX_LETTER,/* E */
91 PRINTABLE|ALPHA|HEX_LETTER, /* F */
92 PRINTABLE|ALPHA, /* G */
93 PRINTABLE|ALPHA, /* H */
94 PRINTABLE|ALPHA, /* I */
95 PRINTABLE|ALPHA, /* J */
96 PRINTABLE|ALPHA, /* K */
97 PRINTABLE|ALPHA, /* L */
98 PRINTABLE|ALPHA, /* M */
99 PRINTABLE|ALPHA, /* N */
100 PRINTABLE|ALPHA, /* O */
101 PRINTABLE|ALPHA, /* P */
102 PRINTABLE|ALPHA, /* Q */
103 PRINTABLE|ALPHA, /* R */
104 PRINTABLE|ALPHA, /* S */
105 PRINTABLE|ALPHA, /* T */
106 PRINTABLE|ALPHA, /* U */
107 PRINTABLE|ALPHA, /* V */
108 PRINTABLE|ALPHA, /* W */
109 PRINTABLE|ALPHA, /* X */
110 PRINTABLE|ALPHA, /* Y */
111 PRINTABLE|ALPHA, /* Z */
117 PRINTABLE|SPECIAL_QUASI, /* ` */
118 PRINTABLE|ALPHA|HEX_LETTER, /* a */
119 PRINTABLE|ALPHA|HEX_LETTER, /* b */
120 PRINTABLE|ALPHA|HEX_LETTER, /* c */
121 PRINTABLE|ALPHA|HEX_LETTER, /* d */
122 PRINTABLE|ALPHA|FLOATC|HEX_LETTER,/* e */
123 PRINTABLE|ALPHA|HEX_LETTER, /* f */
124 PRINTABLE|ALPHA, /* g */
125 PRINTABLE|ALPHA, /* h */
126 PRINTABLE|ALPHA, /* i */
127 PRINTABLE|ALPHA, /* j */
128 PRINTABLE|ALPHA, /* k */
129 PRINTABLE|ALPHA, /* l */
130 PRINTABLE|ALPHA, /* m */
131 PRINTABLE|ALPHA, /* n */
132 PRINTABLE|ALPHA, /* o */
133 PRINTABLE|ALPHA, /* p */
134 PRINTABLE|ALPHA, /* q */
135 PRINTABLE|ALPHA, /* r */
136 PRINTABLE|ALPHA, /* s */
137 PRINTABLE|ALPHA, /* t */
138 PRINTABLE|ALPHA, /* u */
139 PRINTABLE|ALPHA, /* v */
140 PRINTABLE|ALPHA, /* w */
141 PRINTABLE|ALPHA, /* x */
142 PRINTABLE|ALPHA, /* y */
143 PRINTABLE|ALPHA, /* z */
151 static int lex_unget_c;
153 #ifndef ao_scheme_getc
154 #define ao_scheme_getc(f) getc(f)
165 c = ao_scheme_getc(in);
177 static uint16_t lex_class;
187 lex_class = ENDOFFILE;
189 lex_class = PRINTABLE;
191 lex_class = lex_classes[c];
193 } while (lex_class & IGNORE);
207 lex_class = ENDOFFILE;
242 if (c < '0' || '7' < c) {
246 v = (v << 3) + c - '0';
255 #ifndef AO_SCHEME_TOKEN_MAX
256 #define AO_SCHEME_TOKEN_MAX 128
259 static char token_string[AO_SCHEME_TOKEN_MAX];
260 static int32_t token_int;
261 static int token_len;
263 static void start_token(void) {
267 static void add_token(int c) {
268 if (c && token_len < AO_SCHEME_TOKEN_MAX - 1)
269 token_string[token_len++] = c;
272 static void end_token(void) {
273 token_string[token_len] = '\0';
276 #ifdef AO_SCHEME_FEATURE_FLOAT
277 static float token_float;
284 static const struct namedfloat namedfloats[] = {
285 { .name = "+inf.0", .value = INFINITY },
286 { .name = "-inf.0", .value = -INFINITY },
287 { .name = "+nan.0", .value = NAN },
288 { .name = "-nan.0", .value = NAN },
291 #define NUM_NAMED_FLOATS (sizeof namedfloats / sizeof namedfloats[0])
295 parse_int(FILE *in, int base)
303 if ((lex_class & HEX_DIGIT) == 0) {
307 if ('0' <= c && c <= '9')
310 cval = (c | ('a' - 'A')) - 'a' + 10;
311 token_int = token_int * base + cval;
324 if (lex_class & ENDOFFILE)
327 if (lex_class & WHITE)
330 if (lex_class & COMMENT) {
331 while ((c = lexc(in)) != '\n') {
332 if (lex_class & ENDOFFILE)
338 if (lex_class & SPECIAL) {
350 #ifdef AO_SCHEME_FEATURE_QUASI
356 return UNQUOTE_SPLICING;
371 #ifdef AO_SCHEME_FEATURE_VECTOR
378 if (token_len == 0) {
380 if (!(lex_class & ALPHA))
383 if (lex_class & ALPHA)
393 token_int = token_string[0];
394 else if (!strcmp(token_string, "space"))
396 else if (!strcmp(token_string, "newline"))
398 else if (!strcmp(token_string, "tab"))
400 else if (!strcmp(token_string, "return"))
402 else if (!strcmp(token_string, "formfeed"))
405 ao_scheme_error(AO_SCHEME_INVALID, "invalid character token #\\%s", token_string);
410 return parse_int(in, 16);
412 return parse_int(in, 8);
414 return parse_int(in, 2);
417 if (lex_class & STRINGC) {
422 if (lex_class & (STRINGC|ENDOFFILE)) {
429 if (lex_class & PRINTABLE) {
430 #ifdef AO_SCHEME_FEATURE_FLOAT
440 if (!(lex_class & NUMBER)) {
442 #ifdef AO_SCHEME_FEATURE_FLOAT
446 #ifdef AO_SCHEME_FEATURE_FLOAT
447 if (!(lex_class & INTEGER))
449 if (token_len != epos &&
458 #ifdef AO_SCHEME_FEATURE_FLOAT
459 if (c == '.' && epos != 0)
461 if (c == 'e' || c == 'E') {
465 epos = token_len + 1;
468 if (lex_class & DIGIT) {
471 token_int = token_int * 10 + c - '0';
476 if ((lex_class & (NOTNAME))
477 #ifdef AO_SCHEME_FEATURE_FLOAT
478 && (c != '.' || !isfloat)
481 #ifdef AO_SCHEME_FEATURE_FLOAT
486 if (isint && hasdigit) {
488 token_int = -token_int;
491 #ifdef AO_SCHEME_FEATURE_FLOAT
492 if (isfloat && hasdigit) {
493 token_float = strtof(token_string, NULL);
496 for (u = 0; u < NUM_NAMED_FLOATS; u++)
497 if (!strcmp(namedfloats[u].name, token_string)) {
498 token_float = namedfloats[u].value;
509 static inline int lex(FILE *in)
511 int parse_token = _lex(in);
512 RDBGI("token %d \"%s\"\n", parse_token, token_string);
516 static int parse_token;
518 int ao_scheme_read_list;
519 struct ao_scheme_cons *ao_scheme_read_cons;
520 struct ao_scheme_cons *ao_scheme_read_cons_tail;
521 struct ao_scheme_cons *ao_scheme_read_stack;
522 static int ao_scheme_read_state;
524 #define READ_IN_QUOTE 0x01
525 #define READ_SAW_DOT 0x02
526 #define READ_DONE_DOT 0x04
527 #define READ_SAW_VECTOR 0x08
530 push_read_stack(int read_state)
532 RDBGI("push read stack %p 0x%x\n", ao_scheme_read_cons, read_state);
534 if (ao_scheme_read_list) {
535 ao_scheme_read_stack = ao_scheme_cons_cons(ao_scheme_cons_poly(ao_scheme_read_cons),
536 ao_scheme_cons(ao_scheme_int_poly(read_state),
537 ao_scheme_cons_poly(ao_scheme_read_stack)));
538 if (!ao_scheme_read_stack)
541 ao_scheme_read_state = read_state;
542 ao_scheme_read_cons = NULL;
543 ao_scheme_read_cons_tail = NULL;
551 if (ao_scheme_read_list) {
552 ao_scheme_read_cons = ao_scheme_poly_cons(ao_scheme_read_stack->car);
553 ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
554 read_state = ao_scheme_poly_int(ao_scheme_read_stack->car);
555 ao_scheme_read_stack = ao_scheme_poly_cons(ao_scheme_read_stack->cdr);
556 for (ao_scheme_read_cons_tail = ao_scheme_read_cons;
557 ao_scheme_read_cons_tail && ao_scheme_read_cons_tail->cdr;
558 ao_scheme_read_cons_tail = ao_scheme_poly_cons(ao_scheme_read_cons_tail->cdr))
561 read_state = ao_scheme_read_state;
562 ao_scheme_read_cons = NULL;
563 ao_scheme_read_cons_tail = NULL;
564 ao_scheme_read_stack = NULL;
565 ao_scheme_read_state = 0;
568 RDBGI("pop read stack %p %d\n", ao_scheme_read_cons, read_state);
572 #ifdef AO_SCHEME_FEATURE_VECTOR
573 #define is_open(t) ((t) == OPEN || (t) == OPEN_VECTOR)
575 #define is_open(t) ((t) == OPEN)
579 ao_scheme_read(FILE *in)
581 struct ao_scheme_atom *atom;
582 struct ao_scheme_string *string;
584 ao_poly v = AO_SCHEME_NIL;
586 ao_scheme_read_list = 0;
588 ao_scheme_read_cons = ao_scheme_read_cons_tail = ao_scheme_read_stack = NULL;
590 parse_token = lex(in);
591 while (is_open(parse_token)) {
592 #ifdef AO_SCHEME_FEATURE_VECTOR
593 if (parse_token == OPEN_VECTOR)
594 read_state |= READ_SAW_VECTOR;
596 if (!push_read_stack(read_state))
597 return AO_SCHEME_NIL;
598 ao_scheme_read_list++;
600 parse_token = lex(in);
603 switch (parse_token) {
606 if (ao_scheme_read_list)
607 ao_scheme_error(AO_SCHEME_EOF, "unexpected end of file");
608 return _ao_scheme_atom_eof;
611 atom = ao_scheme_atom_intern(token_string);
613 v = ao_scheme_atom_poly(atom);
618 v = ao_scheme_integer_poly(token_int);
620 #ifdef AO_SCHEME_FEATURE_FLOAT
622 v = ao_scheme_float_get(token_float);
626 v = _ao_scheme_bool_true;
629 v = _ao_scheme_bool_false;
632 string = ao_scheme_string_new(token_string);
634 v = ao_scheme_string_poly(string);
639 #ifdef AO_SCHEME_FEATURE_QUASI
642 case UNQUOTE_SPLICING:
644 if (!push_read_stack(read_state))
645 return AO_SCHEME_NIL;
646 ao_scheme_read_list++;
647 read_state = READ_IN_QUOTE;
648 switch (parse_token) {
650 v = _ao_scheme_atom_quote;
652 #ifdef AO_SCHEME_FEATURE_QUASI
654 v = _ao_scheme_atom_quasiquote;
657 v = _ao_scheme_atom_unquote;
659 case UNQUOTE_SPLICING:
660 v = _ao_scheme_atom_unquote2dsplicing;
666 if (!ao_scheme_read_list) {
670 v = ao_scheme_cons_poly(ao_scheme_read_cons);
671 --ao_scheme_read_list;
672 read_state = pop_read_stack();
673 #ifdef AO_SCHEME_FEATURE_VECTOR
674 if (read_state & READ_SAW_VECTOR) {
675 v = ao_scheme_vector_poly(ao_scheme_list_to_vector(ao_scheme_poly_cons(v)));
676 read_state &= ~READ_SAW_VECTOR;
681 if (!ao_scheme_read_list) {
682 ao_scheme_error(AO_SCHEME_INVALID, ". outside of cons");
683 return AO_SCHEME_NIL;
685 if (!ao_scheme_read_cons) {
686 ao_scheme_error(AO_SCHEME_INVALID, ". first in cons");
687 return AO_SCHEME_NIL;
689 read_state |= READ_SAW_DOT;
693 /* loop over QUOTE ends */
695 if (!ao_scheme_read_list)
698 if (read_state & READ_DONE_DOT) {
699 ao_scheme_error(AO_SCHEME_INVALID, ". not last in cons");
700 return AO_SCHEME_NIL;
703 if (read_state & READ_SAW_DOT) {
704 read_state |= READ_DONE_DOT;
705 ao_scheme_read_cons_tail->cdr = v;
707 struct ao_scheme_cons *read = ao_scheme_cons_cons(v, AO_SCHEME_NIL);
709 return AO_SCHEME_NIL;
711 if (ao_scheme_read_cons_tail)
712 ao_scheme_read_cons_tail->cdr = ao_scheme_cons_poly(read);
714 ao_scheme_read_cons = read;
715 ao_scheme_read_cons_tail = read;
718 if (!(read_state & READ_IN_QUOTE) || !ao_scheme_read_cons->cdr)
721 v = ao_scheme_cons_poly(ao_scheme_read_cons);
722 --ao_scheme_read_list;
723 read_state = pop_read_stack();