altos/scheme: Allow unicode in lexer
author: Keith Packard <keithp@keithp.com>
Mon, 8 Jan 2018 21:46:17 +0000 (13:46 -0800)
committer: Keith Packard <keithp@keithp.com>
Mon, 8 Jan 2018 21:46:17 +0000 (13:46 -0800)
This just passes any bytes with the high bit set through the system so
programs can include UTF-8 in strings and symbols. What the heck.

Signed-off-by: Keith Packard <keithp@keithp.com>
src/scheme/ao_scheme_read.c
src/scheme/ao_scheme_string.c

index f9630d39e96ef92db8a109a69c071cd0ad45d535..3575ff3fff14cfcdaa3ac3afec22a6c960243624 100644 (file)
@@ -186,8 +186,9 @@ lexc(FILE *in)
                        c = 0;
                        lex_class = ENDOFFILE;
                } else {
-                       c &= 0x7f;
-                       lex_class = lex_classes[c];
+                       lex_class = PRINTABLE;
+                       if (c <= 0x7f)
+                               lex_class = lex_classes[c];
                }
        } while (lex_class & IGNORE);
        return c;
index c49e1e325c9872a97634614afdf6a6669d8774bc..2c6d096000d43d759c9d7a276ad5ad1c88dcafcc 100644 (file)
@@ -47,6 +47,8 @@ ao_scheme_string_alloc(int len)
 {
        struct ao_scheme_string *s;
 
+       if (len < 0)
+               return NULL;
        s = ao_scheme_alloc(len + 2);
        if (!s)
                return NULL;
@@ -182,8 +184,8 @@ ao_scheme_string_write(FILE *out, ao_poly p, bool write)
                                fputs("\\\\", out);
                                break;
                        default:
-                               if (c < ' ')
-                                       fprintf(out, "\\%03o", c);
+                               if ((uint8_t) c < ' ')
+                                       fprintf(out, "\\%03o", (uint8_t) c);
                                else
                                        putc(c, out);
                                break;