X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=src%2Faltlex.c;h=52532248fe88df3c0bf6aba7248a2e055c5a712a;hb=80972b2e54c9b88f11c27b878874fd2a6a681391;hp=da1862bdf97880e4579965a903d056f84a7c203e;hpb=17cd7246c440d6b6a20d05a86bd9f06e029573b6;p=fw%2Fsdcc diff --git a/src/altlex.c b/src/altlex.c index da1862bd..52532248 100644 --- a/src/altlex.c +++ b/src/altlex.c @@ -1,485 +1,1219 @@ /** @file altlex.c An alternate lexer to SDCC.lex. In development - ie messy and just plain wrong. + Inspired by the gcc lexer, c-lex.c. */ #include "common.h" +#include "reswords.h" #include -FILE *yyin; - -int yylineno; -int column; -char *currFname; -char *yytext; +#define DUMP_OUTPUT 0 /* Right. What are the parts of the C stream? From SDCC.lex: - D = [0..9] digits - L = [a..z A..Z _] alphanumerics and _ - H = [a..f A..F 0-9] Hex digits - E = [eE+-0-9] Digits in a float - FS = [fFlL] Specifiers for a float - IS = [uUlL] Specifiers for a int - - L[LD]* A 'token' - cant think of a good name - Check tokens against the reserved words. - If match - return the token id. - else - If in the typedef table, do stuff... - Blah. See check_type() - 0[xX]{H}+ Hex number - PENDING: specifiers - 0{D}+ Octal number - PENDING: specifiers - {D}+ Decimal - PENDING: specifiers - Floats PENDING - + D = [0..9] digits + L = [a..z A..Z _] alphanumerics and _ + H = [a..f A..F 0-9] Hex digits + E = [eE+-0-9] Digits in a float + FS = [fFlL] Specifiers for a float + IS = [uUlL] Specifiers for a int + + L[LD]* A 'token' - cant think of a good name + Check tokens against the reserved words. + If match + return the token id. + else + If in the typedef table, do stuff... + Blah. See check_type() + 0[xX]{H}+ Hex number - PENDING: specifiers + 0{D}+ Octal number - PENDING: specifiers + {D}+ Decimal - PENDING: specifiers + Floats PENDING + Exceptions: - Comment start Strip until end of comment. - ... Ellipses + Comment start Strip until end of comment. + ... Ellipses So the inputs are: - Skip whitespace - switch class - L Try to read a token - D Try to read a number - Punct Try to read punct -*/ + Skip whitespace + switch class + L Try to read a token + D Try to read a number + Punct Try to read punct + */ + +extern int fatalError; +extern int lineno; +extern char *filename; + +FILE *yyin; + +int mylineno; +char *currFname; +char *yytext; -char linebuf[10000]; -int linepos, linelen; +static char linebuf[10000]; +static int linepos, linelen; +static int end_of_file; +#ifdef __GNUC__ #define INLINE inline +#else +#define INLINE +#endif -static int underflow(void) +#define ERRSINK stderr + +static void +error (const char *sz,...) { - linelen = fread(linebuf, 1, sizeof(linebuf), yyin); - if (linelen <= 0) - return EOF; - linepos = 0; - return linebuf[linepos++]; + va_list ap; + fatalError++; + + if (filename && lineno) + { + fprintf (ERRSINK, "%s(%d):", filename, lineno); + } + fprintf (ERRSINK, "error *** "); + va_start (ap, sz); + vfprintf (ERRSINK, sz, ap); + va_end (ap); + fprintf (ERRSINK, "\n"); + fflush (ERRSINK); +} + +static int +underflow (void) +{ + linelen = fread (linebuf, 1, sizeof (linebuf), yyin); + if (linelen <= 0) + return EOF; + linepos = 0; + return linebuf[linepos++]; } -static int INLINE ygetc(void) +static int INLINE +ygetc (void) { - if (linepos < linelen) - return linebuf[linepos++]; - else - return underflow(); + if (linepos < linelen) + return linebuf[linepos++]; + else + return underflow (); }; -static int INLINE yungetc(int c) +static int INLINE +yungetc (int c) { - linebuf[--linepos] = c; - return 0; + linebuf[--linepos] = c; + return 0; } #define GETC() ygetc() #define UNGETC(_a) yungetc(_a) -//#define GETC() fgetc(yyin); -//#define UNGETC(_a) ungetc(_a, yyin) +//#define GETC() fgetc(yyin); +//#define UNGETC(_a) ungetc(_a, yyin) #define ISL(_a) (isalnum(_a) || _a == '_') #define ISALNUM(_a) isalnum(_a) -#define ISHEX(_a) isalnum(_a) +#define ISHEX(_a) isxdigit(_a) -char *stringLiteral (void) +static char * +stringLiteral (void) { - static char line[1000]; - int ch; - char *str = line; - - *str++ = '\"' ; - /* put into the buffer till we hit the */ - /* first \" */ - while (1) { - - ch = GETC(); - if (!ch) break ; /* end of input */ - /* if it is a \ then everything allowed */ - if (ch == '\\') { - *str++ = ch ; /* backslash in place */ - *str++ = GETC() ; /* following char in place */ - continue ; /* carry on */ + static char line[1000]; + int ch; + char *str = line; + + *str++ = '\"'; + /* put into the buffer till we hit the */ + /* first \" */ + while (1) + { + + ch = GETC (); + if (!ch) + break; /* end of input */ + /* if it is a \ then everything allowed */ + if (ch == '\\') + { + *str++ = ch; /* backslash in place */ + *str++ = GETC (); /* following char in place */ + continue; /* carry on */ } - - /* if new line we have a new line break */ - if (ch == '\n') break ; - - /* if this is a quote then we have work to do */ - /* find the next non whitespace character */ - /* if that is a double quote then carry on */ - if (ch == '\"') { - - while ((ch = GETC()) && isspace(ch)) ; - if (!ch) break ; - if (ch != '\"') { - UNGETC(ch) ; - break ; + + /* if new line we have a new line break */ + if (ch == '\n') + break; + + /* if this is a quote then we have work to do */ + /* find the next non whitespace character */ + /* if that is a double quote then carry on */ + if (ch == '\"') + { + + while ((ch = GETC ()) && isspace (ch)); + if (!ch) + break; + if (ch != '\"') + { + UNGETC (ch); + break; } - - continue ; - } - *str++ = ch; - } - *str++ = '\"' ; - *str = '\0'; - return line; + + continue; + } + *str++ = ch; + } + *str++ = '\"'; + *str = '\0'; + return line; } -void discard_comments(int type) +static void +discard_comments (int type) { - int c; - if (type == '*') { - do { - c = GETC(); - if (c == '*') { - c = GETC(); - if (c == '/') - return; - } - else if (c == EOF) + int c; + if (type == '*') + { + do + { + c = GETC (); + if (c == '*') + { + c = GETC (); + if (c == '/') return; - } while (1); + } + else if (c == EOF) + return; + } + while (1); } - else if (type == '/') { - while (c != '\n' && c != EOF) { - c = GETC(); + else if (type == '/') + { + do + { + c = GETC (); } + while (c != '\n' && c != EOF); } - else { - assert(0); + else + { + assert (0); } } -#define TKEYWORD(_a) return _a - -int check_token(const char *sz) +/* will return 1 if the string is a part + of a target specific keyword */ +static INLINE int +isTargetKeyword (const char *s) { - if (!strcmp(sz, "at")) { - TKEYWORD(AT) ; } - - else if (!strcmp(sz, "auto")) { - return(AUTO); } - - else if (!strcmp(sz, "bit")) { - TKEYWORD(BIT) ; } - - else if (!strcmp(sz, "break")) { - return(BREAK); } - - else if (!strcmp(sz, "case")) { - return(CASE); } - - else if (!strcmp(sz, "char")) { - return(CHAR); } - - else if (!strcmp(sz, "code")) { - TKEYWORD(CODE); } - - else if (!strcmp(sz, "const")) { - return(CONST); } - - else if (!strcmp(sz, "continue")) { - return(CONTINUE); } - - else if (!strcmp(sz, "critical")) { - TKEYWORD(CRITICAL); } - - else if (!strcmp(sz, "data")) { - TKEYWORD(DATA); } - - else if (!strcmp(sz, "default")) { - return(DEFAULT); } - - else if (!strcmp(sz, "do")) { - return(DO); } - - else if (!strcmp(sz, "double")) { - werror(W_DOUBLE_UNSUPPORTED);return(FLOAT); } + int i; - else if (!strcmp(sz, "else")) { - return(ELSE); } - - else if (!strcmp(sz, "enum")) { - return(ENUM); } - - else if (!strcmp(sz, "extern")) { - return(EXTERN); } - - else if (!strcmp(sz, "far")) { - TKEYWORD(XDATA); } - - else if (!strcmp(sz, "eeprom")) { - TKEYWORD(EEPROM); } - - else if (!strcmp(sz, "float")) { - return(FLOAT); } - - else if (!strcmp(sz, "flash")) { - TKEYWORD(CODE);} - - else if (!strcmp(sz, "for")) { - return(FOR); } - - else if (!strcmp(sz, "goto")) { - return(GOTO); } - - else if (!strcmp(sz, "idata")) { - TKEYWORD(IDATA);} - - else if (!strcmp(sz, "if")) { - return(IF); } - - else if (!strcmp(sz, "int")) { - return(INT); } - - else if (!strcmp(sz, "interrupt")) { - return(INTERRUPT);} - - else if (!strcmp(sz, "nonbanked")) { - TKEYWORD(NONBANKED);} - - else if (!strcmp(sz, "banked")) { - TKEYWORD(BANKED);} - - else if (!strcmp(sz, "long")) { - return(LONG); } - - else if (!strcmp(sz, "near")) { - TKEYWORD(DATA);} - - else if (!strcmp(sz, "pdata")) { - TKEYWORD(PDATA); } - - else if (!strcmp(sz, "reentrant")) { - TKEYWORD(REENTRANT);} - - else if (!strcmp(sz, "register")) { - return(REGISTER); } - - else if (!strcmp(sz, "return")) { - return(RETURN); } - - else if (!strcmp(sz, "sfr")) { - TKEYWORD(SFR) ; } - - else if (!strcmp(sz, "sbit")) { - TKEYWORD(SBIT) ; } - - else if (!strcmp(sz, "short")) { - return(SHORT); } - - else if (!strcmp(sz, "signed")) { - return(SIGNED); } - - else if (!strcmp(sz, "sizeof")) { - return(SIZEOF); } - - else if (!strcmp(sz, "sram")) { - TKEYWORD(XDATA);} - - else if (!strcmp(sz, "static")) { - return(STATIC); } - - else if (!strcmp(sz, "struct")) { - return(STRUCT); } + if (port->keywords == NULL) + return 0; + for (i = 0; port->keywords[i]; i++) + { + if (strcmp (port->keywords[i], s) == 0) + return 1; + } - else if (!strcmp(sz, "switch")) { - return(SWITCH); } + return 0; +} - else if (!strcmp(sz, "typedef")) { - return(TYPEDEF); } +static INLINE int +check_token (const char *sz) +{ + const struct reserved_words *p; + p = is_reserved_word (sz, strlen (sz)); + if (p) + { + if (!p->is_special || isTargetKeyword (sz)) + return p->token; + } - else if (!strcmp(sz, "union")) { - return(UNION); } + /* check if it is in the typedef table */ + if (findSym (TypedefTab, NULL, sz)) + { + strncpyz (yylval.yychar, sz, sizeof(yylval.yychar)); + return TYPE_NAME; + } + else + { + strncpyz (yylval.yychar, sz, sizeof(yylval.yychar)); + return IDENTIFIER; + } +} - else if (!strcmp(sz, "unsigned")) { - return(UNSIGNED); } +static void +handle_pragma (void) +{ + char line[128], *p; + int c; + + c = GETC (); + while (c == '\t' || c == ' ') + c = GETC (); + p = line; + while (!isspace (c)) + { + *p++ = c; + c = GETC (); + } + *p = '\0'; + if (line[0] == '\0') + error ("Missing argument to pragma"); + else + { + /* First give the port a chance */ + if (port->process_pragma && !port->process_pragma (line)) + return; + /* PENDING: all the SDCC shared pragmas */ + /* Nothing handled it */ + error ("Unrecognised #pragma %s", line); + } +} - else if (!strcmp(sz, "void")) { - return(VOID); } +static void +handle_line (void) +{ + int c; + char line[128], *p; + + c = GETC (); + while (c == '\t' || c == ' ') + c = GETC (); + p = line; + while (isdigit (c)) + { + *p++ = c; + c = GETC (); + } + *p = '\0'; + if (line[0] == '\0') + error ("Error in number in #line"); + /* This is weird but cpp seems to add an extra three to the line no */ + mylineno = atoi (line) - 3; + lineno = mylineno; + /* Fetch the filename if there is one */ + while (c == '\t' || c == ' ') + c = GETC (); + if (c == '\"') + { + p = line; + c = GETC (); + while (c != '\"' && c != EOF && c != '\n') + { + *p++ = c; + c = GETC (); + } + if (c == '\"') + { + *p = '\0'; + currFname = Safe_strdup (line); + } + filename = currFname; + } +} - else if (!strcmp(sz, "volatile")) { - return(VOLATILE); } +static INLINE void +invalid_directive (void) +{ + error ("Invalid directive"); +} - else if (!strcmp(sz, "using")) { - TKEYWORD(USING); } +static INLINE int +check_newline (void) +{ + int c; + mylineno++; + lineno = mylineno; + + /* Skip any leading white space */ + c = GETC (); + while (c == '\t' || c == ' ') + c = GETC (); + /* Were only interested in #something */ + if (c != '#') + return c; + c = GETC (); + while (c == '\t' || c == ' ') + c = GETC (); + /* The text in the stream is the type of directive */ + switch (c) + { + case 'l': + /* Start of line? */ + if (GETC () == 'i' && GETC () == 'n' && GETC () == 'e') + { + c = GETC (); + if (c == '\t' || c == ' ') + handle_line (); + else + invalid_directive (); + } + else + invalid_directive (); + break; + case 'p': + /* Start of pragma? */ + if (GETC () == 'r' && GETC () == 'a' && GETC () == 'g' && + GETC () == 'm' && GETC () == 'a') + { + c = GETC (); + if (c == '\t' || c == ' ') + handle_pragma (); + else + invalid_directive (); + } + else + invalid_directive (); + break; + default: + invalid_directive (); + } + /* Discard from here until the start of the next line */ + while (c != '\n' && c != EOF) + c = GETC (); + return c; +} - else if (!strcmp(sz, "while")) { - return(WHILE); } +static int +skip_whitespace (int c) +{ + while (1) + { + switch (c) + { + case ' ': + case '\t': + case '\f': + case '\v': + case '\b': + case '\r': + c = GETC (); + break; + case '\n': + c = check_newline (); + default: + return c; + } + } +} - else if (!strcmp(sz, "xdata")) { - TKEYWORD(XDATA); } +void +yyerror (const char *s) +{ + if (end_of_file) + error ("%s at end of of input", s); + else if (yytext[0] == '\0') + error ("%s at null character", s); + else if (yytext[0] == '"') + error ("%s before string constant", s); + else if (yytext[0] == '\'') + error ("%s before character constant", s); + else + error ("%s before %s", s, yytext); +} - else if (!strcmp(sz, "_data")) { - TKEYWORD(_NEAR); } +static int +_yylex (void) +{ + int c; + static char line[128]; + char *p; - else if (!strcmp(sz, "_code")) { - TKEYWORD(_CODE); } + yytext = line; - else if (!strcmp(sz, "_eeprom")) { - TKEYWORD(_EEPROM); } + c = GETC (); + while (1) + { + switch (c) + { + case ' ': + case '\t': + case '\f': + case '\v': + case '\b': + /* Skip whitespace */ + c = GETC (); + break; + case '\r': + case '\n': + c = skip_whitespace (c); + break; + case '#': + UNGETC (c); + c = check_newline (); + break; + default: + goto past_ws; + } + } - else if (!strcmp(sz, "_flash")) { - TKEYWORD(_CODE); } +past_ws: + /* Handle comments first */ + if (c == '/') + { + int c2 = GETC (); + if (c2 == '*' || c2 == '/') + { + discard_comments (c2); + c = GETC (); + } + else + UNGETC (c2); + } + switch (c) + { + case EOF: + end_of_file = TRUE; + line[0] = '\0'; + return 0; + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + /* Start of a token. Parse. */ + p = line; + *p++ = c; + c = GETC (); + while (ISL (c)) + { + *p++ = c; + c = GETC (); + } + *p = '\0'; + UNGETC (c); + return check_token (line); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + p = line; + *p++ = c; + c = GETC (); + if (c == 'x' || c == 'X') + { + *p++ = c; + c = GETC (); + } + while (ISHEX (c)) + { + *p++ = c; + c = GETC (); + } + if (c == 'U' || c == 'u' || c == 'L' || c == 'l') + { + *p++ = c; + c = GETC (); + } + if (c == 'U' || c == 'u' || c == 'L' || c == 'l') + { + *p++ = c; + c = GETC (); + } + *p = '\0'; + UNGETC (c); + yylval.val = constVal (line); + return CONSTANT; + case '\"': + /* A string */ + p = stringLiteral (); + yylval.val = strVal (p); + return (STRING_LITERAL); + case '\'': + /* Possible formats: + ['\n', '\\', '\'', '\"'...] + ['a'...] + */ + p = line; + *p++ = c; + c = GETC (); + if (c == '\\') + { + *p++ = c; + c = GETC (); + /* Fall through */ + } + *p++ = c; + c = GETC (); + *p++ = c; + *p = '\0'; + if (c != '\'') + { + error ("Unrecognised character constant %s", line); + } + yylval.val = charVal (line); + return CONSTANT; + case '=': + case '&': + case '!': + case '-': + case '+': + case '*': + case '/': + case '%': + case '<': + case '>': + case '^': + case '|': + { + /* Cases which can be compounds */ + /* The types and classes of composites are: + >>= <<= + += -= *= /= %= &= ^= |= + >> << ++ -- + && || + <= >= == != + -> + So a composite started by char 'x' can be: + 1. Followed by itself then an equals + 2. Followed by itself + 3. Followed by an equals + 4. Be a '->' + 5. Be by itself + */ + int next = GETC (); + /* Class 1 and 2 */ + if (next == c) + { + next = GETC (); + /* Class 1 */ + if (next == '=') + { + switch (c) + { + case '>': // >>= + + yylval.yyint = RIGHT_ASSIGN; + return RIGHT_ASSIGN; + case '<': // <<= + + yylval.yyint = LEFT_ASSIGN; + return LEFT_ASSIGN; + default: + error ("Unrecognised token %c%c=", c, c); + } + } + else + { + /* Push the next char back on and find the class */ + UNGETC (next); + /* Case 2 */ + switch (c) + { + case '>': // >> + + return RIGHT_OP; + case '<': // << + + return LEFT_OP; + case '+': + return INC_OP; + case '-': + return DEC_OP; + case '&': + return AND_OP; + case '|': + return OR_OP; + case '=': + return EQ_OP; + default: + error ("Unrecognised token %c%c", c, c); + } + } + } + /* Case 3 */ + else if (next == '=') + { + int result = 0; + switch (c) + { + case '+': + result = ADD_ASSIGN; + break; + case '-': + result = SUB_ASSIGN; + break; + case '*': + result = MUL_ASSIGN; + break; + case '/': + result = DIV_ASSIGN; + break; + case '%': + result = MOD_ASSIGN; + break; + case '&': + result = AND_ASSIGN; + break; + case '^': + result = XOR_ASSIGN; + break; + case '|': + result = OR_ASSIGN; + break; + case '<': + result = LE_OP; + break; + case '>': + result = GE_OP; + break; + case '!': + result = NE_OP; + break; + default: + error ("Unrecognised token %c=", c); + } + if (result) + { + yylval.yyint = result; + return result; + } + } + /* Case 4 */ + else if (c == '-' && next == '>') + { + return PTR_OP; + } + /* Case 5 */ + else + { + UNGETC (next); + return c; + } + break; + } + case '{': + NestLevel++; + return c; + case '}': + NestLevel--; + return c; + case '.': + c = GETC (); + if (c == '.') + { + c = GETC (); + if (c == '.') + { + return VAR_ARGS; + } + } + UNGETC (c); + return '.'; + case '[': + case ']': + return c; + case ',': + case ':': + case '(': + case ')': + case '~': + case '?': + case ';': + /* Special characters that cant be part of a composite */ + return c; + default: + error ("Unhandled character %c", c); + } + return 0; +} - else if (!strcmp(sz, "_generic")) { - TKEYWORD(_GENERIC); } +#define ENTRY(_a) case (_a): printf(#_a); break; - else if (!strcmp(sz, "_near")) { - TKEYWORD(_NEAR); } +int +yylex (void) +{ + int ret = _yylex (); +#if DUMP_OUTPUT + static int lastpos = 0; + char tmp; + + printf ("Returning "); + switch (ret) + { + /* Wrapper */ + ENTRY (IDENTIFIER); + ENTRY (TYPE_NAME); + ENTRY (CONSTANT); + ENTRY (STRING_LITERAL); + ENTRY (SIZEOF); + ENTRY (PTR_OP); + ENTRY (INC_OP); + ENTRY (DEC_OP); + ENTRY (LEFT_OP); + ENTRY (RIGHT_OP); + ENTRY (LE_OP); + ENTRY (GE_OP); + ENTRY (EQ_OP); + ENTRY (NE_OP); + ENTRY (AND_OP); + ENTRY (OR_OP); + ENTRY (MUL_ASSIGN); + ENTRY (DIV_ASSIGN); + ENTRY (MOD_ASSIGN); + ENTRY (ADD_ASSIGN); + ENTRY (SUB_ASSIGN); + ENTRY (LEFT_ASSIGN); + ENTRY (RIGHT_ASSIGN); + ENTRY (AND_ASSIGN); + ENTRY (XOR_ASSIGN); + ENTRY (OR_ASSIGN); + ENTRY (TYPEDEF); + ENTRY (EXTERN); + ENTRY (STATIC); + ENTRY (AUTO); + ENTRY (REGISTER); + ENTRY (CODE); + ENTRY (EEPROM); + ENTRY (INTERRUPT); + ENTRY (SFR); + ENTRY (AT); + ENTRY (SBIT); + ENTRY (REENTRANT); + ENTRY (USING); + ENTRY (XDATA); + ENTRY (DATA); + ENTRY (IDATA); + ENTRY (PDATA); + ENTRY (VAR_ARGS); + ENTRY (CRITICAL); + ENTRY (NONBANKED); + ENTRY (BANKED); + ENTRY (CHAR); + ENTRY (SHORT); + ENTRY (INT); + ENTRY (LONG); + ENTRY (SIGNED); + ENTRY (UNSIGNED); + ENTRY (FLOAT); + ENTRY (FIXED16X16); + ENTRY (DOUBLE); + ENTRY (CONST); + ENTRY (VOLATILE); + ENTRY (VOID); + ENTRY (BIT); + ENTRY (STRUCT); + ENTRY (UNION); + ENTRY (ENUM); + ENTRY (ELIPSIS); + ENTRY (RANGE); + ENTRY (FAR); + ENTRY (_XDATA); + ENTRY (_CODE); + ENTRY (_GENERIC); + ENTRY (_NEAR); + ENTRY (_PDATA); + ENTRY (_IDATA); + ENTRY (_EEPROM); + ENTRY (CASE); + ENTRY (DEFAULT); + ENTRY (IF); + ENTRY (ELSE); + ENTRY (SWITCH); + ENTRY (WHILE); + ENTRY (DO); + ENTRY (FOR); + ENTRY (GOTO); + ENTRY (CONTINUE); + ENTRY (BREAK); + ENTRY (RETURN); + ENTRY (INLINEASM); + ENTRY (IFX); + ENTRY (ADDRESS_OF); + ENTRY (GET_VALUE_AT_ADDRESS); + ENTRY (SPIL); + ENTRY (UNSPIL); + ENTRY (GETHBIT); + ENTRY (BITWISEAND); + ENTRY (UNARYMINUS); + ENTRY (IPUSH); + ENTRY (IPOP); + ENTRY (PCALL); + ENTRY (ENDFUNCTION); + ENTRY (JUMPTABLE); + ENTRY (RRC); + ENTRY (RLC); + ENTRY (CAST); + ENTRY (CALL); + ENTRY (PARAM); + ENTRY (NULLOP); + ENTRY (BLOCK); + ENTRY (LABEL); + ENTRY (RECEIVE); + ENTRY (SEND); + default: + printf ("default: %c", ret); + } + tmp = linebuf[linepos]; + linebuf[linepos] = '\0'; + printf (" for %s (%u bytes)\n", linebuf + lastpos, linepos - lastpos); + linebuf[linepos] = tmp; + lastpos = linepos; + fflush (stdout); +#endif + return ret; +} - else if (!strcmp(sz, "_sram")) { - TKEYWORD(_XDATA);} +#define TEST(_a) (_a) ? (void)0 : printf("Test %s failed\n", #_a); - else if (!strcmp(sz, "_xdata")) { - TKEYWORD(_XDATA);} +int +altlex_testparse (const char *input) +{ + /* Fiddle with the read-ahead buffer to insert ourselves */ + strncpyz (linebuf, input, sizeof(linebuf)); + linelen = strlen (linebuf) + 1; + linepos = 0; - else if (!strcmp(sz, "_pdata")) { - TKEYWORD(_PDATA); } + return yylex (); +} - else if (!strcmp(sz, "_idata")) { - TKEYWORD(_IDATA); } +int +altlex_testchar (const char *input) +{ + value *val; + if (altlex_testparse (input) != CONSTANT) + return -2; + val = yylval.val; + if (val->type->class != SPECIFIER) + return -3; + if (SPEC_NOUN (val->type) != V_CHAR) + return -4; + if (SPEC_SCLS (val->type) != S_LITERAL) + return -5; + return SPEC_CVAL (val->type).v_int; +} - /* check if it is in the typedef table */ - if (findSym(TypedefTab,NULL,sz)) { - strcpy(yylval.yychar,sz); - return (TYPE_NAME) ; - } - else { - strcpy (yylval.yychar,sz); - return(IDENTIFIER); - } +int +altlex_testnum (const char *input) +{ + value *val; + if (altlex_testparse (input) != CONSTANT) + return -2; + val = yylval.val; + if (val->type->class != SPECIFIER) + return -3; + if (SPEC_NOUN (val->type) != V_INT) + return -4; + if (SPEC_SCLS (val->type) != S_LITERAL) + return -5; + if (SPEC_USIGN (val->type)) + return SPEC_CVAL (val->type).v_uint; + else + return SPEC_CVAL (val->type).v_int; } -int yylex(void) +int +altlex_runtests (void) { - int c; - char line[128]; - char *p; - - c = GETC(); - while (1) { - /* Handle comments first */ - if (c == '/') { - int c2 = GETC(); - if (c2 == '*' || c2 == '/') { - discard_comments(c2); - c = GETC(); - continue; - } - else - UNGETC(c2); - } - switch (c) { - case EOF: - return 0; - case ' ': - case '\t': - case '\r': - case '\n': - /* Skip whitespace */ - break; - case 'a': case 'b': case 'c': case 'd': - case 'e': case 'f': case 'g': case 'h': - case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': - case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': - case 'E': case 'F': case 'G': case 'H': - case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': - case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '_': - /* Start of a token. Parse. */ - p = line; - *p++ = c; - c = GETC(); - while (ISL(c)) { - *p++ = c; - c = GETC(); - } - *p = '\0'; - UNGETC(c); - return check_token(line); - case '0': case '1': - case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - p = line; - *p++ = c; - c = GETC(); - if (c == 'x' || c == 'X') { - *p++ = c; - c = GETC(); - } - while (ISHEX(c)) { - *p++ = c; - c = GETC(); - } - *p = '\0'; - UNGETC(c); - yylval.val = constVal(line); - return CONSTANT; - case '\"': - /* A string */ - p = stringLiteral(); - yylval.val = strVal(p); - return(STRING_LITERAL); - case '\'': - /* ie '\n' */ - break; - case '#': - /* Assume a pragma and toast the rest of the line. */ - c = GETC(); - while (c != '\n') { - c = GETC(); - } - break; - case '=': - case '&': - case '!': - case '-': - case '+': - case '*': - case '/': - case '%': - case '<': - case '>': - case '^': - case '|': - /* Cases which can be compounds */ - return c; - case '{': - NestLevel++; - return c; - case '}': - NestLevel--; - return c; - case '.': - c = GETC(); - if (c == '.') { - c = GETC(); - if (c == '.') { - return VAR_ARGS; - } - } - UNGETC(c); - /* Fall through */ - case ',': - case ':': - case '(': case ')': - case '[': case ']': - case '~': - case '?': - /* Special characters that cant be part of a composite */ - return c; - default: - printf("Unhandled char %c\n", c); - } - c = GETC(); - } - return 0; + /* These conditions are ripped directly from SDCC.lex */ + /* First check the parsing of the basic tokens */ + TEST (altlex_testparse (">>=") == RIGHT_ASSIGN); + TEST (altlex_testparse ("<<=") == LEFT_ASSIGN); + TEST (altlex_testparse ("+=") == ADD_ASSIGN); + TEST (altlex_testparse ("-=") == SUB_ASSIGN); + TEST (altlex_testparse ("*=") == MUL_ASSIGN); + TEST (altlex_testparse ("/=") == DIV_ASSIGN); + TEST (altlex_testparse ("%=") == MOD_ASSIGN); + TEST (altlex_testparse ("&=") == AND_ASSIGN); + TEST (altlex_testparse ("^=") == XOR_ASSIGN); + TEST (altlex_testparse ("|=") == OR_ASSIGN); + TEST (altlex_testparse (">>") == RIGHT_OP); + TEST (altlex_testparse ("<<") == LEFT_OP); + TEST (altlex_testparse ("++") == INC_OP); + TEST (altlex_testparse ("--") == DEC_OP); + TEST (altlex_testparse ("->") == PTR_OP); + TEST (altlex_testparse ("&&") == AND_OP); + TEST (altlex_testparse ("||") == OR_OP); + TEST (altlex_testparse ("<=") == LE_OP); + TEST (altlex_testparse (">=") == GE_OP); + TEST (altlex_testparse ("==") == EQ_OP); + TEST (altlex_testparse ("!=") == NE_OP); + TEST (altlex_testparse (";") == ';'); + TEST (altlex_testparse ("{") == '{'); + TEST (altlex_testparse ("}") == '}'); + TEST (altlex_testparse (",") == ','); + TEST (altlex_testparse (":") == ':'); + TEST (altlex_testparse ("=") == '='); + TEST (altlex_testparse ("(") == '('); + TEST (altlex_testparse (")") == ')'); + TEST (altlex_testparse ("[") == '['); + TEST (altlex_testparse ("]") == ']'); + TEST (altlex_testparse (".") == '.'); + TEST (altlex_testparse ("&") == '&'); + TEST (altlex_testparse ("!") == '!'); + TEST (altlex_testparse ("~") == '~'); + TEST (altlex_testparse ("-") == '-'); + TEST (altlex_testparse ("+") == '+'); + TEST (altlex_testparse ("*") == '*'); + TEST (altlex_testparse ("/") == '/'); + TEST (altlex_testparse ("%") == '%'); + TEST (altlex_testparse ("<") == '<'); + TEST (altlex_testparse (">") == '>'); + TEST (altlex_testparse ("^") == '^'); + TEST (altlex_testparse ("|") == '|'); + TEST (altlex_testparse ("?") == '?'); + + /* Now some character constants */ + TEST (altlex_testchar ("'1'") == '1'); + TEST (altlex_testchar ("'a'") == 'a'); + TEST (altlex_testchar ("'A'") == 'A'); + TEST (altlex_testchar ("'z'") == 'z'); + TEST (altlex_testchar ("'Z'") == 'Z'); + TEST (altlex_testchar ("'\n'") == '\n'); + TEST (altlex_testchar ("'\\\\'") == '\\'); + TEST (altlex_testchar ("'\\''") == '\''); + + /* And some numbers */ + TEST (altlex_testnum ("0") == 0); + TEST (altlex_testnum ("1") == 1); + TEST (altlex_testnum ("075") == 075); + TEST (altlex_testnum ("0xfeed") == 0xfeed); + TEST (altlex_testnum ("0xFEED") == 0xFEED); + TEST (altlex_testnum ("0x00005678") == 0x5678); + + /* Keywords */ + TEST (altlex_testparse ("auto") == AUTO); + TEST (altlex_testparse ("break") == BREAK); + TEST (altlex_testparse ("case") == CASE); + TEST (altlex_testparse ("char") == CHAR); + TEST (altlex_testparse ("const") == CONST); + TEST (altlex_testparse ("continue") == CONTINUE); + TEST (altlex_testparse ("default") == DEFAULT); + TEST (altlex_testparse ("do") == DO); + /* Prints a warning */ + // TEST(altlex_testparse("double") == FLOAT); + TEST (altlex_testparse ("else") == ELSE); + TEST (altlex_testparse ("enum") == ENUM); + TEST (altlex_testparse ("extern") == EXTERN); + TEST (altlex_testparse ("float") == FLOAT); + TEST (altlex_testparse ("fixed16x16") == FIXED16X16); + TEST (altlex_testparse ("for") == FOR); + TEST (altlex_testparse ("goto") == GOTO); + TEST (altlex_testparse ("if") == IF); + TEST (altlex_testparse ("int") == INT); + TEST (altlex_testparse ("interrupt") == INTERRUPT); + TEST (altlex_testparse ("long") == LONG); + TEST (altlex_testparse ("register") == REGISTER); + TEST (altlex_testparse ("return") == RETURN); + TEST (altlex_testparse ("short") == SHORT); + TEST (altlex_testparse ("signed") == SIGNED); + TEST (altlex_testparse ("sizeof") == SIZEOF); + TEST (altlex_testparse ("static") == STATIC); + TEST (altlex_testparse ("struct") == STRUCT); + TEST (altlex_testparse ("switch") == SWITCH); + TEST (altlex_testparse ("typedef") == TYPEDEF); + TEST (altlex_testparse ("union") == UNION); + TEST (altlex_testparse ("unsigned") == UNSIGNED); + TEST (altlex_testparse ("void") == VOID); + TEST (altlex_testparse ("volatile") == VOLATILE); + TEST (altlex_testparse ("while") == WHILE); + TEST (altlex_testparse ("...") == VAR_ARGS); + +#if 0 + /* Platform specific keywords */ + TEST (altlex_testparse ("sram") ==) + { + count (); + TKEYWORD (XDATA); + } + TEST (altlex_testparse ("using") ==) + { + count (); + TKEYWORD (USING); + } + TEST (altlex_testparse ("near") ==) + { + count (); + TKEYWORD (DATA); + } + TEST (altlex_testparse ("at") ==) + { + count (); + TKEYWORD (AT); + } + TEST (altlex_testparse ("bit") ==) + { + count (); + TKEYWORD (BIT); + } + TEST (altlex_testparse ("code") ==) + { + count (); + TKEYWORD (CODE); + } + TEST (altlex_testparse ("critical") ==) + { + count (); + TKEYWORD (CRITICAL); + } + TEST (altlex_testparse ("data") ==) + { + count (); + TKEYWORD (DATA); + } + TEST (altlex_testparse ("far") ==) + { + count (); + TKEYWORD (XDATA); + } + TEST (altlex_testparse ("eeprom") ==) + { + count (); + TKEYWORD (EEPROM); + } + TEST (altlex_testparse ("flash") ==) + { + count (); + TKEYWORD (CODE); + } + TEST (altlex_testparse ("idata") ==) + { + count (); + TKEYWORD (IDATA); + } + TEST (altlex_testparse ("nonbanked") ==) + { + count (); + TKEYWORD (NONBANKED); + } + TEST (altlex_testparse ("banked") ==) + { + count (); + TKEYWORD (BANKED); + } + TEST (altlex_testparse ("pdata") ==) + { + count (); + TKEYWORD (PDATA); + } + TEST (altlex_testparse ("reentrant") ==) + { + count (); + TKEYWORD (REENTRANT); + } + TEST (altlex_testparse ("sfr") ==) + { + count (); + TKEYWORD (SFR); + } + TEST (altlex_testparse ("sbit") ==) + { + count (); + TKEYWORD (SBIT); + } + TEST (altlex_testparse ("xdata") ==) + { + count (); + TKEYWORD (XDATA); + } + TEST (altlex_testparse ("_data") ==) + { + count (); + TKEYWORD (_NEAR); + } + TEST (altlex_testparse ("_code") ==) + { + count (); + TKEYWORD (_CODE); + } + TEST (altlex_testparse ("_eeprom") ==) + { + count (); + TKEYWORD (_EEPROM); + } + TEST (altlex_testparse ("_flash") ==) + { + count (); + TKEYWORD (_CODE); + } + TEST (altlex_testparse ("_generic") ==) + { + count (); + TKEYWORD (_GENERIC); + } + TEST (altlex_testparse ("_near") ==) + { + count (); + TKEYWORD (_NEAR); + } + TEST (altlex_testparse ("_sram") ==) + { + count (); + TKEYWORD (_XDATA); + } + TEST (altlex_testparse ("_xdata") ==) + { + count (); + TKEYWORD (_XDATA); + } + TEST (altlex_testparse ("_pdata") ==) + { + count (); + TKEYWORD (_PDATA); + } + TEST (altlex_testparse ("_idata") ==) + { + count (); + TKEYWORD (_IDATA); + } +#endif + + return 0; }