From 217472c41ba2d2400067b42dac22d2efee618d3d Mon Sep 17 00:00:00 2001 From: michaelh Date: Sun, 20 Aug 2000 01:39:00 +0000 Subject: [PATCH] Split out yyerror. Added error, #line, and #pragma handling to altlex. Added a few const's here and there. git-svn-id: https://sdcc.svn.sourceforge.net/svnroot/sdcc/trunk/sdcc@327 4a8a32a2-be11-0410-ad9d-d568d2c75423 --- src/SDCC.lex | 14 ++ src/SDCC.y | 17 -- src/SDCCerr.c | 2 +- src/SDCCsymt.c | 4 +- src/SDCCsymt.h | 2 +- src/altlex.c | 625 +++++++++++++++++++++++++++++++------------------ 6 files changed, 416 insertions(+), 248 deletions(-) diff --git a/src/SDCC.lex b/src/SDCC.lex index 94ca769f..a39f0c4f 100644 --- a/src/SDCC.lex +++ b/src/SDCC.lex @@ -521,3 +521,17 @@ int isTargetKeyword(char *s) return 0; } + +extern int fatalError; + +int yyerror(char *s) +{ + fflush(stdout); + + if (yylineno && filename) + fprintf(stdout,"\n%s(%d) %s: token -> '%s' ; column %d\n", + filename,yylineno, + s,yytext,column); + fatalError++; + return 0; +} diff --git a/src/SDCC.y b/src/SDCC.y index add8d202..44b19d5f 100644 --- a/src/SDCC.y +++ b/src/SDCC.y @@ -1334,20 +1334,3 @@ identifier ; %% -extern unsigned char *yytext; -extern int column; -extern char *filename; -extern int fatalError; - -int yyerror(char *s) -{ - fflush(stdout); - - if ( yylineno ) - fprintf(stderr,"\n%s(%d) %s: token -> '%s' ; column %d\n", - filename,yylineno, - s,yytext,column); - fatalError++; - return 0; -} - diff --git a/src/SDCCerr.c b/src/SDCCerr.c index d2e77054..01a54d36 100644 --- a/src/SDCCerr.c +++ b/src/SDCCerr.c @@ -2,7 +2,7 @@ #include "common.h" -#define USE_STDOUT_FOR_ERRORS 1 +#define USE_STDOUT_FOR_ERRORS 0 #if USE_STDOUT_FOR_ERRORS #define ERRSINK stdout diff --git a/src/SDCCsymt.c b/src/SDCCsymt.c index 7ef86605..40aa386b 100644 --- a/src/SDCCsymt.c +++ b/src/SDCCsymt.c @@ -60,7 +60,7 @@ bucket *newBucket () /*-----------------------------------------------------------------*/ /* hashKey - computes the hashkey given a symbol name */ /*-----------------------------------------------------------------*/ -int hashKey (char *s) +int hashKey (const char *s) { unsigned long key = 0; @@ -144,7 +144,7 @@ void deleteSym ( bucket **stab, void *sym, char *sname) /*-----------------------------------------------------------------*/ /* findSym - finds a symbol in a table */ /*-----------------------------------------------------------------*/ -void *findSym ( bucket **stab, void *sym, char *sname) +void *findSym ( bucket **stab, void *sym, const char *sname) { bucket *bp ; diff --git a/src/SDCCsymt.h b/src/SDCCsymt.h index 36e361d8..0e1a7193 100644 --- a/src/SDCCsymt.h +++ b/src/SDCCsymt.h @@ -416,7 +416,7 @@ void initHashT ( ); bucket *newBucket ( ); void addSym ( bucket ** , void *, char *, int, int); void deleteSym ( bucket ** , void *, char *); -void *findSym ( bucket ** , void *, char *); +void *findSym ( bucket ** , void *, const char *); void *findSymWithLevel ( bucket ** , struct symbol * ); void *findSymWithBlock ( bucket ** , struct symbol *,int ); #include "SDCCmem.h" diff --git a/src/altlex.c b/src/altlex.c index 572826b6..816b2fad 100644 --- a/src/altlex.c +++ b/src/altlex.c @@ -1,6 +1,7 @@ /** @file altlex.c An alternate lexer to SDCC.lex. In development - ie messy and just plain wrong. + Inspired by the gcc lexer, c-lex.c. */ #include "common.h" #include "reswords.h" @@ -8,13 +9,6 @@ #define DUMP_OUTPUT 0 -FILE *yyin; - -int yylineno; -int column; -char *currFname; -char *yytext; - /* Right. What are the parts of the C stream? From SDCC.lex: D = [0..9] digits L = [a..z A..Z _] alphanumerics and _ @@ -47,25 +41,37 @@ char *yytext; Punct Try to read punct */ -char linebuf[10000]; -int linepos, linelen; +extern int fatalError; +extern int lineno; +extern char *filename; + +FILE *yyin; + +int yylineno; +char *currFname; +char *yytext; +static char linebuf[10000]; +static int linepos, linelen; +static int end_of_file; + +#ifdef __GNUC__ #define INLINE inline -#define ERRSINK stdout +#else +#define INLINE +#endif -extern int fatalError ; -extern int lineno ; -extern char *filename; +#define ERRSINK stderr static void error(const char *sz, ...) { va_list ap; fatalError++; - if ( filename && lineno ) { + if (filename && lineno) { fprintf(ERRSINK, "%s(%d):",filename,lineno); } - fprintf(ERRSINK, "error:"); + fprintf(ERRSINK, "error *** "); va_start(ap, sz); vfprintf(ERRSINK, sz, ap); va_end(ap); @@ -103,7 +109,7 @@ static int INLINE yungetc(int c) //#define UNGETC(_a) ungetc(_a, yyin) #define ISL(_a) (isalnum(_a) || _a == '_') #define ISALNUM(_a) isalnum(_a) -#define ISHEX(_a) isalnum(_a) +#define ISHEX(_a) isxdigit(_a) static char *stringLiteral (void) { @@ -176,7 +182,7 @@ static void discard_comments(int type) /* will return 1 if the string is a part of a target specific keyword */ -static int isTargetKeyword(const char *s) +static INLINE int isTargetKeyword(const char *s) { int i; @@ -190,7 +196,7 @@ static int isTargetKeyword(const char *s) return 0; } -static int check_token(const char *sz) +static INLINE int check_token(const char *sz) { const struct reserved_words *p; p = is_reserved_word(sz, strlen(sz)); @@ -210,255 +216,420 @@ static int check_token(const char *sz) } } -static int _yylex(void) +static void handle_pragma(void) { + char line[128], *p; int c; - char line[128]; - char *p; c = GETC(); - while (1) { - /* Handle comments first */ - if (c == '/') { - int c2 = GETC(); - if (c2 == '*' || c2 == '/') { - discard_comments(c2); - c = GETC(); - continue; - } + while (c == '\t' || c == ' ') + c = GETC(); + p = line; + while (!isspace(c)) { + *p++ = c; + c = GETC(); + } + *p = '\0'; + if (line[0] == '\0') + error("Missing argument to pragma"); + else { + /* First give the port a chance */ + if (port->process_pragma && !port->process_pragma(line)) + return; + /* PENDING: all the SDCC shared pragmas */ + /* Nothing handled it */ + error("Unrecognised #pragma %s", line); + } +} + +static void handle_line(void) +{ + int c; + char line[128], *p; + + c = GETC(); + while (c == '\t' || c == ' ') + c = GETC(); + p = line; + while (isdigit(c)) { + *p++ = c; + c = GETC(); + } + *p = '\0'; + if (line[0] == '\0') + error("Error in number in #line"); + /* This is weird but cpp seems to add an extra three to the line no */ + yylineno = atoi(line) - 3; + lineno = yylineno; + /* Fetch the filename if there is one */ + while (c == '\t' || c == ' ') + c = GETC(); + if (c == '\"') { + p = line; + c = GETC(); + while (c != '\"' && c != EOF && c != '\n') { + *p++ = c; + c = GETC(); + } + if (c == '\"') { + *p = '\0'; + currFname = gc_strdup(line); + } + filename = currFname; + } +} + +static INLINE void invalid_directive(void) +{ + error("Invalid directive"); +} + +static INLINE int check_newline(void) +{ + int c; + yylineno++; + lineno = yylineno; + + /* Skip any leading white space */ + c = GETC(); + while (c == '\t' || c == ' ') + c = GETC(); + /* Were only interested in #something */ + if (c != '#') + return c; + c = GETC(); + while (c == '\t' || c == ' ') + c = GETC(); + /* The text in the stream is the type of directive */ + switch (c) { + case 'l': + /* Start of line? */ + if (GETC() == 'i' && GETC() == 'n' && GETC() == 'e') { + c = GETC(); + if (c == '\t' || c == ' ') + handle_line(); + else + invalid_directive(); + } + else + invalid_directive(); + break; + case 'p': + /* Start of pragma? */ + if (GETC() == 'r' && GETC() == 'a' && GETC() == 'g' && + GETC() == 'm' && GETC() == 'a') { + c = GETC(); + if (c == '\t' || c == ' ') + handle_pragma(); else - UNGETC(c2); + invalid_directive(); } + else + invalid_directive(); + break; + default: + invalid_directive(); + } + /* Discard from here until the start of the next line */ + while (c != '\n' && c != EOF) + c = GETC(); + return c; +} + +static int skip_whitespace(int c) +{ + while (1) { switch (c) { - case EOF: - return 0; case ' ': case '\t': + case '\f': + case '\v': + case '\b': case '\r': + c = GETC(); + break; case '\n': + c = check_newline(); + default: + return c; + } + } +} + +void yyerror(const char *s) +{ + if (end_of_file) + error("%s at end of of input", s); + else if (yytext[0] == '\0') + error("%s at null character", s); + else if (yytext[0] == '"') + error("%s before string constant", s); + else if (yytext[0] == '\'') + error("%s before character constant", s); + else + error("%s before %s", s, yytext); +} + +static int _yylex(void) +{ + int c; + static char line[128]; + char *p; + + yytext = line; + + c = GETC(); + while (1) { + switch (c) { + case ' ': + case '\t': + case '\f': + case '\v': + case '\b': /* Skip whitespace */ - break; - case 'a': case 'b': case 'c': case 'd': - case 'e': case 'f': case 'g': case 'h': - case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': - case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': - case 'E': case 'F': case 'G': case 'H': - case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': - case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '_': - /* Start of a token. Parse. */ - p = line; - *p++ = c; c = GETC(); - while (ISL(c)) { - *p++ = c; - c = GETC(); - } - *p = '\0'; + break; + case '\r': + case '\n': + c = skip_whitespace(c); + break; + case '#': UNGETC(c); - return check_token(line); - case '0': case '1': - case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - p = line; + c = check_newline(); + break; + default: + goto past_ws; + } + } + + past_ws: + /* Handle comments first */ + if (c == '/') { + int c2 = GETC(); + if (c2 == '*' || c2 == '/') { + discard_comments(c2); + c = GETC(); + } + else + UNGETC(c2); + } + switch (c) { + case EOF: + end_of_file = TRUE; + line[0] = '\0'; + return 0; + case 'a': case 'b': case 'c': case 'd': + case 'e': case 'f': case 'g': case 'h': + case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': + case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': + case 'E': case 'F': case 'G': case 'H': + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': + case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + /* Start of a token. Parse. */ + p = line; + *p++ = c; + c = GETC(); + while (ISL(c)) { *p++ = c; c = GETC(); - if (c == 'x' || c == 'X') { - *p++ = c; - c = GETC(); - } - while (ISHEX(c)) { - *p++ = c; - c = GETC(); - } - *p = '\0'; - UNGETC(c); - yylval.val = constVal(line); - return CONSTANT; - case '\"': - /* A string */ - p = stringLiteral(); - yylval.val = strVal(p); - return(STRING_LITERAL); - case '\'': - /* Possible formats: - ['\n', '\\', '\'', '\"'...] - ['a'...] - */ - p = line; + } + *p = '\0'; + UNGETC(c); + return check_token(line); + case '0': case '1': + case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + p = line; + *p++ = c; + c = GETC(); + if (c == 'x' || c == 'X') { *p++ = c; c = GETC(); - if (c == '\\') { - *p++ = c; - c = GETC(); - /* Fall through */ - } + } + while (ISHEX(c)) { *p++ = c; c = GETC(); + } + *p = '\0'; + UNGETC(c); + yylval.val = constVal(line); + return CONSTANT; + case '\"': + /* A string */ + p = stringLiteral(); + yylval.val = strVal(p); + return(STRING_LITERAL); + case '\'': + /* Possible formats: + ['\n', '\\', '\'', '\"'...] + ['a'...] + */ + p = line; + *p++ = c; + c = GETC(); + if (c == '\\') { *p++ = c; - *p = '\0'; - if (c != '\'') { - error("Unrecognised character constant %s", line); - } - yylval.val = charVal(line); - return CONSTANT; - case '#': - /* Assume a pragma and toast the rest of the line. */ c = GETC(); - while (c != '\n') { - c = GETC(); - } - break; - case '=': - case '&': - case '!': - case '-': - case '+': - case '*': - case '/': - case '%': - case '<': - case '>': - case '^': - case '|': { - /* Cases which can be compounds */ - /* The types and classes of composites are: - >>= <<= - += -= *= /= %= &= ^= |= - >> << ++ -- - && || - <= >= == != - -> - So a composite started by char 'x' can be: - 1. Followed by itself then an equals - 2. Followed by itself - 3. Followed by an equals - 4. Be a '->' - 5. Be by itself - */ - int next = GETC(); - /* Class 1 and 2 */ - if (next == c) { - next = GETC(); - /* Class 1 */ - if (next == '=') { - switch (c) { - case '>': // >>= - yylval.yyint = RIGHT_ASSIGN; - return RIGHT_ASSIGN; - case '<': // <<= - yylval.yyint = LEFT_ASSIGN; - return LEFT_ASSIGN; - default: - error("Unrecognised token %c%c=", c, c); - } - } - else { - /* Push the next char back on and find the class */ - UNGETC(next); - /* Case 2 */ - switch (c) { - case '>': // >> - return RIGHT_OP; - case '<': // << - return LEFT_OP; - case '+': - return INC_OP; - case '-': - return DEC_OP; - case '&': - return AND_OP; - case '|': - return OR_OP; - case '=': - return EQ_OP; - default: - error("Unrecognised token %c%c", c, c); - } + /* Fall through */ + } + *p++ = c; + c = GETC(); + *p++ = c; + *p = '\0'; + if (c != '\'') { + error("Unrecognised character constant %s", line); + } + yylval.val = charVal(line); + return CONSTANT; + case '=': + case '&': + case '!': + case '-': + case '+': + case '*': + case '/': + case '%': + case '<': + case '>': + case '^': + case '|': { + /* Cases which can be compounds */ + /* The types and classes of composites are: + >>= <<= + += -= *= /= %= &= ^= |= + >> << ++ -- + && || + <= >= == != + -> + So a composite started by char 'x' can be: + 1. Followed by itself then an equals + 2. Followed by itself + 3. Followed by an equals + 4. Be a '->' + 5. Be by itself + */ + int next = GETC(); + /* Class 1 and 2 */ + if (next == c) { + next = GETC(); + /* Class 1 */ + if (next == '=') { + switch (c) { + case '>': // >>= + yylval.yyint = RIGHT_ASSIGN; + return RIGHT_ASSIGN; + case '<': // <<= + yylval.yyint = LEFT_ASSIGN; + return LEFT_ASSIGN; + default: + error("Unrecognised token %c%c=", c, c); } } - /* Case 3 */ - else if (next == '=') { - int result = 0; + else { + /* Push the next char back on and find the class */ + UNGETC(next); + /* Case 2 */ switch (c) { + case '>': // >> + return RIGHT_OP; + case '<': // << + return LEFT_OP; case '+': - result = ADD_ASSIGN; break; + return INC_OP; case '-': - result = SUB_ASSIGN; break; - case '*': - result = MUL_ASSIGN; break; - case '/': - result = DIV_ASSIGN; break; - case '%': - result = MOD_ASSIGN; break; + return DEC_OP; case '&': - result = AND_ASSIGN; break; - case '^': - result = XOR_ASSIGN; break; + return AND_OP; case '|': - result = OR_ASSIGN; break; - case '<': - result = LE_OP; break; - case '>': - result = GE_OP; break; - case '!': - result = NE_OP; break; + return OR_OP; + case '=': + return EQ_OP; default: - error("Unrecognised token %c=", c); - } - if (result) { - yylval.yyint = result; - return result; + error("Unrecognised token %c%c", c, c); } } - /* Case 4 */ - else if (c == '-' && next == '>') { - return PTR_OP; + } + /* Case 3 */ + else if (next == '=') { + int result = 0; + switch (c) { + case '+': + result = ADD_ASSIGN; break; + case '-': + result = SUB_ASSIGN; break; + case '*': + result = MUL_ASSIGN; break; + case '/': + result = DIV_ASSIGN; break; + case '%': + result = MOD_ASSIGN; break; + case '&': + result = AND_ASSIGN; break; + case '^': + result = XOR_ASSIGN; break; + case '|': + result = OR_ASSIGN; break; + case '<': + result = LE_OP; break; + case '>': + result = GE_OP; break; + case '!': + result = NE_OP; break; + default: + error("Unrecognised token %c=", c); } - /* Case 5 */ - else { - UNGETC(next); - return c; + if (result) { + yylval.yyint = result; + return result; } - break; } - case '{': - NestLevel++; - return c; - case '}': - NestLevel--; + /* Case 4 */ + else if (c == '-' && next == '>') { + return PTR_OP; + } + /* Case 5 */ + else { + UNGETC(next); return c; - case '.': + } + break; + } + case '{': + NestLevel++; + return c; + case '}': + NestLevel--; + return c; + case '.': + c = GETC(); + if (c == '.') { c = GETC(); if (c == '.') { - c = GETC(); - if (c == '.') { - return VAR_ARGS; - } + return VAR_ARGS; } - UNGETC(c); - return '.'; - case '[': case ']': - return c; - case ',': - case ':': - case '(': case ')': - case '~': - case '?': - case ';': - /* Special characters that cant be part of a composite */ - return c; - default: - error("Unhandled char %c", c); } - c = GETC(); + UNGETC(c); + return '.'; + case '[': case ']': + return c; + case ',': + case ':': + case '(': case ')': + case '~': + case '?': + case ';': + /* Special characters that cant be part of a composite */ + return c; + default: + error("Unhandled character %c", c); } return 0; } -- 2.47.2