From 217472c41ba2d2400067b42dac22d2efee618d3d Mon Sep 17 00:00:00 2001
From: michaelh <michaelh@4a8a32a2-be11-0410-ad9d-d568d2c75423>
Date: Sun, 20 Aug 2000 01:39:00 +0000
Subject: [PATCH] Split out yyerror. Added error, #line, and #pragma handling
 to altlex. Added a few const's here and there.

git-svn-id: https://sdcc.svn.sourceforge.net/svnroot/sdcc/trunk/sdcc@327 4a8a32a2-be11-0410-ad9d-d568d2c75423
---
 src/SDCC.lex   |  14 ++
 src/SDCC.y     |  17 --
 src/SDCCerr.c  |   2 +-
 src/SDCCsymt.c |   4 +-
 src/SDCCsymt.h |   2 +-
 src/altlex.c   | 625 +++++++++++++++++++++++++++++++------------------
 6 files changed, 416 insertions(+), 248 deletions(-)

diff --git a/src/SDCC.lex b/src/SDCC.lex
index 94ca769f..a39f0c4f 100644
--- a/src/SDCC.lex
+++ b/src/SDCC.lex
@@ -521,3 +521,17 @@ int isTargetKeyword(char *s)
     
     return 0;
 }
+
+extern int fatalError;
+
+int yyerror(char *s)	/* s: message text to report; always returns 0 */
+{
+   fflush(stdout);
+
+   if (yylineno && filename)	/* print only when both a line number and a file name are set */
+	fprintf(stdout,"\n%s(%d) %s: token -> '%s' ; column %d\n",
+		filename,yylineno,
+		s,yytext,column);
+   fatalError++;		/* counted even when nothing was printed */
+   return 0;
+}
diff --git a/src/SDCC.y b/src/SDCC.y
index add8d202..44b19d5f 100644
--- a/src/SDCC.y
+++ b/src/SDCC.y
@@ -1334,20 +1334,3 @@ identifier
    ;
 %%
 
-extern unsigned char *yytext;
-extern int column;
-extern char *filename;
-extern int fatalError;
-
-int yyerror(char *s)
-{
-   fflush(stdout);
-
-   if ( yylineno )
-	fprintf(stderr,"\n%s(%d) %s: token -> '%s' ; column %d\n",
-		filename,yylineno,
-		s,yytext,column);
-   fatalError++;
-   return 0;
-}
-
diff --git a/src/SDCCerr.c b/src/SDCCerr.c
index d2e77054..01a54d36 100644
--- a/src/SDCCerr.c
+++ b/src/SDCCerr.c
@@ -2,7 +2,7 @@
 
 #include "common.h"
 
-#define USE_STDOUT_FOR_ERRORS		1
+#define USE_STDOUT_FOR_ERRORS		0
 
 #if USE_STDOUT_FOR_ERRORS
 #define ERRSINK		stdout
diff --git a/src/SDCCsymt.c b/src/SDCCsymt.c
index 7ef86605..40aa386b 100644
--- a/src/SDCCsymt.c
+++ b/src/SDCCsymt.c
@@ -60,7 +60,7 @@ bucket	 *newBucket ()
 /*-----------------------------------------------------------------*/
 /* hashKey - computes the hashkey given a symbol name              */
 /*-----------------------------------------------------------------*/
-int hashKey (char *s)
+int hashKey (const char *s)
 {
     unsigned long key = 0;
 
@@ -144,7 +144,7 @@ void  deleteSym ( bucket **stab, void *sym, char *sname)
 /*-----------------------------------------------------------------*/
 /* findSym - finds a symbol in a table				   */
 /*-----------------------------------------------------------------*/
-void  *findSym ( bucket **stab, void *sym, char *sname)
+void  *findSym ( bucket **stab, void *sym, const char *sname)
 {
    bucket *bp ;
 
diff --git a/src/SDCCsymt.h b/src/SDCCsymt.h
index 36e361d8..0e1a7193 100644
--- a/src/SDCCsymt.h
+++ b/src/SDCCsymt.h
@@ -416,7 +416,7 @@ void	       initHashT	    (				   );
 bucket	      *newBucket	    (				   );
 void	       addSym		    ( bucket ** , void	 *, char  *, int, int);
 void	       deleteSym	    ( bucket ** , void	 *, char  *);
-void	      *findSym		    ( bucket ** , void	 *, char  *);
+void	      *findSym		    ( bucket ** , void	 *, const char  *);
 void          *findSymWithLevel     ( bucket ** , struct symbol *  );
 void          *findSymWithBlock     ( bucket ** , struct symbol *,int   );
 #include "SDCCmem.h"
diff --git a/src/altlex.c b/src/altlex.c
index 572826b6..816b2fad 100644
--- a/src/altlex.c
+++ b/src/altlex.c
@@ -1,6 +1,7 @@
 /** @file altlex.c
     An alternate lexer to SDCC.lex.
     In development - ie messy and just plain wrong.
+    Inspired by the gcc lexer, c-lex.c.
 */
 #include "common.h"
 #include "reswords.h"
@@ -8,13 +9,6 @@
 
 #define DUMP_OUTPUT		0
 
-FILE *yyin;
-
-int yylineno;
-int column;
-char *currFname;
-char *yytext;
-
 /* Right.  What are the parts of the C stream?  From SDCC.lex:
    D = [0..9]		digits
    L = [a..z A..Z _]	alphanumerics and _
@@ -47,25 +41,37 @@ char *yytext;
 	 Punct	Try to read punct
 */	
 
-char linebuf[10000];
-int linepos, linelen;
+extern int fatalError;		/* incremented by error() below */
+extern int lineno;		/* kept equal to yylineno by this lexer */
+extern char *filename;		/* name printed by error(); updated from #line */
+
+FILE *yyin;
+
+int yylineno;			/* current input line number */
+char *currFname;		/* most recent file name taken from a #line directive */
+char *yytext;			/* text of the current token; points at _yylex()'s buffer */
 
+static char linebuf[10000];
+static int linepos, linelen;
+static int end_of_file;		/* set once _yylex() has seen EOF; read by yyerror() */
+
+#ifdef __GNUC__
 #define INLINE	inline
-#define ERRSINK stdout
+#else
+#define INLINE 
+#endif
 
-extern int fatalError ;
-extern int lineno ;
-extern char *filename;
+#define ERRSINK stderr
 
 static void error(const char *sz, ...)
 {
     va_list ap;
     fatalError++;
     
-    if ( filename && lineno ) {
+    if (filename && lineno) {
 	fprintf(ERRSINK, "%s(%d):",filename,lineno);
     }
-    fprintf(ERRSINK, "error:");
+    fprintf(ERRSINK, "error *** ");
     va_start(ap, sz);
     vfprintf(ERRSINK, sz, ap);
     va_end(ap);
@@ -103,7 +109,7 @@ static int INLINE yungetc(int c)
 //#define UNGETC(_a)	ungetc(_a, yyin)
 #define ISL(_a)		(isalnum(_a) || _a == '_')
 #define ISALNUM(_a)	isalnum(_a)
-#define ISHEX(_a)	isalnum(_a)
+#define ISHEX(_a)	isxdigit(_a)
 
 static char *stringLiteral (void)
 {
@@ -176,7 +182,7 @@ static void discard_comments(int type)
 
 /* will return 1 if the string is a part
    of a target specific keyword */
-static int isTargetKeyword(const char *s)
+static INLINE int isTargetKeyword(const char *s)
 {
     int i;
     
@@ -190,7 +196,7 @@ static int isTargetKeyword(const char *s)
     return 0;
 }
 
-static int check_token(const char *sz)
+static INLINE int check_token(const char *sz)
 {
     const struct reserved_words *p;
     p = is_reserved_word(sz, strlen(sz));
@@ -210,255 +216,420 @@ static int check_token(const char *sz)
     }
 }
 
-static int _yylex(void)
+/* Read the word following "#pragma" and offer it to the port's handler. */
+static void handle_pragma(void)
 {
+    char line[128], *p;
     int c;
-    char line[128];
-    char *p;
 
     c = GETC();
-    while (1) {
-	/* Handle comments first */
-	if (c == '/') {
-	    int c2 = GETC();
-	    if (c2 == '*' || c2 == '/') {
-		discard_comments(c2);
-		c = GETC();
-		continue;
-	    }
+    while (c == '\t' || c == ' ')
+	c = GETC();
+    /* Stop at EOF as well, and truncate rather than overrun line[] */
+    for (p = line; c != EOF && !isspace(c); c = GETC())
+	if (p < line + sizeof(line) - 1)
+	    *p++ = c;
+    *p = '\0';
+    if (c == '\n') UNGETC(c);	/* check_newline() still scans for this newline */
+    if (line[0] == '\0')
+	error("Missing argument to pragma");
+    else {
+	/* First give the port a chance; a zero return means it was handled */
+	if (port->process_pragma && !port->process_pragma(line))
+	    return;
+	/* PENDING: all the SDCC shared pragmas.  Nothing handled it */
+	error("Unrecognised #pragma %s", line);
+    }
+}
+
+/* Parse the rest of a '#line <number> ["<file>"]' directive: sets yylineno
+   and lineno, and currFname/filename when a complete quoted name is given. */
+static void handle_line(void)
+{
+    int c;
+    char line[128], *p;
+
+    c = GETC();
+    while (c == '\t' || c == ' ')
+	c = GETC();
+    /* Collect the digits, truncating rather than overrunning line[] */
+    for (p = line; isdigit(c); c = GETC())
+	if (p < line + sizeof(line) - 1)
+	    *p++ = c;
+    *p = '\0';
+    if (line[0] == '\0')
+	error("Error in number in #line");
+    /* This is weird but cpp seems to add an extra three to the line no */
+    yylineno = atoi(line) - 3;
+    lineno = yylineno;
+    /* Fetch the filename if there is one */
+    while (c == '\t' || c == ' ')
+	c = GETC();
+    if (c == '\"') {
+	p = line;
+	for (c = GETC(); c != '\"' && c != EOF && c != '\n'; c = GETC())
+	    if (p < line + sizeof(line) - 1)
+		*p++ = c;
+	if (c == '\"') {
+	    *p = '\0';
+	    currFname = gc_strdup(line);
+	}
+	filename = currFname;
+    }
+    if (c == '\n') UNGETC(c);	/* check_newline() still scans for this newline */
+}
+
+static INLINE void invalid_directive(void)	/* report a '#' line that check_newline() could not match */
+{
+    error("Invalid directive");
+}
+
+static INLINE int check_newline(void)	/* returns the character the caller should look at next */
+{
+    int c;
+    yylineno++;
+    lineno = yylineno;
+
+    /* Skip any leading white space */
+    c = GETC();
+    while (c == '\t' || c == ' ')
+	c = GETC();
+    /* We're only interested in #something; hand anything else straight back */
+    if (c != '#')
+	return c;
+    c = GETC();
+    while (c == '\t' || c == ' ')
+	c = GETC();
+    /* The text in the stream is the type of directive */
+    switch (c) {
+    case 'l':
+	/* Start of "line"?  It must be followed by a space or tab */
+	if (GETC() == 'i' && GETC() == 'n' && GETC() == 'e') {
+	    c = GETC();
+	    if (c == '\t' || c == ' ')
+		handle_line();
+	    else
+		invalid_directive();
+	}
+	else
+	    invalid_directive();
+	break;
+    case 'p':
+	/* Start of "pragma"?  It must be followed by a space or tab */
+	if (GETC() == 'r' && GETC() == 'a' && GETC() == 'g' &&
+	    GETC() == 'm' && GETC() == 'a') {
+	    c = GETC();
+	    if (c == '\t' || c == ' ')
+		handle_pragma();
 	    else
-		UNGETC(c2);
+		invalid_directive();
 	}
+	else
+	    invalid_directive();
+	break;
+    default:
+	invalid_directive();
+    }
+    /* Discard input up to the next '\n' or EOF; that terminator is what gets returned */
+    while (c != '\n' && c != EOF)
+	c = GETC();
+    return c;
+}
+
+static int skip_whitespace(int c)	/* c: character already read; returns the first non-blank, or check_newline()'s result after a '\n' */
+{
+    while (1) {
 	switch (c) {
-	case EOF:
-	    return 0;
 	case ' ':
 	case '\t':
+	case '\f':
+	case '\v':
+	case '\b':
 	case '\r':
+	    c = GETC();
+	    break;
 	case '\n':
+	    c = check_newline();	/* fall through: return whatever check_newline() produced */
+	default:
+	    return c;
+	}
+    }
+}
+
+void yyerror(const char *s)	/* reports s via error(), qualified by what the current token text looks like */
+{
+   if (end_of_file)
+       error("%s at end of input", s);
+   else if (yytext[0] == '\0')
+       error("%s at null character", s);
+   else if (yytext[0] == '"')
+       error("%s before string constant", s);
+   else if (yytext[0] == '\'')
+       error("%s before character constant", s);
+   else
+       error("%s before %s", s, yytext);
+}
+
+static int _yylex(void)	/* returns the next token code; 0 at EOF or after a reported error */
+{
+    int c;
+    static char line[128];
     char *p;
+
+    yytext = line;
+    c = GETC();
+ skip_ws:
+    while (1) {
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\f':
+	case '\v':
+	case '\b':
 	    /* Skip whitespace */
-	    break;
-	case 'a': case 'b': case 'c': case 'd':
-	case 'e': case 'f': case 'g': case 'h':
-	case 'i': case 'j': case 'k': case 'l':
-	case 'm': case 'n': case 'o': case 'p':
-	case 'q': case 'r': case 's': case 't':
-	case 'u': case 'v': case 'w': case 'x':
-	case 'y': case 'z':
-	case 'A': case 'B': case 'C': case 'D':
-	case 'E': case 'F': case 'G': case 'H':
-	case 'I': case 'J': case 'K': case 'L':
-	case 'M': case 'N': case 'O': case 'P':
-	case 'Q': case 'R': case 'S': case 'T':
-	case 'U': case 'V': case 'W': case 'X':
-	case 'Y': case 'Z':
-	case '_':
-	    /* Start of a token.  Parse. */
-	    p = line;
-	    *p++ = c;
 	    c = GETC();
-	    while (ISL(c)) {
-		*p++ = c;
-		c = GETC();
-	    }
-	    *p = '\0';
+	    break;
+	case '\r':
+	case '\n':
+	    c = skip_whitespace(c);
+	    break;
+	case '#':
 	    UNGETC(c);
-	    return check_token(line);
-	case '0': case '1':
-	case '2': case '3': case '4': case '5':
-	case '6': case '7': case '8': case '9':
-	    p = line;
+	    c = check_newline();
+	    break;
+	default:
+	    goto past_ws;
+	}
+    }
+ past_ws:
     /* Handle comments first */
+    if (c == '/') {
+	int c2 = GETC();
+	if (c2 == '*' || c2 == '/') {
+	    discard_comments(c2);
+	    c = GETC();
+	    goto skip_ws;	/* blanks, newlines or another comment may follow */
+	}
+	else
+	    UNGETC(c2);
+    }
+    switch (c) {
+    case EOF:
+	end_of_file = TRUE;
+	line[0] = '\0';
+	return 0;
+    case 'a': case 'b': case 'c': case 'd':
+    case 'e': case 'f': case 'g': case 'h':
+    case 'i': case 'j': case 'k': case 'l':
+    case 'm': case 'n': case 'o': case 'p':
+    case 'q': case 'r': case 's': case 't':
+    case 'u': case 'v': case 'w': case 'x':
+    case 'y': case 'z':
+    case 'A': case 'B': case 'C': case 'D':
+    case 'E': case 'F': case 'G': case 'H':
+    case 'I': case 'J': case 'K': case 'L':
+    case 'M': case 'N': case 'O': case 'P':
+    case 'Q': case 'R': case 'S': case 'T':
+    case 'U': case 'V': case 'W': case 'X':
+    case 'Y': case 'Z':
+    case '_':
+	/* Start of a token.  Parse. */
+	p = line;
+	*p++ = c;
+	c = GETC();
+	while (ISL(c)) {	/* NOTE(review): nothing stops this running past line[128] */
 	    *p++ = c;
 	    c = GETC();
-	    if (c == 'x' || c == 'X') {
-		*p++ = c;
-		c = GETC();
-	    }
-	    while (ISHEX(c)) {
-		*p++ = c;
-		c = GETC();
-	    }
-	    *p = '\0';
-	    UNGETC(c);
-	    yylval.val = constVal(line);
-	    return CONSTANT;
-	case '\"':
-	    /* A string */
-	    p = stringLiteral();
-	    yylval.val = strVal(p);
-	    return(STRING_LITERAL);
-	case '\'':
-	    /* Possible formats:
-	       ['\n', '\\', '\'', '\"'...]
-	       ['a'...]
-	    */
-	    p = line;
+	}
+	*p = '\0';
+	UNGETC(c);
+	return check_token(line);
+    case '0': case '1':
+    case '2': case '3': case '4': case '5':
+    case '6': case '7': case '8': case '9':
+	p = line;
+	*p++ = c;
+	c = GETC();
+	if (c == 'x' || c == 'X') {
 	    *p++ = c;
 	    c = GETC();
-	    if (c == '\\') {
-		*p++ = c;
-		c = GETC();
-		/* Fall through */
-	    }
+	}
+	while (ISHEX(c)) {	/* NOTE(review): nothing stops this running past line[128] */
 	    *p++ = c;
 	    c = GETC();
+	}
+	*p = '\0';
+	UNGETC(c);
+	yylval.val = constVal(line);
+	return CONSTANT;
+    case '\"':
+	/* A string */
+	p = stringLiteral();
+	yylval.val = strVal(p);
+	return(STRING_LITERAL);
+    case '\'':
+	/* Possible formats:
+	   ['\n', '\\', '\'', '\"'...]
+	   ['a'...]
+	*/
+	p = line;
+	*p++ = c;
+	c = GETC();
+	if (c == '\\') {
 	    *p++ = c;
-	    *p = '\0';
-	    if (c != '\'') {
-		error("Unrecognised character constant %s", line);
-	    }
-	    yylval.val = charVal(line);
-	    return CONSTANT;
-	case '#':
-	    /* Assume a pragma and toast the rest of the line. */
 	    c = GETC();
-	    while (c != '\n') {
-		c = GETC();
-	    }
-	    break;
-	case '=':
-	case '&':
-	case '!':
-	case '-':
-	case '+':
-	case '*':
-	case '/':
-	case '%':
-	case '<':
-	case '>':
-	case '^':
-	case '|': {
-	    /* Cases which can be compounds */
-	    /* The types and classes of composites are:
-	       >>= <<=
-	       += -= *= /= %= &= ^= |=
-	       >> << ++ --
-	       && ||
-	       <= >= == !=
-	       ->
-	       So a composite started by char 'x' can be:
-	         1. Followed by itself then an equals
-		 2. Followed by itself
-		 3. Followed by an equals
-		 4. Be a '->'
-		 5. Be by itself
-	    */
-	    int next = GETC();
-	    /* Class 1 and 2 */
-	    if (next == c) {
-		next = GETC();
-		/* Class 1 */
-		if (next == '=') {
-		    switch (c) {
-		    case '>':	// >>=
-			yylval.yyint = RIGHT_ASSIGN;
-			return RIGHT_ASSIGN;
-		    case '<':	// <<=
-			yylval.yyint = LEFT_ASSIGN;
-			return LEFT_ASSIGN;
-		    default:
-			error("Unrecognised token %c%c=", c, c);
-		    }
-		}
-		else {
-		    /* Push the next char back on and find the class */
-		    UNGETC(next);
-		    /* Case 2 */
-		    switch (c) {
-		    case '>':	// >>
-			return RIGHT_OP;
-		    case '<':	// <<
-			return LEFT_OP;
-		    case '+':
-			return INC_OP;
-		    case '-':
-			return DEC_OP;
-		    case '&':
-			return AND_OP;
-		    case '|':
-			return OR_OP;
-		    case '=':
-			return EQ_OP;
-		    default:
-			error("Unrecognised token %c%c", c, c);
-		    }
+	    /* Fall through */
+	}
+	*p++ = c;
+	c = GETC();
+	*p++ = c;
+	*p = '\0';
+	if (c != '\'') {
+	    error("Unrecognised character constant %s", line);
+	}
+	yylval.val = charVal(line);
+	return CONSTANT;
+    case '=':
+    case '&':
+    case '!':
+    case '-':
+    case '+':
+    case '*':
+    case '/':
+    case '%':
+    case '<':
+    case '>':
+    case '^':
+    case '|': {
+	/* Cases which can be compounds */
+	/* The types and classes of composites are:
+	   >>= <<=
+	   += -= *= /= %= &= ^= |=
+	   >> << ++ --
+	   && ||
+	   <= >= == !=
+	   ->
+	   So a composite started by char 'x' can be:
+	   1. Followed by itself then an equals
+	   2. Followed by itself
+	   3. Followed by an equals
+	   4. Be a '->'
+	   5. Be by itself
+	*/
+	int next = GETC();
+	/* Class 1 and 2 */
+	if (next == c) {
+	    next = GETC();
+	    /* Class 1 */
+	    if (next == '=') {
+		switch (c) {
+		case '>':	// >>=
+		    yylval.yyint = RIGHT_ASSIGN;
+		    return RIGHT_ASSIGN;
+		case '<':	// <<=
+		    yylval.yyint = LEFT_ASSIGN;
+		    return LEFT_ASSIGN;
+		default:
+		    error("Unrecognised token %c%c=", c, c);
 		}
 	    }
-	    /* Case 3 */
-	    else if (next == '=') {
-		int result = 0;
+	    else {
+		/* Push the next char back on and find the class */
+		UNGETC(next);
+		/* Case 2 */
 		switch (c) {
+		case '>':	// >>
+		    return RIGHT_OP;
+		case '<':	// <<
+		    return LEFT_OP;
 		case '+':
-		    result = ADD_ASSIGN; break;
+		    return INC_OP;
 		case '-':
-		    result = SUB_ASSIGN; break;
-		case '*':
-		    result = MUL_ASSIGN; break;
-		case '/':
-		    result = DIV_ASSIGN; break;
-		case '%':
-		    result = MOD_ASSIGN; break;
+		    return DEC_OP;
 		case '&':
-		    result = AND_ASSIGN; break;
-		case '^':
-		    result = XOR_ASSIGN; break;
+		    return AND_OP;
 		case '|':
-		    result = OR_ASSIGN; break;
-		case '<':
-		    result = LE_OP; break;
-		case '>':
-		    result = GE_OP; break;
-		case '!':
-		    result = NE_OP; break;
+		    return OR_OP;
+		case '=':
+		    return EQ_OP;
 		default:
-		    error("Unrecognised token %c=", c);
-		}
-		if (result) {
-		    yylval.yyint = result;
-		    return result;
+		    error("Unrecognised token %c%c", c, c);	/* NOTE(review): also reached by valid input such as "**p" or "!!x" */
 		}
 	    }
-	    /* Case 4 */
-	    else if (c == '-' && next == '>') {
-		return PTR_OP;
+	}
+	/* Case 3 */
+	else if (next == '=') {
+	    int result = 0;
+	    switch (c) {
+	    case '+':
+		result = ADD_ASSIGN; break;
+	    case '-':
+		result = SUB_ASSIGN; break;
+	    case '*':
+		result = MUL_ASSIGN; break;
+	    case '/':
+		result = DIV_ASSIGN; break;
+	    case '%':
+		result = MOD_ASSIGN; break;
+	    case '&':
+		result = AND_ASSIGN; break;
+	    case '^':
+		result = XOR_ASSIGN; break;
+	    case '|':
+		result = OR_ASSIGN; break;
+	    case '<':
+		result = LE_OP; break;
+	    case '>':
+		result = GE_OP; break;
+	    case '!':
+		result = NE_OP; break;
+	    default:
+		error("Unrecognised token %c=", c);
 	    }
-	    /* Case 5 */
-	    else {
-		UNGETC(next);
-		return c;
+	    if (result) {
+		yylval.yyint = result;
+		return result;
 	    }
-	    break;
 	}
-	case '{':
-	    NestLevel++;
-	    return c;
-	case '}':
-	    NestLevel--;
+	/* Case 4 */
+	else if (c == '-' && next == '>') {
+	    return PTR_OP;
+	}
+	/* Case 5 */
+	else {
+	    UNGETC(next);
 	    return c;
-	case '.':
+	}
+	break;
+    }
+    case '{':
+	NestLevel++;
+	return c;
+    case '}':
+	NestLevel--;
+	return c;
+    case '.':
+	c = GETC();
+	if (c == '.') {
 	    c = GETC();
 	    if (c == '.') {
-		c = GETC();
-		if (c == '.') {
-		    return VAR_ARGS;
-		}
+		return VAR_ARGS;
 	    }
-	    UNGETC(c);
-	    return '.';
-	case '[': case ']':
-	    return c;
-	case ',':
-	case ':':
-	case '(': case ')':
-	case '~':
-	case '?':
-	case ';':
-	    /* Special characters that cant be part of a composite */
-	    return c;
-	default:
-	    error("Unhandled char %c", c);
 	}
-	c = GETC();
+	UNGETC(c);
+	return '.';
+    case '[': case ']':
+	return c;
+    case ',':
+    case ':':
+    case '(': case ')':
+    case '~':
+    case '?':
+    case ';':
+	/* Special characters that cant be part of a composite */
+	return c;
+    default:
+	error("Unhandled character %c", c);
     }
     return 0;
 }
-- 
2.47.2